Merge branch 'master' of git://factorcode.org/git/factor into bitfields

2009-10-07 21:42:37 -05:00 · 2009-10-07 21:42:37 -05:00 · 8ba295d8a8
parent 4e1aa8f638 21c9da7c5f
commit 8ba295d8a8
55 changed files with 1463 additions and 401 deletions
--- a/basis/compiler/cfg/instructions/instructions.factor
+++ b/basis/compiler/cfg/instructions/instructions.factor
@ -280,6 +280,11 @@ def: dst
 use: src
 literal: shuffle rep ;

+PURE-INSN: ##tail>head-vector
+def: dst
+use: src
+literal: rep ;
+
 PURE-INSN: ##merge-vector-head
 def: dst
 use: src1 src2
@ -290,10 +295,39 @@ def: dst
 use: src1 src2
 literal: rep ;

+PURE-INSN: ##signed-pack-vector
+def: dst
+use: src1 src2
+literal: rep ;
+
+PURE-INSN: ##unsigned-pack-vector
+def: dst
+use: src1 src2
+literal: rep ;
+
+PURE-INSN: ##unpack-vector-head
+def: dst
+use: src
+literal: rep ;
+
+PURE-INSN: ##unpack-vector-tail
+def: dst
+use: src
+literal: rep ;
+
+PURE-INSN: ##integer>float-vector
+def: dst
+use: src
+literal: rep ;
+
+PURE-INSN: ##float>integer-vector
+def: dst
+use: src
+literal: rep ;
+
 PURE-INSN: ##compare-vector
 def: dst
 use: src1 src2
-temp: temp
 literal: rep cc ;

 PURE-INSN: ##test-vector
@ -781,8 +815,6 @@ UNION: kill-vreg-insn
 UNION: def-is-use-insn
 ##box-alien
 ##box-displaced-alien
-##compare-vector
-##not-vector
 ##string-nth
 ##unbox-any-c-ptr ;

--- a/basis/compiler/cfg/intrinsics/intrinsics.factor
+++ b/basis/compiler/cfg/intrinsics/intrinsics.factor
@ -171,18 +171,18 @@ IN: compiler.cfg.intrinsics
        { math.vectors.simd.intrinsics:(simd-vbitandn) [ [ ^^andn-vector ] emit-binary-vector-op ] }
        { math.vectors.simd.intrinsics:(simd-vbitor) [ [ ^^or-vector ] emit-binary-vector-op ] }
        { math.vectors.simd.intrinsics:(simd-vbitxor) [ [ ^^xor-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd.intrinsics:(simd-vbitnot) [ [ ^^not-vector ] emit-unary-vector-op ] }
+        { math.vectors.simd.intrinsics:(simd-vbitnot) [ [ generate-not-vector ] emit-unary-vector-op ] }
        { math.vectors.simd.intrinsics:(simd-vand) [ [ ^^and-vector ] emit-binary-vector-op ] }
        { math.vectors.simd.intrinsics:(simd-vandn) [ [ ^^andn-vector ] emit-binary-vector-op ] }
        { math.vectors.simd.intrinsics:(simd-vor) [ [ ^^or-vector ] emit-binary-vector-op ] }
        { math.vectors.simd.intrinsics:(simd-vxor) [ [ ^^xor-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd.intrinsics:(simd-vnot) [ [ ^^not-vector ] emit-unary-vector-op ] }
-        { math.vectors.simd.intrinsics:(simd-v<=) [ [ cc<= ^^compare-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd.intrinsics:(simd-v<) [ [ cc< ^^compare-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd.intrinsics:(simd-v=) [ [ cc= ^^compare-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd.intrinsics:(simd-v>) [ [ cc> ^^compare-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd.intrinsics:(simd-v>=) [ [ cc>= ^^compare-vector ] emit-binary-vector-op ] }
-        { math.vectors.simd.intrinsics:(simd-vunordered?) [ [ cc/<>= ^^compare-vector ] emit-binary-vector-op ] }
+        { math.vectors.simd.intrinsics:(simd-vnot) [ [ generate-not-vector ] emit-unary-vector-op ] }
+        { math.vectors.simd.intrinsics:(simd-v<=) [ [ cc<= generate-compare-vector ] emit-binary-vector-op ] }
+        { math.vectors.simd.intrinsics:(simd-v<) [ [ cc< generate-compare-vector ] emit-binary-vector-op ] }
+        { math.vectors.simd.intrinsics:(simd-v=) [ [ cc= generate-compare-vector ] emit-binary-vector-op ] }
+        { math.vectors.simd.intrinsics:(simd-v>) [ [ cc> generate-compare-vector ] emit-binary-vector-op ] }
+        { math.vectors.simd.intrinsics:(simd-v>=) [ [ cc>= generate-compare-vector ] emit-binary-vector-op ] }
+        { math.vectors.simd.intrinsics:(simd-vunordered?) [ [ cc/<>= generate-compare-vector ] emit-binary-vector-op ] }
        { math.vectors.simd.intrinsics:(simd-vany?) [ [ vcc-any ^^test-vector ] emit-unary-vector-op ] }
        { math.vectors.simd.intrinsics:(simd-vall?) [ [ vcc-all ^^test-vector ] emit-unary-vector-op ] }
        { math.vectors.simd.intrinsics:(simd-vnone?) [ [ vcc-none ^^test-vector ] emit-unary-vector-op ] }
@ -194,8 +194,14 @@ IN: compiler.cfg.intrinsics
        { math.vectors.simd.intrinsics:(simd-gather-2) [ emit-gather-vector-2 ] }
        { math.vectors.simd.intrinsics:(simd-gather-4) [ emit-gather-vector-4 ] }
        { math.vectors.simd.intrinsics:(simd-vshuffle) [ emit-shuffle-vector ] }
-        { math.vectors.simd.intrinsics:(simd-vmerge-head) [ [ ^^merge-vector-head ] emit-binary-vector-op ] }
-        { math.vectors.simd.intrinsics:(simd-vmerge-tail) [ [ ^^merge-vector-tail ] emit-binary-vector-op ] }
+        { math.vectors.simd.intrinsics:(simd-(vmerge-head)) [ [ ^^merge-vector-head ] emit-binary-vector-op ] }
+        { math.vectors.simd.intrinsics:(simd-(vmerge-tail)) [ [ ^^merge-vector-tail ] emit-binary-vector-op ] }
+        { math.vectors.simd.intrinsics:(simd-(v>float)) [ [ ^^integer>float-vector ] emit-unary-vector-op ] }
+        { math.vectors.simd.intrinsics:(simd-(v>integer)) [ [ ^^float>integer-vector ] emit-unary-vector-op ] }
+        { math.vectors.simd.intrinsics:(simd-(vpack-signed)) [ [ ^^signed-pack-vector ] emit-binary-vector-op ] }
+        { math.vectors.simd.intrinsics:(simd-(vpack-unsigned)) [ [ ^^unsigned-pack-vector ] emit-binary-vector-op ] }
+        { math.vectors.simd.intrinsics:(simd-(vunpack-head)) [ [ generate-unpack-vector-head ] emit-unary-vector-op ] }
+        { math.vectors.simd.intrinsics:(simd-(vunpack-tail)) [ [ generate-unpack-vector-tail ] emit-unary-vector-op ] }
        { math.vectors.simd.intrinsics:(simd-select) [ emit-select-vector ] }
        { math.vectors.simd.intrinsics:(simd-sum) [ [ ^^horizontal-add-vector ] emit-unary-vector-op ] }
        { math.vectors.simd.intrinsics:alien-vector [ emit-alien-vector ] }
--- a/basis/compiler/cfg/intrinsics/simd/simd.factor
+++ b/basis/compiler/cfg/intrinsics/simd/simd.factor
@ -2,8 +2,9 @@
 ! See http://factorcode.org/license.txt for BSD license.
 USING: accessors byte-arrays fry cpu.architecture kernel math
 sequences math.vectors.simd.intrinsics macros generalizations
-combinators combinators.short-circuit arrays
+combinators combinators.short-circuit arrays locals
 compiler.tree.propagation.info compiler.cfg.builder.blocks
+compiler.cfg.comparisons
 compiler.cfg.stacks compiler.cfg.stacks.local compiler.cfg.hats
 compiler.cfg.instructions compiler.cfg.registers
 compiler.cfg.intrinsics.alien ;
@ -115,3 +116,64 @@ MACRO: if-literals-match ( quots -- )
        [ byte-array inline-alien-setter? ]
        inline-alien
    ] with emit-vector-op ;
+
+: generate-not-vector ( src rep -- dst )
+    dup %not-vector-reps member?
+    [ ^^not-vector ]
+    [ [ ^^fill-vector ] [ ^^xor-vector ] bi ] if ;
+
+:: (generate-compare-vector) ( src1 src2 rep {cc,swap} -- dst )
+    {cc,swap} first2 :> swap? :> cc
+    swap?
+    [ src2 src1 rep cc ^^compare-vector ]
+    [ src1 src2 rep cc ^^compare-vector ] if ;
+
+:: generate-compare-vector ( src1 src2 rep orig-cc -- dst )
+    rep orig-cc %compare-vector-ccs :> not? :> ccs
+
+    ccs empty?
+    [ rep not? [ ^^fill-vector ] [ ^^zero-vector ] if ]
+    [
+        ccs unclip :> first-cc :> rest-ccs
+        src1 src2 rep first-cc (generate-compare-vector) :> first-dst
+
+        rest-ccs first-dst
+        [ [ src1 src2 rep ] dip (generate-compare-vector) rep ^^or-vector ]
+        reduce
+
+        not? [ rep generate-not-vector ] when
+    ] if ;
+
+:: generate-unpack-vector-head ( src rep -- dst )
+    {
+        {
+            [ rep %unpack-vector-head-reps member? ]
+            [ src rep ^^unpack-vector-head ]
+        }
+        [
+            rep ^^zero-vector :> zero
+            zero src rep cc> ^^compare-vector :> sign
+            src sign rep ^^merge-vector-head
+        ] 
+    } cond ;
+
+:: generate-unpack-vector-tail ( src rep -- dst )
+    {
+        {
+            [ rep %unpack-vector-tail-reps member? ]
+            [ src rep ^^unpack-vector-tail ]
+        }
+        {
+            [ rep %unpack-vector-head-reps member? ]
+            [
+                src rep ^^tail>head-vector :> tail
+                tail rep ^^unpack-vector-head
+            ]
+        }
+        [
+            rep ^^zero-vector :> zero
+            zero src rep cc> ^^compare-vector :> sign
+            src sign rep ^^merge-vector-tail
+        ] 
+    } cond ;
+
--- a/basis/compiler/cfg/linear-scan/linear-scan-tests.factor
+++ b/basis/compiler/cfg/linear-scan/linear-scan-tests.factor
@ -1465,7 +1465,7 @@ V{

 [ ] [ { 1 2 3 } test-linear-scan-on-cfg ] unit-test

-[ { 1 } ] [ 1 get instructions>> first tagged-values>> ] unit-test
+[ { { 0 1 } } ] [ 1 get instructions>> first tagged-values>> ] unit-test

 V{
    T{ ##peek f 0 D 0 }
@ -1487,4 +1487,4 @@ V{

 [ ] [ { 1 2 3 } test-linear-scan-on-cfg ] unit-test

-[ { 1 } ] [ 1 get instructions>> first tagged-values>> ] unit-test
+[ { { 0 1 } } ] [ 1 get instructions>> first tagged-values>> ] unit-test
--- a/basis/compiler/cfg/representations/representations.factor
+++ b/basis/compiler/cfg/representations/representations.factor
@ -226,24 +226,40 @@ SYMBOL: phi-mappings
 M: ##phi conversions-for-insn
    [ , ] [ [ inputs>> values ] [ dst>> ] bi phi-mappings get set-at ] bi ;

-! When a literal zero vector is unboxed, we replace the ##load-reference
-! with a ##zero-vector instruction since this is more efficient.
+! When a literal zeroes/ones vector is unboxed, we replace the ##load-reference
+! with a ##zero-vector or ##fill-vector instruction since this is more efficient.
 : convert-to-zero-vector? ( insn -- ? )
    {
        [ dst>> rep-of vector-rep? ]
-        [ obj>> B{ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 } = ]
+        [ obj>> B{ 0 0 0 0  0 0 0 0  0 0 0 0  0 0 0 0 } = ]
+    } 1&& ;
+: convert-to-fill-vector? ( insn -- ? )
+    {
+        [ dst>> rep-of vector-rep? ]
+        [ obj>> B{ 255 255 255 255  255 255 255 255  255 255 255 255  255 255 255 255 } = ]
    } 1&& ;

-: convert-to-zero-vector ( insn -- )
-    dst>> dup rep-of ##zero-vector ;
+: (convert-to-zero/fill-vector) ( insn -- dst rep )
+    dst>> dup rep-of ; inline
+
+: conversions-for-load-insn ( insn -- ?insn )
+    {
+        {
+            [ dup convert-to-zero-vector? ]
+            [ (convert-to-zero/fill-vector) ##zero-vector f ]
+        }
+        {
+            [ dup convert-to-fill-vector? ]
+            [ (convert-to-zero/fill-vector) ##fill-vector f ]
+        }
+        [ ]
+    } cond ;

 M: ##load-reference conversions-for-insn
-    dup convert-to-zero-vector?
-    [ convert-to-zero-vector ] [ call-next-method ] if ;
+    conversions-for-load-insn [ call-next-method ] when* ;

 M: ##load-constant conversions-for-insn
-    dup convert-to-zero-vector?
-    [ convert-to-zero-vector ] [ call-next-method ] if ;
+    conversions-for-load-insn [ call-next-method ] when* ;

 M: vreg-insn conversions-for-insn
    [ compute-renaming-set ] [ perform-renaming ] bi ;
@ -312,4 +328,4 @@ PRIVATE>
        [ insert-conversions ]
        [ ]
    } cleave
-    representations get cfg get (>>reps) ;
+    representations get cfg get (>>reps) ;
--- a/basis/compiler/cfg/value-numbering/rewrite/rewrite.factor
+++ b/basis/compiler/cfg/value-numbering/rewrite/rewrite.factor
@ -459,7 +459,7 @@ M: ##shuffle-vector rewrite
    value>> over rep>> {
        { float-4-rep [ float>bits 4 >le (fold-scalar>vector) ] }
        { double-2-rep [ double>bits 8 >le (fold-scalar>vector) ] }
-        [ rep-component-type heap-size >le (fold-scalar>vector) ]
+        [ [ untag-fixnum ] dip rep-component-type heap-size >le (fold-scalar>vector) ]
    } case ;

 M: ##scalar>vector rewrite
--- a/basis/compiler/cfg/value-numbering/value-numbering-tests.factor
+++ b/basis/compiler/cfg/value-numbering/value-numbering-tests.factor
@ -4,7 +4,7 @@ cpu.architecture tools.test kernel math combinators.short-circuit
 accessors sequences compiler.cfg.predecessors locals compiler.cfg.dce
 compiler.cfg.ssa.destruction compiler.cfg.loop-detection
 compiler.cfg.representations compiler.cfg assocs vectors arrays
-layouts namespaces alien ;
+layouts literals namespaces alien ;
 IN: compiler.cfg.value-numbering.tests

 : trim-temps ( insns -- insns )
@ -1215,6 +1215,20 @@ cell 8 = [
    } value-numbering-step
 ] unit-test

+[
+    {
+        T{ ##load-constant f 0 $[ 55 tag-fixnum ] }
+        T{ ##load-constant f 1 B{ 55 0 0 0  55 0 0 0  55 0 0 0  55 0 0 0 } }
+        T{ ##copy f 2 1 any-rep }
+    }
+] [
+    {
+        T{ ##load-constant f 0 $[ 55 tag-fixnum ] }
+        T{ ##scalar>vector f 1 0 int-4-rep }
+        T{ ##shuffle-vector f 2 1 { 0 0 0 0 } float-4-rep }
+    } value-numbering-step
+] unit-test
+
 [
    {
        T{ ##load-constant f 0 1.25 }
--- a/basis/compiler/codegen/codegen.factor
+++ b/basis/compiler/codegen/codegen.factor
@ -160,11 +160,19 @@ CODEGEN: ##double>single-float %double>single-float
 CODEGEN: ##integer>float %integer>float
 CODEGEN: ##float>integer %float>integer
 CODEGEN: ##zero-vector %zero-vector
+CODEGEN: ##fill-vector %fill-vector
 CODEGEN: ##gather-vector-2 %gather-vector-2
 CODEGEN: ##gather-vector-4 %gather-vector-4
 CODEGEN: ##shuffle-vector %shuffle-vector
+CODEGEN: ##tail>head-vector %tail>head-vector
 CODEGEN: ##merge-vector-head %merge-vector-head
 CODEGEN: ##merge-vector-tail %merge-vector-tail
+CODEGEN: ##signed-pack-vector %signed-pack-vector
+CODEGEN: ##unsigned-pack-vector %unsigned-pack-vector
+CODEGEN: ##unpack-vector-head %unpack-vector-head
+CODEGEN: ##unpack-vector-tail %unpack-vector-tail
+CODEGEN: ##integer>float-vector %integer>float-vector
+CODEGEN: ##float>integer-vector %float>integer-vector
 CODEGEN: ##compare-vector %compare-vector
 CODEGEN: ##test-vector %test-vector
 CODEGEN: ##add-vector %add-vector
--- a/basis/compiler/tree/propagation/simd/simd.factor
+++ b/basis/compiler/tree/propagation/simd/simd.factor
@ -31,8 +31,14 @@ IN: compiler.tree.propagation.simd
    (simd-hlshift)
    (simd-hrshift)
    (simd-vshuffle)
-    (simd-vmerge-head)
-    (simd-vmerge-tail)
+    (simd-(vmerge-head))
+    (simd-(vmerge-tail))
+    (simd-(v>float))
+    (simd-(v>integer))
+    (simd-(vpack-signed))
+    (simd-(vpack-unsigned))
+    (simd-(vunpack-head))
+    (simd-(vunpack-tail))
    (simd-v<=)
    (simd-v<)
    (simd-v=)
--- a/basis/compression/huffman/huffman.factor
+++ b/basis/compression/huffman/huffman.factor
@ -2,31 +2,35 @@
 ! See http://factorcode.org/license.txt for BSD license.
 USING: accessors arrays assocs fry
 hashtables io kernel locals math math.order math.parser
-math.ranges multiline sequences ;
+math.ranges multiline sequences bitstreams bit-arrays ;
 IN: compression.huffman

 QUALIFIED-WITH: bitstreams bs

 <PRIVATE

-! huffman codes
-
 TUPLE: huffman-code
-    { value }
-    { size }
-    { code } ;
+    { value fixnum }
+    { size fixnum }
+    { code fixnum } ;

-: <huffman-code> ( -- code ) 0 0 0 huffman-code boa ;
-: next-size ( code -- ) [ 1 + ] change-size [ 2 * ] change-code drop ;
-: next-code ( code -- ) [ 1 + ] change-code drop ;
+: <huffman-code> ( -- huffman-code )
+    0 0 0 huffman-code boa ; inline

-:: all-patterns ( huff n -- seq )
-    n log2 huff size>> - :> free-bits
+: next-size ( huffman-code -- )
+    [ 1 + ] change-size
+    [ 2 * ] change-code drop ; inline
+
+: next-code ( huffman-code -- )
+    [ 1 + ] change-code drop ; inline
+
+:: all-patterns ( huffman-code n -- seq )
+    n log2 huffman-code size>> - :> free-bits
    free-bits 0 >
-    [ free-bits 2^ [0,b) [ huff code>> free-bits 2^ * + ] map ]
-    [ huff code>> free-bits neg 2^ /i 1array ] if ;
+    [ free-bits 2^ iota [ huffman-code code>> free-bits 2^ * + ] map ]
+    [ huffman-code code>> free-bits neg 2^ /i 1array ] if ;

-:: huffman-each ( tdesc quot: ( huff -- ) -- )
+:: huffman-each ( tdesc quot: ( huffman-code -- ) -- )
    <huffman-code> :> code
    tdesc
    [
@ -34,7 +38,7 @@ TUPLE: huffman-code
        [ code (>>value) code clone quot call code next-code ] each
    ] each ; inline

-: update-reverse-table ( huff n table -- )
+: update-reverse-table ( huffman-code n table -- )
    [ drop all-patterns ]
    [ nip '[ _ swap _ set-at ] each ] 3bi ;

@ -43,49 +47,29 @@ TUPLE: huffman-code
   tdesc [ n table update-reverse-table ] huffman-each
   table seq>> ;

-:: huffman-table ( tdesc max -- table )
-   max f <array> :> table
-   tdesc [ [ ] [ value>> ] bi table set-nth ] huffman-each
-   table ;
-
 PRIVATE>

-! decoder
-
 TUPLE: huffman-decoder
-    { bs }
-    { tdesc }
-    { rtable }
-    { bits/level } ;
+    { bs bit-reader }
+    { tdesc array }
+    { rtable array }
+    { bits/level fixnum } ;

-: <huffman-decoder> ( bs tdesc -- decoder )
+: <huffman-decoder> ( bs tdesc -- huffman-decoder )
    huffman-decoder new
-    swap >>tdesc
-    swap >>bs
-    16 >>bits/level
-    [ ] [ tdesc>> ] [ bits/level>> 2^ ] tri reverse-table >>rtable ;
+        swap >>tdesc
+        swap >>bs
+        16 >>bits/level
+        dup [ tdesc>> ] [ bits/level>> 2^ ] bi reverse-table >>rtable ; inline

-: read1-huff ( decoder -- elt )
-    16 over [ bs>> bs:peek ] [ rtable>> nth ] bi ! first/last
-    [ size>> swap bs>> bs:seek ] [ value>> ] bi ;
+: read1-huff ( huffman-decoder -- elt )
+    16 over [ bs>> bs:peek ] [ rtable>> nth ] bi
+    [ size>> swap bs>> bs:seek ] [ value>> ] bi ; inline

-! %remove
 : reverse-bits ( value bits -- value' )
-    [ >bin ] [ CHAR: 0 pad-head <reversed> bin> ] bi* ;
+    [ integer>bit-array ] dip
+    f pad-tail reverse bit-array>integer ; inline

-: read1-huff2 ( decoder -- elt )
-    16 over [ bs>> bs:peek 16 reverse-bits ] [ rtable>> nth ] bi ! first/last
-    [ size>> swap bs>> bs:seek ] [ value>> ] bi ;
-
-/*
-: huff>string ( code -- str )
-    [ value>> number>string ]
-    [ [ code>> ] [ size>> bits>string ] bi ] bi
-    " = " glue ;
-
-: huff. ( code -- ) huff>string print ;
-
-:: rtable. ( rtable -- )
-    rtable length>> log2 :> n
-    rtable <enum> [ swap n bits. [ huff. ] each ] assoc-each ;
-*/
+: read1-huff2 ( huffman-decoder -- elt )
+    16 over [ bs>> bs:peek 16 reverse-bits ] [ rtable>> nth ] bi
+    [ size>> swap bs>> bs:seek ] [ value>> ] bi ; inline
--- a/basis/compression/inflate/inflate-tests.factor
+++ b/basis/compression/inflate/inflate-tests.factor
@ -0,0 +1,102 @@
+! Copyright (C) 2009 Doug Coleman.
+! See http://factorcode.org/license.txt for BSD license.
+USING: tools.test compression.inflate ;
+IN: compression.inflate.tests
+
+[
+BV{
+    1 255 255 255 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+    0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 122 121 94 119
+    239 237 227 88 16 16 10 5 16 17 26 172 3 20 19 245 22 54 55
+    70 245 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+    0 0 0 0 0 0 0 0 0 0 139 138 112 127 12 6 234 132 254 250 9
+    24 16 19 38 182 25 27 40 154 2 240 239 235 25 0 0 0 0 0 0 0
+    0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+    163 163 154 57 223 218 192 128 6 4 39 87 13 9 22 63 245 239
+    239 242 240 240 242 243 4 17 17 25 21 0 0 0 0 0 0 0 0 0 0 0
+    0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 223 219
+    197 140 26 21 26 221 108 117 136 170 0 0 0 0 0 0 0 194 148
+    147 138 6 4 4 5 4 33 176 175 161 5 80 81 95 251 0 0 0 0 0 0
+    0 0 0 0 0 0 0 0 0 0 122 121 105 33 246 246 234 80 241 240
+    226 77 28 25 4 58 29 30 68 108 0 0 0 0 0 0 0 0 0 0 0 0 108
+    109 118 250 2 24 24 39 230 225 221 203 107 0 0 0 0 0 0 0 0
+    0 0 0 0 0 0 0 0 103 102 80 101 249 245 214 208 13 6 240 142
+    44 37 29 65 11 13 22 250 11 15 30 180 0 0 0 0 0 0 0 0 0 0 0
+    0 0 0 0 0 1 200 201 196 1 208 195 176 132 224 223 207 50
+    253 6 15 181 251 253 0 6 240 241 239 77 14 10 246 64 33 24
+    13 0 7 252 20 0 247 1 249 0 241 253 1 205 129 132 173 52
+    124 123 107 32 17 16 6 15 115 117 143 209 0 0 0 0 1 255 255
+    255 0 0 0 0 0 128 118 95 119 221 222 204 136 1 3 0 0 22 27
+    35 0 249 239 239 0 30 22 3 0 247 4 18 0 250 248 247 0 29 26
+    31 222 239 249 6 164 241 241 230 48 19 19 28 209 29 30 35
+    154 87 88 109 228 1 255 255 255 0 0 0 0 0 0 0 0 0 136 136
+    116 39 227 224 218 110 245 245 242 61 238 238 237 36 11 1
+    254 9 32 37 20 213 7 14 40 151 2 0 246 36 6 8 20 210 8 8 5
+    4 33 32 41 184 10 11 17 232 69 70 80 251 0 255 255 255 0
+    255 255 255 0 255 255 255 0 255 255 255 0 255 255 255 0 255
+    255 255 0 107 104 82 144 88 81 34 255 162 159 134 122 255
+    255 255 0 255 255 255 0 255 255 255 0 255 255 255 0 195 194
+    184 2 255 255 255 0 255 255 255 0 0 255 255 255 0 255 255
+    255 0 255 255 255 0 255 255 255 0 255 255 255 0 174 171 167
+    15 102 99 63 233 132 129 99 133 255 255 255 0 255 255 255 0
+    255 255 255 0 255 255 255 0 255 255 255 0 255 255 255 0 255
+    255 255 0 255 255 255 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+    119 119 116 4 240 239 217 143 28 28 30 228 34 36 45 232 0 0
+    0 0 0 0 0 0 38 38 38 4 28 28 28 2 0 0 0 0 0 0 0 0 0 0 0 0 0
+    0 0 0 4 0 0 0 0 0 0 0 0 33 33 33 3 38 38 38 9 243 243 243
+    252 14 12 44 24 233 235 4 89 250 251 216 126 92 91 76 241 8
+    9 21 235 69 69 70 2 250 250 249 214 0 0 0 223 0 0 0 0 0 0 0
+    0 0 0 0 0 2 0 0 0 0 0 0 0 0 247 247 247 255 25 25 25 11 45
+    46 48 26 239 239 251 219 3 4 1 114 233 236 1 254 21 21 20
+    113 12 11 2 54 1 2 2 215 206 206 206 230 0 0 0 0 0 0 0 0 0
+    0 0 0 0 0 0 0 1 255 255 255 0 0 0 0 0 0 0 0 0 0 0 0 0 46 46
+    47 8 56 56 49 70 23 21 9 145 237 239 248 180 247 247 2 148
+    225 225 224 234 241 241 240 248 205 205 205 247 0 0 0 0 0 0
+    0 0 0 0 0 0 0 0 0 0 0 255 255 255 0 255 255 255 0 255 255
+    255 0 255 255 255 0 255 255 255 0 255 255 255 0 107 106 96
+    75 90 89 73 75 255 255 255 0 255 255 255 0 255 255 255 0
+    255 255 255 0 255 255 255 0 255 255 255 0 255 255 255 0 255
+    255 255 0
+}
+] [
+B{
+    56 141 99 252 255 255 63 3 41 160 170 50 174 252 253 219
+    199 17 2 2 92 172 2 130 82 107 152 69 132 191 138 153 153
+    187 125 37 70 115 119 87 65 61 15 219 171 150 127 191 56 37
+    4 132 213 182 73 74 107 204 98 250 240 254 181 36 49 154 23
+    47 158 101 121 255 214 129 6 54 22 245 112 94 78 49 251 175
+    239 223 127 250 240 225 211 103 22 65 65 73 81 98 12 184
+    127 251 104 143 148 168 212 221 156 210 142 85 80 161 67 83
+    38 119 177 177 176 176 178 40 110 88 191 144 53 32 48 254
+    55 166 127 51 21 191 125 123 21 240 241 195 35 95 25 73 22
+    43 89 57 151 28 100 249 156 220 178 95 76 18 18 234 207 30
+    222 61 157 141 174 57 61 45 32 245 231 215 107 23 120 217
+    62 244 233 168 202 58 114 243 138 253 226 230 151 219 130
+    174 142 241 196 201 35 140 23 14 111 104 121 112 255 188
+    209 95 54 254 173 191 255 50 176 125 248 248 222 151 143
+    235 155 131 162 4 47 3 251 31 17 134 239 140 63 25 62 254
+    101 60 219 216 178 214 164 166 58 91 65 80 128 141 191 184
+    180 255 34 3 3 3 3 35 44 26 27 202 226 203 239 222 59 211
+    193 200 204 192 32 38 173 204 240 243 253 123 6 57 49 102
+    134 239 44 66 12 191 126 124 103 144 149 146 191 247 254 39
+    219 146 143 31 159 25 8 11 203 92 148 149 83 158 21 30 145
+    251 132 17 57 29 116 116 148 168 63 126 112 43 239 235 215
+    79 182 239 222 189 85 225 102 252 199 169 160 42 114 149
+    157 79 99 58 19 195 55 21 54 14 145 75 28 28 172 44 138 10
+    154 59 184 184 5 95 184 186 5 252 102 248 255 255 63 86 156
+    157 17 52 33 34 80 233 255 162 249 109 85 232 114 135 15
+    237 96 130 177 177 106 94 183 122 57 127 90 178 253 203 150
+    198 228 86 92 22 192 48 19 122 168 150 151 151 176 124 120
+    127 179 95 70 70 238 137 146 138 238 11 152 184 154 154 26
+    139 140 140 12 134 122 22 24 67 81 81 145 89 77 77 141 243
+    243 231 207 127 248 120 116 36 94 190 102 137 252 245 251
+    70 93 76 180 207 71 14 78 209 215 174 174 110 76 191 126
+    253 188 198 192 192 112 31 217 0 184 137 223 191 127 255 47
+    41 41 201 173 171 103 32 245 254 253 239 219 204 44 140 69
+    47 223 48 254 19 21 21 41 228 225 102 50 99 100 98 186 126
+    238 220 185 103 24 233 0 61 55 234 233 233 115 88 88 24 186
+    137 139 114 78 124 251 254 199 150 239 223 153 166 60 124
+    248 224 213 199 143 31 126 156 61 123 246 59 186 1 184 99
+    33 43 193 59 42 210 211 155 80 32 2 0 2 32 94 128
+} zlib-inflate
+] unit-test
--- a/basis/compression/inflate/inflate.factor
+++ b/basis/compression/inflate/inflate.factor
@ -1,59 +1,47 @@
 ! Copyright (C) 2009 Marc Fauconneau.
 ! See http://factorcode.org/license.txt for BSD license.
 USING: accessors arrays assocs byte-vectors combinators
-compression.huffman fry hashtables io.binary kernel locals math
-math.bitwise math.order math.ranges sequences sorting ;
+combinators.smart compression.huffman fry hashtables io.binary
+kernel literals locals math math.bitwise math.order math.ranges
+sequences sorting memoize combinators.short-circuit ;
 QUALIFIED-WITH: bitstreams bs
 IN: compression.inflate

-QUALIFIED-WITH: bitstreams bs
-
 <PRIVATE

-: enum>seq ( assoc -- seq )
-    dup keys [ ] [ max ] map-reduce 1 + f <array>
-    [ '[ swap _ set-nth ] assoc-each ] keep ;
-
 ERROR: zlib-unimplemented ;
 ERROR: bad-zlib-data ;
 ERROR: bad-zlib-header ;
-    
+
 :: check-zlib-header ( data -- )
    16 data bs:peek 2 >le be> 31 mod    ! checksum
-    0 assert=                           
+    0 assert=
    4 data bs:read 8 assert=            ! compression method: deflate
    4 data bs:read                      ! log2(max length)-8, 32K max
-    7 <= [ bad-zlib-header ] unless     
-    5 data bs:seek                      ! drop check bits 
-    1 data bs:read 0 assert=            ! dictionnary - not allowed in png
+    7 <= [ bad-zlib-header ] unless
+    5 data bs:seek                      ! drop check bits
+    1 data bs:read 0 assert=            ! dictionary - not allowed in png
    2 data bs:seek                      ! compression level; ignore
    ;

-:: default-table ( -- table )
-    0 <hashtable> :> table
-    0 143 [a,b] 280 287 [a,b] append 8 table set-at
-    144 255 [a,b] >array 9 table set-at
-    256 279 [a,b] >array 7 table set-at 
-    table enum>seq 1 tail ;
-
 CONSTANT: clen-shuffle { 16 17 18 0 8 7 9 6 10 5 11 4 12 3 13 2 14 1 15 }

-: get-table ( values size -- table ) 
-    16 f <array> clone <enum> 
-    [ '[ _ push-at ] 2each ] keep seq>> 1 tail [ natural-sort ] map ;
+: get-table ( values size -- table )
+    16 f <array> <enum>
+    [ '[ _ push-at ] 2each ] keep
+    seq>> rest-slice [ natural-sort ] map ; inline

 :: decode-huffman-tables ( bitstream -- tables )
    5 bitstream bs:read 257 +
    5 bitstream bs:read 1 +
-    4 bitstream bs:read 4 +
-    clen-shuffle swap head
-    dup [ drop 3 bitstream bs:read ] map
+    4 bitstream bs:read 4 + clen-shuffle swap head 
+
+    dup length iota [ 3 bitstream bs:read ] replicate
    get-table
-    bitstream swap <huffman-decoder> 
+    bitstream swap <huffman-decoder>
    [ 2dup + ] dip swap :> k!
    '[
-        _ read1-huff2
-        {
+        _ read1-huff2 {
            { [ dup 16 = ] [ 2 bitstream bs:read 3 + 2array ] }
            { [ dup 17 = ] [ 3 bitstream bs:read 3 + 2array ] }
            { [ dup 18 = ] [ 7 bitstream bs:read 11 + 2array ] }
@ -61,121 +49,118 @@ CONSTANT: clen-shuffle { 16 17 18 0 8 7 9 6 10 5 11 4 12 3 13 2 14 1 15 }
        } cond
        dup array? [ dup second ] [ 1 ] if
        k swap - dup k! 0 >
-    ] 
-    [ ] produce swap suffix
-    { } [ dup array? [ dup first 16 = ] [ f ] if [ [ unclip-last ] [ second 1 + swap <repetition> append ] bi* ] [ suffix ] if ] reduce
+    ] [ ] produce swap suffix
+    { } [
+            dup { [ array? ] [ first 16 = ] } 1&& [
+                [ unclip-last-slice ]
+                [ second 1 + swap <repetition> append ] bi*
+            ] [
+                suffix
+            ] if
+    ] reduce
    [ dup array? [ second 0 <repetition> ] [ 1array ] if ] map concat
-    nip swap cut 2array [ [ length>> [0,b) ] [ ] bi get-table ] map ;
-    
+    nip swap cut 2array
+    [ [ length>> iota ] [ ] bi get-table ] map ;
+
+MEMO: static-huffman-tables ( -- obj )
+    [
+        0 143 [a,b] [ 8 ] replicate
+        144 255 [a,b] [ 9 ] replicate append
+        256 279 [a,b] [ 7 ] replicate append
+        280 287 [a,b] [ 8 ] replicate append
+    ] append-outputs
+    0 31 [a,b] [ 5 ] replicate 2array
+    [ [ length>> [0,b) ] [ ] bi get-table ] map ;
+
 CONSTANT: length-table
    {
-        3 4 5 6 7 8 9 10
-        11 13 15 17
-        19 23 27 31
-        35 43 51 59
-        67 83 99 115
-        131 163 195 227 258
+        3 4 5 6 7 8 9 10 11 13 15 17 19 23 27 31
+        35 43 51 59 67 83 99 115 131 163 195 227 258
    }

 CONSTANT: dist-table
    {
-        1 2 3 4 
-        5 7 9 13 
-        17 25 33 49
-        65 97 129 193
-        257 385 513 769
-        1025 1537 2049 3073
-        4097 6145 8193 12289
-        16385 24577
+        1 2 3 4 5 7 9 13 17 25 33 49
+        65 97 129 193 257 385 513 769 1025 1537 2049 3073
+        4097 6145 8193 12289 16385 24577
    }

 : nth* ( n seq -- elt )
-    [ length 1 - swap - ] [ nth ] bi ;
+    [ length 1 - swap - ] [ nth ] bi ; inline

 :: inflate-lz77 ( seq -- bytes )
    1000 <byte-vector> :> bytes
-    seq
-    [
+    seq [
        dup array?
        [ first2 '[ _ 1 - bytes nth* bytes push ] times ]
        [ bytes push ] if
-    ] each 
+    ] each
    bytes ;

-:: inflate-dynamic ( bitstream -- bytes )
-    bitstream decode-huffman-tables
-    bitstream '[ _ swap <huffman-decoder> ] map :> tables
+:: inflate-huffman ( bitstream tables -- bytes )
+    bitstream tables [ <huffman-decoder> ] with map :> tables
    [
        tables first read1-huff2
-        dup 256 >
-        [
-            dup 285 = 
-            [ ]
-            [ 
-                dup 264 > 
-                [ 
-                    dup 261 - 4 /i dup 5 > 
-                    [ bad-zlib-data ] when 
-                    bitstream bs:read 2array 
-                ]
-                when 
-            ] if
-            ! 5 bitstream read-bits ! distance
+        dup 256 > [
+            dup 285 = [
+                dup 264 > [
+                    dup 261 - 4 /i
+                    dup 5 > [ bad-zlib-data ] when
+                    bitstream bs:read 2array
+                ] when
+            ] unless
+
            tables second read1-huff2
-            dup 3 > 
-            [ 
+
+            dup 3 > [
                dup 2 - 2 /i dup 13 >
                [ bad-zlib-data ] when
                bitstream bs:read 2array
-            ] 
-            when
-            2array
-        ]
-        when
-        dup 256 = not
-    ]
-    [ ] produce nip
+            ] when 2array
+        ] when dup 256 = not
+    ] [ ] produce nip
    [
        dup array? [
-            first2
-            [  
+            first2 [
                dup array? [ first2 ] [ 0 ] if
                [ 257 - length-table nth ] [ + ] bi*
-            ] 
-            [
+            ] [
                dup array? [ first2 ] [ 0 ] if
                [ dist-table nth ] [ + ] bi*
-            ] bi*
-            2array
+            ] bi* 2array
        ] when
    ] map ;
-    
-:: inflate-raw ( bitstream -- bytes ) 
-    8 bitstream bs:align 
+
+:: inflate-raw ( bitstream -- bytes )
+    8 bitstream bs:align
    16 bitstream bs:read :> len
    16 bitstream bs:read :> nlen
-    len nlen + 16 >signed -1 assert= ! len + ~len = -1
+
+    ! len + ~len = -1
+    len nlen + 16 >signed -1 assert=
+
    bitstream byte-pos>>
    bitstream byte-pos>> len +
    bitstream bytes>> <slice>
    len 8 * bitstream bs:seek ;

-: inflate-static ( bitstream -- bytes ) zlib-unimplemented ;
+: inflate-dynamic ( bitstream -- array )
+    dup decode-huffman-tables inflate-huffman ;

-:: inflate-loop ( bitstream -- bytes )
-    [ 1 bitstream bs:read 0 = ]
-    [
+: inflate-static ( bitstream -- array )
+    static-huffman-tables inflate-huffman ;
+
+:: inflate-loop ( bitstream -- array )
+    [ 1 bitstream bs:read 0 = ] [
        bitstream
        2 bitstream bs:read
-        { 
+        {
            { 0 [ inflate-raw ] }
            { 1 [ inflate-static ] }
            { 2 [ inflate-dynamic ] }
            { 3 [ bad-zlib-data f ] }
-        }
-        case
-    ]
-    [ produce ] keep call suffix concat ;
+        } case
+    ] [ produce ] keep call suffix concat ;

 PRIVATE>

--- a/basis/cords/cords.factor
+++ b/basis/cords/cords.factor
@ -6,35 +6,37 @@ IN: cords

 <PRIVATE

-TUPLE: simple-cord first second ;
+TUPLE: simple-cord
+    { first read-only } { second read-only } ;

 M: simple-cord length
-    [ first>> length ] [ second>> length ] bi + ;
+    [ first>> length ] [ second>> length ] bi + ; inline

-M: simple-cord virtual-seq first>> ;
+M: simple-cord virtual-seq first>> ; inline

 M: simple-cord virtual@
    2dup first>> length <
-    [ first>> ] [ [ first>> length - ] [ second>> ] bi ] if ;
+    [ first>> ] [ [ first>> length - ] [ second>> ] bi ] if ; inline

-TUPLE: multi-cord count seqs ;
+TUPLE: multi-cord
+    { count read-only } { seqs read-only } ;

-M: multi-cord length count>> ;
+M: multi-cord length count>> ; inline

 M: multi-cord virtual@
    dupd
    seqs>> [ first <=> ] with search nip
-    [ first - ] [ second ] bi ;
+    [ first - ] [ second ] bi ; inline

 M: multi-cord virtual-seq
-    seqs>> [ f ] [ first second ] if-empty ;
+    seqs>> [ f ] [ first second ] if-empty ; inline

 : <cord> ( seqs -- cord )
    dup length 2 = [
        first2 simple-cord boa
    ] [
        [ 0 [ length + ] accumulate ] keep zip multi-cord boa
-    ] if ;
+    ] if ; inline

 PRIVATE>

@ -52,7 +54,7 @@ INSTANCE: multi-cord virtual-sequence
        { [ over cord? ] [ [ seqs>> values ] dip suffix <cord> ] }
        { [ dup cord? ] [ seqs>> values swap prefix <cord> ] }
        [ 2array <cord> ]
-    } cond ;
+    } cond ; inline

 : cord-concat ( seqs -- cord )
    {
@ -67,4 +69,4 @@ INSTANCE: multi-cord virtual-sequence
                } cond
            ] map concat <cord>
        ]
-    } cond ;
+    } cond ; inline
--- a/basis/cpu/architecture/architecture.factor
+++ b/basis/cpu/architecture/architecture.factor
@ -1,7 +1,7 @@
 ! Copyright (C) 2006, 2009 Slava Pestov.
 ! See http://factorcode.org/license.txt for BSD license.
-USING: accessors arrays generic kernel kernel.private math
-memory namespaces make sequences layouts system hashtables
+USING: accessors arrays assocs generic kernel kernel.private
+math memory namespaces make sequences layouts system hashtables
 classes alien byte-arrays combinators words sets fry ;
 IN: cpu.architecture

@ -56,6 +56,18 @@ uint-4-rep
 longlong-2-rep
 ulonglong-2-rep ;

+UNION: signed-int-vector-rep
+char-16-rep
+short-8-rep
+int-4-rep
+longlong-2-rep ;
+
+UNION: unsigned-int-vector-rep
+uchar-16-rep
+ushort-8-rep
+uint-4-rep
+ulonglong-2-rep ;
+
 UNION: scalar-rep
 char-scalar-rep
 uchar-scalar-rep
@ -83,6 +95,18 @@ double-rep
 vector-rep
 scalar-rep ;

+: unsign-rep ( rep -- rep' )
+    {
+        { uint-4-rep           int-4-rep }
+        { ulonglong-2-rep      longlong-2-rep }
+        { ushort-8-rep         short-8-rep }
+        { uchar-16-rep         char-16-rep }
+        { uchar-scalar-rep     char-scalar-rep }
+        { ushort-scalar-rep    short-scalar-rep }
+        { uint-scalar-rep      int-scalar-rep }
+        { ulonglong-scalar-rep longlong-scalar-rep }
+    } ?at drop ;
+
 ! Register classes
 SINGLETONS: int-regs float-regs ;

@ -218,9 +242,16 @@ HOOK: %fill-vector cpu ( dst rep -- )
 HOOK: %gather-vector-2 cpu ( dst src1 src2 rep -- )
 HOOK: %gather-vector-4 cpu ( dst src1 src2 src3 src4 rep -- )
 HOOK: %shuffle-vector cpu ( dst src shuffle rep -- )
+HOOK: %tail>head-vector cpu ( dst src rep -- )
 HOOK: %merge-vector-head cpu ( dst src1 src2 rep -- )
 HOOK: %merge-vector-tail cpu ( dst src1 src2 rep -- )
-HOOK: %compare-vector cpu ( dst src1 src2 temp rep cc -- )
+HOOK: %signed-pack-vector cpu ( dst src1 src2 rep -- )
+HOOK: %unsigned-pack-vector cpu ( dst src1 src2 rep -- )
+HOOK: %unpack-vector-head cpu ( dst src rep -- )
+HOOK: %unpack-vector-tail cpu ( dst src rep -- )
+HOOK: %integer>float-vector cpu ( dst src rep -- )
+HOOK: %float>integer-vector cpu ( dst src rep -- )
+HOOK: %compare-vector cpu ( dst src1 src2 rep cc -- )
 HOOK: %test-vector cpu ( dst src1 temp rep vcc -- )
 HOOK: %test-vector-branch cpu ( label src1 temp rep vcc -- )
 HOOK: %add-vector cpu ( dst src1 src2 rep -- )
@ -259,7 +290,14 @@ HOOK: %gather-vector-2-reps cpu ( -- reps )
 HOOK: %gather-vector-4-reps cpu ( -- reps )
 HOOK: %shuffle-vector-reps cpu ( -- reps )
 HOOK: %merge-vector-reps cpu ( -- reps )
+HOOK: %signed-pack-vector-reps cpu ( -- reps )
+HOOK: %unsigned-pack-vector-reps cpu ( -- reps )
+HOOK: %unpack-vector-head-reps cpu ( -- reps )
+HOOK: %unpack-vector-tail-reps cpu ( -- reps )
+HOOK: %integer>float-vector-reps cpu ( -- reps )
+HOOK: %float>integer-vector-reps cpu ( -- reps )
 HOOK: %compare-vector-reps cpu ( cc -- reps )
+HOOK: %compare-vector-ccs cpu ( rep cc -- {cc,swap?}s not? )
 HOOK: %test-vector-reps cpu ( -- reps )
 HOOK: %add-vector-reps cpu ( -- reps )
 HOOK: %saturated-add-vector-reps cpu ( -- reps )
--- a/basis/cpu/ppc/ppc.factor
+++ b/basis/cpu/ppc/ppc.factor
@ -263,6 +263,11 @@ M: ppc %gather-vector-2-reps { } ;
 M: ppc %gather-vector-4-reps { } ;
 M: ppc %shuffle-vector-reps { } ;
 M: ppc %merge-vector-reps { } ;
+M: ppc %signed-pack-vector-reps { } ;
+M: ppc %unsigned-pack-vector-reps { } ;
+M: ppc %unpack-vector-reps { } ;
+M: ppc %integer>float-vector-reps { } ;
+M: ppc %float>integer-vector-reps { } ;
 M: ppc %compare-vector-reps drop { } ;
 M: ppc %test-vector-reps { } ;
 M: ppc %add-vector-reps { } ;
--- a/basis/cpu/x86/x86.factor
+++ b/basis/cpu/x86/x86.factor
@ -588,14 +588,6 @@ M: x86 %fill-vector-reps
        { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
    } available-reps ;

-: unsign-rep ( rep -- rep' )
-    {
-        { uint-4-rep      int-4-rep }
-        { ulonglong-2-rep longlong-2-rep }
-        { ushort-8-rep    short-8-rep }
-        { uchar-16-rep    char-16-rep }
-    } ?at drop ;
-
 ! M:: x86 %broadcast-vector ( dst src rep -- )
 !     rep unsign-rep {
 !         { float-4-rep [
@ -749,14 +741,81 @@ M: x86 %merge-vector-reps
        { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
    } available-reps ;

-:: compare-float-v-operands ( dst src1 src2 temp rep cc -- dst' src' rep cc' )
-    cc { cc> cc>= cc/> cc/>= } member?
-    [ dst src2 src1 rep two-operand rep cc swap-cc ]
-    [ dst src1 src2 rep two-operand rep cc         ] if ;
+M: x86 %signed-pack-vector
+    [ two-operand ] keep
+    {
+        { int-4-rep    [ PACKSSDW ] }
+        { short-8-rep  [ PACKSSWB ] }
+    } case ;
+
+M: x86 %signed-pack-vector-reps
+    {
+        { sse2? { short-8-rep int-4-rep } }
+    } available-reps ;
+
+M: x86 %unsigned-pack-vector
+    [ two-operand ] keep
+    unsign-rep {
+        { int-4-rep   [ PACKUSDW ] }
+        { short-8-rep [ PACKUSWB ] }
+    } case ;
+
+M: x86 %unsigned-pack-vector-reps
+    {
+        { sse2? { short-8-rep } }
+        { sse4.1? { int-4-rep } }
+    } available-reps ;
+
+M: x86 %tail>head-vector ( dst src rep -- )
+    dup {
+        { float-4-rep [ drop MOVHLPS ] }
+        { double-2-rep [ [ %copy ] [ drop UNPCKHPD ] 3bi ] }
+        [ drop [ %copy ] [ drop PUNPCKHQDQ ] 3bi ]
+    } case ;
+
+M: x86 %unpack-vector-head ( dst src rep -- )
+    {
+        { char-16-rep  [ PMOVSXBW ] }
+        { uchar-16-rep [ PMOVZXBW ] }
+        { short-8-rep  [ PMOVSXWD ] }
+        { ushort-8-rep [ PMOVZXWD ] }
+        { int-4-rep    [ PMOVSXDQ ] }
+        { uint-4-rep   [ PMOVZXDQ ] }
+        { float-4-rep  [ CVTPS2PD ] }
+    } case ;
+
+M: x86 %unpack-vector-head-reps ( -- reps )
+    {
+        { sse2? { float-4-rep } }
+        { sse4.1? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
+    } available-reps ;
+
+M: x86 %unpack-vector-tail-reps ( -- reps ) { } ;
+
+M: x86 %integer>float-vector ( dst src rep -- )
+    {
+        { int-4-rep [ CVTDQ2PS ] }
+    } case ;
+
+M: x86 %integer>float-vector-reps
+    {
+        { sse2? { int-4-rep } }
+    } available-reps ;
+
+M: x86 %float>integer-vector ( dst src rep -- )
+    {
+        { float-4-rep [ CVTTPS2DQ ] }
+    } case ;
+
+M: x86 %float>integer-vector-reps
+    {
+        { sse2? { float-4-rep } }
+    } available-reps ;
+
 : (%compare-float-vector) ( dst src rep double single -- )
    [ double-2-rep eq? ] 2dip if ; inline
-: %compare-float-vector ( dst src1 src2 temp rep cc -- )
-    compare-float-v-operands {
+: %compare-float-vector ( dst src rep cc -- )
+    {
        { cc<    [ [ CMPLTPD    ] [ CMPLTPS    ] (%compare-float-vector) ] }
        { cc<=   [ [ CMPLEPD    ] [ CMPLEPS    ] (%compare-float-vector) ] }
        { cc=    [ [ CMPEQPD    ] [ CMPEQPS    ] (%compare-float-vector) ] }
@ -767,16 +826,6 @@ M: x86 %merge-vector-reps
        { cc/<>= [ [ CMPUNORDPD ] [ CMPUNORDPS ] (%compare-float-vector) ] }
    } case ;

-:: compare-int-v-operands ( dst src1 src2 temp rep cc -- not-dst/f cmp-dst src' rep cc' )
-    cc order-cc :> occ
-    occ {
-        { cc=  [ f   dst  src1 src2 rep two-operand rep cc= ] }
-        { cc/= [ dst temp src1 src2 rep two-operand rep cc= ] }
-        { cc<= [ dst temp src1 src2 rep two-operand rep cc> ] }
-        { cc<  [ f   dst  src2 src1 rep two-operand rep cc> ] }
-        { cc>  [ f   dst  src1 src2 rep two-operand rep cc> ] }
-        { cc>= [ dst temp src2 src1 rep two-operand rep cc> ] }
-    } case ;
 :: (%compare-int-vector) ( dst src rep int64 int32 int16 int8 -- )
    rep unsign-rep :> rep'
    dst src rep' {
@ -785,15 +834,14 @@ M: x86 %merge-vector-reps
        { short-8-rep    [ int16 call ] }
        { char-16-rep    [ int8  call ] }
    } case ; inline
-:: %compare-int-vector ( dst src1 src2 temp rep cc -- )
-    dst src1 src2 temp rep cc compare-int-v-operands :> cc' :> rep :> src' :> cmp-dst :> not-dst
-    cmp-dst src' rep cc' {
+: %compare-int-vector ( dst src rep cc -- )
+    {
        { cc= [ [ PCMPEQQ ] [ PCMPEQD ] [ PCMPEQW ] [ PCMPEQB ] (%compare-int-vector) ] }
        { cc> [ [ PCMPGTQ ] [ PCMPGTD ] [ PCMPGTW ] [ PCMPGTB ] (%compare-int-vector) ] }
-    } case
-    not-dst [ cmp-dst rep %not-vector ] when* ;
+    } case ;

-M: x86 %compare-vector ( dst src1 src2 temp rep cc -- )
+M: x86 %compare-vector ( dst src1 src2 rep cc -- )
+    [ [ two-operand ] keep ] dip
    over float-vector-rep?
    [ %compare-float-vector ]
    [ %compare-int-vector ] if ;
@ -804,11 +852,6 @@ M: x86 %compare-vector ( dst src1 src2 temp rep cc -- )
        { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
        { sse4.1? { longlong-2-rep ulonglong-2-rep } }
    } available-reps ;
-: %compare-vector-unord-reps ( -- reps )
-    {
-        { sse? { float-4-rep } }
-        { sse2? { double-2-rep } }
-    } available-reps ;
 : %compare-vector-ord-reps ( -- reps )
    {
        { sse? { float-4-rep } }
@ -819,10 +862,44 @@ M: x86 %compare-vector ( dst src1 src2 temp rep cc -- )
 M: x86 %compare-vector-reps
    {
        { [ dup { cc= cc/= } memq? ] [ drop %compare-vector-eq-reps ] }
-        { [ dup { cc<>= cc/<>= } memq? ] [ drop %compare-vector-unord-reps ] }
        [ drop %compare-vector-ord-reps ]
    } cond ;

+: %compare-float-vector-ccs ( cc -- ccs not? )
+    {
+        { cc<    [ { { cc<  f   }              } f ] }
+        { cc<=   [ { { cc<= f   }              } f ] }
+        { cc>    [ { { cc<  t   }              } f ] }
+        { cc>=   [ { { cc<= t   }              } f ] }
+        { cc=    [ { { cc=  f   }              } f ] }
+        { cc<>   [ { { cc<  f   } { cc<    t } } f ] }
+        { cc<>=  [ { { cc<>= f  }              } f ] }
+        { cc/<   [ { { cc/<  f  }              } f ] }
+        { cc/<=  [ { { cc/<= f  }              } f ] }
+        { cc/>   [ { { cc/<  t  }              } f ] }
+        { cc/>=  [ { { cc/<= t  }              } f ] }
+        { cc/=   [ { { cc/=  f  }              } f ] }
+        { cc/<>  [ { { cc/=  f  } { cc/<>= f } } f ] }
+        { cc/<>= [ { { cc/<>= f }              } f ] }
+    } case ;
+
+: %compare-int-vector-ccs ( cc -- ccs not? )
+    order-cc {
+        { cc<    [ { { cc> t } } f ] }
+        { cc<=   [ { { cc> f } } t ] }
+        { cc>    [ { { cc> f } } f ] }
+        { cc>=   [ { { cc> t } } t ] }
+        { cc=    [ { { cc= f } } f ] }
+        { cc/=   [ { { cc= f } } t ] }
+        { t      [ {           } t ] }
+        { f      [ {           } f ] }
+    } case ;
+
+M: x86 %compare-vector-ccs
+    swap float-vector-rep?
+    [ %compare-float-vector-ccs ]
+    [ %compare-int-vector-ccs ] if ;
+
 :: %test-vector-mask ( dst temp mask vcc -- )
    vcc {
        { vcc-any    [ dst dst TEST dst temp \ CMOVNE %boolean ] }
@ -1146,15 +1223,7 @@ M: x86 %xor-vector-reps
        { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
    } available-reps ;

-M:: x86 %not-vector ( dst src rep -- )
-    dst rep %fill-vector
-    dst dst src rep %xor-vector ;
-
-M: x86 %not-vector-reps
-    {
-        { sse? { float-4-rep } }
-        { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
-    } available-reps ;
+M: x86 %not-vector-reps { } ;

 M: x86 %shl-vector ( dst src1 src2 rep -- )
    [ two-operand ] keep
--- a/basis/images/png/png.factor
+++ b/basis/images/png/png.factor
@ -26,6 +26,9 @@ CONSTANT: indexed-color 3
 CONSTANT: greyscale-alpha 4
 CONSTANT: truecolor-alpha 6

+CONSTANT: interlace-none 0
+CONSTANT: interlace-adam7 1
+
 : <loading-png> ( -- image )
    loading-png new
    V{ } clone >>chunks ;
@ -86,8 +89,8 @@ ERROR: unimplemented-color-type image ;

 : png-bytes-per-pixel ( loading-png -- n )
    dup color-type>> {
-        { 2 [ scale-bit-depth 3 * ] }
-        { 6 [ scale-bit-depth 4 * ] }
+        { truecolor [ scale-bit-depth 3 * ] }
+        { truecolor-alpha [ scale-bit-depth 4 * ] }
        [ unknown-color-type ]
    } case ; inline

@ -118,20 +121,41 @@ ERROR: unimplemented-color-type image ;
    lines dup first length 0 <array> prefix
    [ n 1 - 0 <array> prepend ] map
    2 clump [
-        n swap first2 [ ] [ n 1 - swap nth ] [ [ 0 n 1 - ] dip set-nth ] tri
+        n swap first2
+        [ ]
+        [ n 1 - swap nth ]
+        [ [ 0 n 1 - ] dip set-nth ] tri
        png-unfilter-line
    ] map B{ } concat-as ;

+ERROR: unimplemented-interlace ;
+
+: reverse-interlace ( byte-array loading-png -- byte-array )
+    {
+        { interlace-none [ ] }
+        { interlace-adam7 [ unimplemented-interlace ] }
+        [ unimplemented-interlace ]
+    } case ;
+
 : png-image-bytes ( loading-png -- byte-array )
    [ png-bytes-per-pixel ]
-    [ inflate-data ]
+    [ [ inflate-data ] [ interlace-method>> ] bi reverse-interlace ]
    [ png-group-width ] tri group reverse-png-filter ;

+ERROR: unknown-component-type n ;
+
+: png-component ( loading-png -- obj )
+    bit-depth>> {
+        { 8 [ ubyte-components ] }
+        { 16 [ ushort-components ] }
+        [ unknown-component-type ]
+    } case ;
+
 : loading-png>image ( loading-png -- image )
    [ image new ] dip {
        [ png-image-bytes >>bitmap ]
        [ [ width>> ] [ height>> ] bi 2array >>dim ]
-        [ drop ubyte-components >>component-type ]
+        [ png-component >>component-type ]
    } cleave ;

 : decode-greyscale ( loading-png -- image )
--- a/basis/io/streams/limited/limited-docs.factor
+++ b/basis/io/streams/limited/limited-docs.factor
@ -88,7 +88,7 @@ HELP: stream-throws
 { stream-eofs stream-throws } related-words

 ARTICLE: "io.streams.limited" "Limited input streams"
-"The " { $vocab-link "io.streams.limited" } " vocabulary wraps a stream to behave as if it had only a limited number of bytes, either throwing an error or returning " { $link f } " upon reaching the end." $nl
+"The " { $vocab-link "io.streams.limited" } " vocabulary wraps a stream to behave as if it had only a limited number of bytes, either throwing an error or returning " { $link f } " upon reaching the end. Limiting a non-seekable stream keeps a byte count and triggers the end-of-stream behavior when this byte count has been reached. However, limiting a seekable stream creates a window of bytes that supports seeking and re-reading of bytes in that window." $nl
 "Wrap a stream in a limited stream:"
 { $subsections limit }
 "Wrap the current " { $link input-stream } " in a limited stream:"
--- a/basis/math/bitwise/bitwise-docs.factor
+++ b/basis/math/bitwise/bitwise-docs.factor
@ -1,6 +1,6 @@
 ! Copyright (C) 2008 Doug Coleman.
 ! See http://factorcode.org/license.txt for BSD license.
-USING: assocs help.markup help.syntax math sequences ;
+USING: assocs help.markup help.syntax math sequences kernel ;
 IN: math.bitwise

 HELP: bitfield
@ -67,17 +67,21 @@ HELP: bit-clear?

 HELP: bit-count
 { $values
-     { "x" integer }
+     { "obj" object }
     { "n" integer }
 }
-{ $description "Returns the number of set bits as an integer." }
+{ $description "Returns the number of set bits as an object. This word only works on non-negative integers or objects that can be represented as a byte-array." }
 { $examples 
    { $example "USING: math.bitwise prettyprint ;"
               "HEX: f0 bit-count ."
               "4"
    }
    { $example "USING: math.bitwise prettyprint ;"
-               "-7 bit-count ."
+               "-1 32 bits bit-count ."
+               "32"
+    }
+    { $example "USING: math.bitwise prettyprint ;"
+               "B{ 1 0 1 } bit-count ."
               "2"
    }
 } ;
@ -206,6 +210,20 @@ HELP: mask?
    }
 } ;

+HELP: even-parity?
+{ $values
+    { "obj" object }
+    { "?" boolean }
+}
+{ $description "Returns true if the number of set bits in an object is even." } ;
+
+HELP: odd-parity?
+{ $values
+    { "obj" object }
+    { "?" boolean }
+}
+{ $description "Returns true if the number of set bits in an object is odd." } ;
+
 HELP: on-bits
 { $values
     { "n" integer }
@ -368,6 +386,8 @@ $nl
 { $subsections on-bits }
 "Counting the number of set bits:"
 { $subsections bit-count }
+"Testing the parity of an object:"
+{ $subsections even-parity? odd-parity? }
 "More efficient modding by powers of two:"
 { $subsections wrap }
 "Bit-rolling:"
--- a/basis/math/bitwise/bitwise-tests.factor
+++ b/basis/math/bitwise/bitwise-tests.factor
@ -1,4 +1,7 @@
-USING: accessors math math.bitwise tools.test kernel words ;
+USING: accessors math math.bitwise tools.test kernel words
+specialized-arrays alien.c-types math.vectors.simd
+sequences destructors libc ;
+SPECIALIZED-ARRAY: int
 IN: math.bitwise.tests

 [ 0 ] [ 1 0 0 bitroll ] unit-test
@ -37,3 +40,23 @@ CONSTANT: b 2
 [ 4 ] [ BIN: 1010101 bit-count ] unit-test
 [ 0 ] [ BIN: 0 bit-count ] unit-test
 [ 1 ] [ BIN: 1 bit-count ] unit-test
+
+SIMD: uint
+SPECIALIZED-ARRAY: uint
+SPECIALIZED-ARRAY: uint-4
+
+[ 1 ] [ uint-4{ 1 0 0 0 } bit-count ] unit-test
+
+[ 1 ] [
+    [
+        2 malloc-int-array &free 1 0 pick set-nth bit-count
+    ] with-destructors
+] unit-test
+
+[ 1 ] [ B{ 1 0 0 } bit-count ] unit-test
+[ 3 ] [ B{ 1 1 1 } bit-count ] unit-test
+
+[ t ] [ BIN: 0 even-parity? ] unit-test
+[ f ] [ BIN: 1 even-parity? ] unit-test
+[ f ] [ BIN: 0 odd-parity? ] unit-test
+[ t ] [ BIN: 1 odd-parity? ] unit-test
--- a/basis/math/bitwise/bitwise.factor
+++ b/basis/math/bitwise/bitwise.factor
@ -1,7 +1,9 @@
 ! Copyright (C) 2007, 2008 Slava Pestov, Doug Coleman.
 ! See http://factorcode.org/license.txt for BSD license.
 USING: arrays assocs combinators combinators.smart fry kernel
-macros math math.bits sequences sequences.private words ;
+macros math math.bits sequences sequences.private words
+byte-arrays alien alien.c-types specialized-arrays ;
+SPECIALIZED-ARRAY: uchar
 IN: math.bitwise

 ! utilities
@ -84,24 +86,36 @@ DEFER: byte-bit-count
 GENERIC: (bit-count) ( x -- n )

 M: fixnum (bit-count)
-    [
-        {
-            [           byte-bit-count ]
-            [ -8  shift byte-bit-count ]
-            [ -16 shift byte-bit-count ]
-            [ -24 shift byte-bit-count ]
-        } cleave
-    ] sum-outputs ;
+    0 swap [
+        dup 0 >
+    ] [
+        [ 8 bits byte-bit-count ] [ -8 shift ] bi
+        [ + ] dip
+    ] while drop ;

 M: bignum (bit-count)
    dup 0 = [ drop 0 ] [
        [ byte-bit-count ] [ -8 shift (bit-count) ] bi +
    ] if ;

+: byte-array-bit-count ( byte-array -- n )
+    0 [ byte-bit-count + ] reduce ;
+
 PRIVATE>

-: bit-count ( x -- n )
-    dup 0 < [ bitnot ] when (bit-count) ; inline
+ERROR: invalid-bit-count-target object ;
+
+GENERIC: bit-count ( obj -- n )
+
+M: integer bit-count
+    dup 0 < [ invalid-bit-count-target ] when (bit-count) ; inline
+
+M: byte-array bit-count
+    byte-array-bit-count ;
+
+M: object bit-count
+    [ >c-ptr ] [ byte-length ] bi <direct-uchar-array>
+    byte-array-bit-count ;

 : >signed ( x n -- y )
    2dup neg 1 + shift 1 = [ 2^ - ] [ drop ] if ;
@ -113,3 +127,7 @@ PRIVATE>
 : next-even ( m -- n ) >even 2 + ; foldable

 : next-odd ( m -- n ) dup even? [ 1 + ] [ 2 + ] if ; foldable
+
+: even-parity? ( obj -- ? ) bit-count even? ;
+
+: odd-parity? ( obj -- ? ) bit-count odd? ;
--- a/basis/math/vectors/conversion/authors.txt
+++ b/basis/math/vectors/conversion/authors.txt
@ -0,0 +1 @@
+Joe Groff
--- a/basis/math/vectors/conversion/backend/backend.factor
+++ b/basis/math/vectors/conversion/backend/backend.factor
@ -0,0 +1,21 @@
+! (c)Joe Groff bsd license
+USING: accessors alien.c-types arrays assocs classes combinators
+cords fry kernel math math.vectors sequences ;
+IN: math.vectors.conversion.backend
+
+: saturate-map-as ( v quot result -- w )
+    [ element-type '[ @ _ c-type-clamp ] ] keep map-as ; inline
+
+: (v>float) ( i to-type -- f )
+    [ >float ] swap new map-as ;
+: (v>integer) ( f to-type -- i )
+    [ >integer ] swap new map-as ;
+: (vpack-signed) ( a b to-type -- ab )
+    [ cord-append [ ] ] dip new saturate-map-as ;
+: (vpack-unsigned) ( a b to-type -- ab )
+    [ cord-append [ ] ] dip new saturate-map-as ;
+: (vunpack-head) ( ab to-type -- a )
+    [ dup length 2 /i head-slice ] dip new like ;
+: (vunpack-tail) ( ab to-type -- b )
+    [ dup length 2 /i tail-slice ] dip new like ;
+
--- a/basis/math/vectors/conversion/conversion-tests.factor
+++ b/basis/math/vectors/conversion/conversion-tests.factor
@ -0,0 +1,149 @@
+! (c)Joe Groff bsd license
+USING: accessors arrays compiler continuations generalizations
+kernel kernel.private locals math.vectors.conversion math.vectors.simd
+sequences stack-checker tools.test ;
+FROM: alien.c-types => char uchar short ushort int uint longlong ulonglong float double ;
+SIMD: uchar
+SIMD: char
+SIMD: ushort
+SIMD: short
+SIMD: uint
+SIMD: int
+SIMD: ulonglong
+SIMD: longlong
+SIMD: float
+SIMD: double
+IN: math.vectors.conversion.tests
+
+ERROR: optimized-vconvert-inconsistent
+    unoptimized-result
+    optimized-result ;
+
+MACRO:: test-vconvert ( from-type to-type -- )
+    [ from-type to-type vconvert ] :> quot
+    quot infer :> effect
+    effect in>> length :> inputs
+    effect out>> length :> outputs
+
+    inputs from-type <array> :> declaration
+
+    [
+        inputs narray
+        [ quot with-datastack ]
+        [ [ [ declaration declare quot call ] compile-call ] with-datastack ] bi
+        2dup = [ optimized-vconvert-inconsistent ] unless
+        drop outputs firstn
+    ] ;
+
+[ uint-4{ 5 1 2 6 } int-4 float-4 vconvert ]
+[ bad-vconvert-input? ] must-fail-with
+
+[ int-4{ 1 2 3 4 } uint-4{ 5 1 2 6 } int-4 short-8 vconvert ]
+[ bad-vconvert-input? ] must-fail-with
+
+[ uint-4{ 1 2 3 4 } int-4{ 5 1 2 6 } int-4 short-8 vconvert ]
+[ bad-vconvert-input? ] must-fail-with
+
+[ uint-4{ 5 1 2 6 } int-4 longlong-2 vconvert ]
+[ bad-vconvert-input? ] must-fail-with
+
+[ float-4{ -5.0 1.0 2.0 6.0 } ]
+[ int-4{ -5 1 2 6 } int-4 float-4 test-vconvert ] unit-test
+
+[ int-4{ -5 1 2 6 } ]
+[ float-4{ -5.0 1.0 2.0 6.0 } float-4 int-4 test-vconvert ] unit-test
+
+[ int-4{ -5 1 2 6 } ]
+[ float-4{ -5.0 1.0 2.3 6.7 } float-4 int-4 test-vconvert ] unit-test
+
+[ double-2{ -5.0 1.0 } ]
+[ longlong-2{ -5 1 } longlong-2 double-2 test-vconvert ] unit-test
+
+[ longlong-4{ -5 1 2 6 } ]
+[ double-4{ -5.0 1.0 2.3 6.7 } double-4 longlong-4 test-vconvert ] unit-test
+
+! TODO we should be able to do double->int pack
+! [ int-8{ -5 1 2 6 12 34 -56 78 } ]
+[ double-4{ -5.0 1.0 2.0 6.0 } double-4{ 12.0 34.0 -56.0 78.0 } double-4 int-8 test-vconvert ]
+[ error>> bad-vconvert? ] must-fail-with
+
+[ float-4{ -1.25 2.0 3.0 -4.0 } ]
+[ double-2{ -1.25 2.0 } double-2{ 3.0 -4.0 } double-2 float-4 test-vconvert ] unit-test
+
+[ int-4{ -1 2 3 -4 } ]
+[ longlong-2{ -1 2 } longlong-2{ 3 -4 } longlong-2 int-4 test-vconvert ] unit-test
+
+[ short-8{ -1 2 3 -32768 5 32767 -7 32767 } ]
+[ int-4{ -1 2 3 -40000 } int-4{ 5 60000 -7 80000 } int-4 short-8 test-vconvert ] unit-test
+
+[ short-16{ -1 2 3 -32768 3 2 1 0 5 32767 -7 32767 7 6 5 4 } ]
+[
+    int-8{ -1 2 3 -40000 3 2 1 0 }
+    int-8{ 5 60000 -7 80000 7 6 5 4 } int-8 short-16 test-vconvert
+] unit-test
+
+[ ushort-8{ 0 2 3 0 5 60000 0 65535 } ]
+[ int-4{ -1 2 3 -40000 } int-4{ 5 60000 -7 80000 } int-4 ushort-8 test-vconvert ] unit-test
+
+[ ushort-8{ 65535 2 3 65535 5 60000 65535 65535 } ]
+[ uint-4{ -1 2 3 -40000 } uint-4{ 5 60000 -7 80000 } uint-4 ushort-8 test-vconvert ] unit-test
+
+[ uint-4{ -1 2 3 -40000 } uint-4{ 5 60000 -7 80000 } uint-4 short-8 test-vconvert ]
+[ error>> bad-vconvert? ] must-fail-with
+
+! TODO we should be able to do 256->128 pack
+! [ float-4{ -1.25 2.0 3.0 -4.0 } ]
+[ double-4{ -1.25 2.0 3.0 -4.0 } double-4 float-4 test-vconvert ]
+[ error>> bad-vconvert? ] must-fail-with
+
+! [ int-4{ -1 2 3 -4 } ]
+[ longlong-4{ -1 2 3 -4 } longlong-4 int-4 test-vconvert ]
+[ error>> bad-vconvert? ] must-fail-with
+
+[ double-2{ -1.25 2.0 } double-2{ 3.0 -4.0 } ]
+[ float-4{ -1.25 2.0 3.0 -4.0 } float-4 double-2 test-vconvert ] unit-test
+
+[ int-4{ -1 2 3 -4 } ]
+[ int-4{ -1 2 3 -4 } int-4 int-4 test-vconvert ] unit-test
+
+[ longlong-2{ -1 2 } longlong-2{ 3 -4 } ]
+[ int-4{ -1 2 3 -4 } int-4 longlong-2 test-vconvert ] unit-test
+
+[ int-4{ -1 2 3 -4 } int-4 ulonglong-2 test-vconvert ]
+[ error>> bad-vconvert? ] must-fail-with
+
+[ ulonglong-2{ 1 2 } ulonglong-2{ 3 4 } ]
+[ uint-4{ 1 2 3 4 } uint-4 ulonglong-2 test-vconvert ] unit-test
+
+[ longlong-4{ 1 2 3 4 } longlong-4{ 3 4 5 6 } ]
+[ uint-8{ 1 2 3 4 3 4 5 6 } uint-8 longlong-4 test-vconvert ] unit-test
+
+[ int-4{ 1 2 -3 -4 } int-4{ 5 -6 7 -8 } ]
+[ short-8{ 1 2 -3 -4 5 -6 7 -8 } short-8 int-4 test-vconvert ] unit-test
+
+[ uint-4{ 1 2 3 4 } uint-4{ 5 6 7 8 } ]
+[ ushort-8{ 1 2 3 4 5 6 7 8 } ushort-8 uint-4 test-vconvert ] unit-test
+
+[ longlong-4{ 1 2 3 4 } longlong-4{ 3 4 5 6 } ]
+[ uint-8{ 1 2 3 4 3 4 5 6 } uint-8 longlong-4 test-vconvert ] unit-test
+
+! TODO we should be able to do 128->256 unpack
+! [ longlong-4{ 1 2 3 4 } ]
+[ uint-4{ 1 2 3 4 } uint-4 longlong-4 test-vconvert ]
+[ error>> bad-vconvert? ] must-fail-with
+
+! TODO we should be able to do multi-tier pack/unpack
+! [ longlong-2{ 1 2 } longlong-2{ 3 4 } longlong-2{ 5 6 } longlong-2{ 7 8 } ]
+[ ushort-8{ 1 2 3 4 5 6 7 8 } ushort-8 longlong-2 test-vconvert ]
+[ error>> bad-vconvert? ] must-fail-with
+
+! [ ushort-8{ 1 2 3 4 5 6 7 8 } ]
+[
+    longlong-2{ 1 2 }
+    longlong-2{ 3 4 }
+    longlong-2{ 5 6 }
+    longlong-2{ 7 8 }
+    longlong-2 ushort-8 test-vconvert
+]
+[ error>> bad-vconvert? ] must-fail-with
+
--- a/basis/math/vectors/conversion/conversion.factor
+++ b/basis/math/vectors/conversion/conversion.factor
@ -0,0 +1,83 @@
+! (c)Joe Groff bsd license
+USING: accessors alien.c-types arrays assocs classes combinators
+combinators.short-circuit cords fry kernel locals math
+math.vectors math.vectors.conversion.backend sequences ;
+FROM: alien.c-types => char uchar short ushort int uint longlong ulonglong float double ;
+IN: math.vectors.conversion
+
+ERROR: bad-vconvert from-type to-type ;
+ERROR: bad-vconvert-input value expected-type ;
+
+<PRIVATE
+
+: float-type? ( c-type -- ? )
+    { float double } memq? ;
+: unsigned-type? ( c-type -- ? )
+    { uchar ushort uint ulonglong } memq? ;
+
+: check-vconvert-type ( value expected-type -- value )
+    2dup instance? [ drop ] [ bad-vconvert-input ] if ; inline
+
+:: [vconvert] ( from-element to-element from-size to-size from-type to-type -- quot )
+    {
+        {
+            [ from-element to-element eq? ]
+            [ [ ] ]
+        }
+        {
+            [ from-element to-element [ float-type? not ] both? ]
+            [ [ underlying>> to-type boa ] ]
+        }
+        {
+            [ from-element float-type? ]
+            [ [ to-type (v>integer) ] ]
+        }
+        {
+            [ to-element   float-type? ]
+            [ [ to-type (v>float)   ] ]
+        }
+    } cond
+    [ from-type check-vconvert-type ] prepose ;
+
+:: [vpack] ( from-element to-element from-size to-size from-type to-type -- quot )
+    from-size to-size /i log2 :> steps
+
+    {
+        [ steps 1 = not ]
+        [ from-element to-element [ float-type? ] bi@ xor ]
+        [ from-element unsigned-type? to-element unsigned-type? not and ]
+    } 0|| [ from-type to-type bad-vconvert ] when
+
+    to-element unsigned-type? [ to-type (vpack-unsigned) ] [ to-type (vpack-signed) ] ?
+    [ [ from-type check-vconvert-type ] bi@ ] prepose ;
+
+:: [vunpack] ( from-element to-element from-size to-size from-type to-type -- quot )
+    to-size from-size /i log2 :> steps
+
+    {
+        [ steps 1 = not ]
+        [ from-element to-element [ float-type? ] bi@ xor ]
+        [ from-element unsigned-type? not to-element unsigned-type? and ]
+    } 0|| [ from-type to-type bad-vconvert ] when
+
+    [
+        from-type check-vconvert-type
+        [ to-type (vunpack-head) ] [ to-type (vunpack-tail) ] bi
+    ] ;
+
+PRIVATE>
+
+MACRO:: vconvert ( from-type to-type -- )
+    from-type new [ element-type ] [ byte-length ] bi :> from-length :> from-element
+    to-type   new [ element-type ] [ byte-length ] bi :> to-length   :> to-element
+    from-element heap-size :> from-size
+    to-element   heap-size :> to-size   
+
+    from-length to-length = [ from-type to-type bad-vconvert ] unless
+
+    from-element to-element from-size to-size from-type to-type {
+        { [ from-size to-size < ] [ [vunpack] ] }
+        { [ from-size to-size = ] [ [vconvert] ] }
+        { [ from-size to-size > ] [ [vpack] ] }
+    } cond ;
+
--- a/basis/math/vectors/conversion/summary.txt
+++ b/basis/math/vectors/conversion/summary.txt
@ -0,0 +1 @@
+Conversion, packing, and unpacking of SIMD vectors
--- a/basis/math/vectors/simd/functor/functor.factor
+++ b/basis/math/vectors/simd/functor/functor.factor
@ -3,6 +3,7 @@
 USING: accessors assocs byte-arrays classes classes.algebra effects fry
 functors generalizations kernel literals locals math math.functions
 math.vectors math.vectors.private math.vectors.simd.intrinsics
+math.vectors.conversion.backend
 math.vectors.specialization parser prettyprint.custom sequences
 sequences.private strings words definitions macros cpu.architecture
 namespaces arrays quotations combinators combinators.short-circuit sets
@ -174,6 +175,8 @@ A-vn->v-op   DEFINES-PRIVATE ${A}-vn->v-op
 A-vv->n-op   DEFINES-PRIVATE ${A}-vv->n-op
 A-v->v-op    DEFINES-PRIVATE ${A}-v->v-op
 A-v->n-op    DEFINES-PRIVATE ${A}-v->n-op
+A-v-conversion-op DEFINES-PRIVATE ${A}-v-conversion-op
+A-vv-conversion-op DEFINES-PRIVATE ${A}-vv-conversion-op

 A-element-class [ A-rep rep-component-type c:c-type-boxed-class ]

@ -251,10 +254,29 @@ INSTANCE: A sequence
 : A-v->n-op ( v quot -- n )
    [ underlying>> A-rep ] dip call ; inline

+: A-v-conversion-op ( v1 to-type quot -- v2 )
+    swap [ underlying>> A-rep ] [ call ] [ '[ _ boa ] call( u -- v ) ] tri* ; inline
+
+: A-vv-conversion-op ( v1 v2 to-type quot -- v2 )
+    swap {
+        [ underlying>> ]
+        [ underlying>> A-rep ]
+        [ call ]
+        [ '[ _ boa ] call( u -- v ) ]
+    } spread ; inline
+
 simd new
    \ A >>class
    \ A-with >>ctor
    \ A-rep >>rep
+    {
+        { (v>float) A-v-conversion-op }
+        { (v>integer) A-v-conversion-op }
+        { (vpack-signed) A-vv-conversion-op }
+        { (vpack-unsigned) A-vv-conversion-op }
+        { (vunpack-head) A-v-conversion-op }
+        { (vunpack-tail) A-v-conversion-op }
+    } >>special-wrappers
    {
        { { +vector+ +vector+ -> +vector+ } A-vv->v-op }
        { { +vector+ +scalar+ -> +vector+ } A-vn->v-op }
@ -327,6 +349,10 @@ A-vany-op    DEFINES-PRIVATE ${A}-vany-op
 A-vall-op    DEFINES-PRIVATE ${A}-vall-op
 A-vmerge-head-op    DEFINES-PRIVATE ${A}-vmerge-head-op
 A-vmerge-tail-op    DEFINES-PRIVATE ${A}-vmerge-tail-op
+A-v-conversion-op   DEFINES-PRIVATE ${A}-v-conversion-op
+A-vpack-op          DEFINES-PRIVATE ${A}-vpack-op
+A-vunpack-head-op   DEFINES-PRIVATE ${A}-vunpack-head-op
+A-vunpack-tail-op   DEFINES-PRIVATE ${A}-vunpack-tail-op

 WHERE

@ -424,16 +450,41 @@ INSTANCE: A sequence
 : A-vmerge-head-op ( v1 v2 quot -- v )
    drop
    [ underlying1>> ] bi@
-    [ A-rep (simd-vmerge-head) ]
-    [ A-rep (simd-vmerge-tail) ] 2bi
-    \ A boa ;
+    [ A-rep (simd-(vmerge-head)) ]
+    [ A-rep (simd-(vmerge-tail)) ] 2bi
+    \ A boa ; inline
    
 : A-vmerge-tail-op ( v1 v2 quot -- v )
    drop
    [ underlying2>> ] bi@
-    [ A-rep (simd-vmerge-head) ]
-    [ A-rep (simd-vmerge-tail) ] 2bi
-    \ A boa ;
+    [ A-rep (simd-(vmerge-head)) ]
+    [ A-rep (simd-(vmerge-tail)) ] 2bi
+    \ A boa ; inline
+
+: A-v-conversion-op ( v1 to-type quot -- v )
+    swap [ 
+        [ [ underlying1>> A-rep ] dip call ]
+        [ [ underlying2>> A-rep ] dip call ] 2bi
+    ] dip '[ _ boa ] call( u1 u2 -- v ) ; inline
+
+: A-vpack-op ( v1 v2 to-type quot -- v )
+    swap [ 
+        '[ [ underlying1>> ] [ underlying2>> ] bi A-rep @ ] bi*
+    ] dip '[ _ boa ] call( u1 u2 -- v ) ; inline
+
+: A-vunpack-head-op ( v1 to-type quot -- v )
+    '[
+        underlying1>>
+        [ A-rep @ ]
+        [ A-rep (simd-(vunpack-tail)) ] bi
+    ] dip '[ _ boa ] call( u1 u2 -- v ) ; inline
+
+: A-vunpack-tail-op ( v1 to-type quot -- v )
+    '[
+        underlying2>>
+        [ A-rep (simd-(vunpack-head)) ]
+        [ A-rep @ ] bi
+    ] dip '[ _ boa ] call( u1 u2 -- v ) ; inline

 simd new
    \ A >>class
@ -445,8 +496,14 @@ simd new
        { vnone? A-vany-op }
        { vany?  A-vany-op }
        { vall?  A-vall-op }
-        { vmerge-head A-vmerge-head-op }
-        { vmerge-tail A-vmerge-tail-op }
+        { (vmerge-head) A-vmerge-head-op }
+        { (vmerge-tail) A-vmerge-tail-op }
+        { (v>integer) A-v-conversion-op }
+        { (v>float) A-v-conversion-op }
+        { (vpack-signed) A-vpack-op }
+        { (vpack-unsigned) A-vpack-op }
+        { (vunpack-head) A-vunpack-head-op }
+        { (vunpack-tail) A-vunpack-tail-op }
    } >>special-wrappers
    {
        { { +vector+ +vector+ -> +vector+ } A-vv->v-op }
--- a/basis/math/vectors/simd/intrinsics/intrinsics.factor
+++ b/basis/math/vectors/simd/intrinsics/intrinsics.factor
@ -2,7 +2,9 @@
 ! See http://factorcode.org/license.txt for BSD license.
 USING: alien alien.c-types alien.data assocs combinators
 cpu.architecture compiler.cfg.comparisons fry generalizations
-kernel libc macros math sequences effects accessors namespaces
+kernel libc macros math
+math.vectors.conversion.backend
+sequences sets effects accessors namespaces
 lexer parser vocabs.parser words arrays math.vectors ;
 IN: math.vectors.simd.intrinsics

@ -12,17 +14,27 @@ ERROR: bad-simd-call ;

 : simd-effect ( word -- effect )
    stack-effect [ in>> "rep" suffix ] [ out>> ] bi <effect> ;
+: simd-conversion-effect ( word -- effect )
+    stack-effect [ in>> but-last "rep" suffix ] [ out>> ] bi <effect> ;

 SYMBOL: simd-ops

 V{ } clone simd-ops set-global

-SYNTAX: SIMD-OP:
-    scan-word dup name>> "(simd-" ")" surround create-in
-    [ nip [ bad-simd-call ] define ]
-    [ [ simd-effect ] dip set-stack-effect ]
+: (SIMD-OP:) ( accum quot -- accum )
+    [
+        scan-word dup name>> "(simd-" ")" surround create-in
+        [ nip [ bad-simd-call ] define ]
+    ] dip
+    '[ _ dip set-stack-effect ]
    [ 2array simd-ops get push ]
-    2tri ;
+    2tri ; inline
+
+SYNTAX: SIMD-OP:
+    [ simd-effect ] (SIMD-OP:) ;
+
+SYNTAX: SIMD-CONVERSION-OP:
+    [ simd-conversion-effect ] (SIMD-OP:) ;

 >>

@ -55,8 +67,8 @@ SIMD-OP: vrshift
 SIMD-OP: hlshift
 SIMD-OP: hrshift
 SIMD-OP: vshuffle
-SIMD-OP: vmerge-head
-SIMD-OP: vmerge-tail
+SIMD-OP: (vmerge-head)
+SIMD-OP: (vmerge-tail)
 SIMD-OP: v<=
 SIMD-OP: v<
 SIMD-OP: v=
@ -67,6 +79,13 @@ SIMD-OP: vany?
 SIMD-OP: vall?
 SIMD-OP: vnone?

+SIMD-CONVERSION-OP: (v>float)
+SIMD-CONVERSION-OP: (v>integer)
+SIMD-CONVERSION-OP: (vpack-signed)
+SIMD-CONVERSION-OP: (vpack-unsigned)
+SIMD-CONVERSION-OP: (vunpack-head)
+SIMD-CONVERSION-OP: (vunpack-tail)
+
 : (simd-with) ( x rep -- v ) bad-simd-call ;
 : (simd-gather-2) ( a b rep -- v ) bad-simd-call ;
 : (simd-gather-4) ( a b c d rep -- v ) bad-simd-call ;
@ -118,48 +137,58 @@ MACRO: (simd-boa) ( rep -- quot )

 GENERIC# supported-simd-op? 1 ( rep intrinsic -- ? )

+: (%unpack-reps) ( -- reps )
+    %merge-vector-reps [ int-vector-rep? ] filter
+    %unpack-vector-head-reps union ;
+
 M: vector-rep supported-simd-op?
    {
-        { \ (simd-v+)           [ %add-vector-reps            ] }
-        { \ (simd-vs+)          [ %saturated-add-vector-reps  ] }
-        { \ (simd-v+-)          [ %add-sub-vector-reps        ] }
-        { \ (simd-v-)           [ %sub-vector-reps            ] }
-        { \ (simd-vs-)          [ %saturated-sub-vector-reps  ] }
-        { \ (simd-v*)           [ %mul-vector-reps            ] }
-        { \ (simd-vs*)          [ %saturated-mul-vector-reps  ] }
-        { \ (simd-v/)           [ %div-vector-reps            ] }
-        { \ (simd-vmin)         [ %min-vector-reps            ] }
-        { \ (simd-vmax)         [ %max-vector-reps            ] }
-        { \ (simd-v.)           [ %dot-vector-reps            ] }
-        { \ (simd-vsqrt)        [ %sqrt-vector-reps           ] }
-        { \ (simd-sum)          [ %horizontal-add-vector-reps ] }
-        { \ (simd-vabs)         [ %abs-vector-reps            ] }
-        { \ (simd-vbitand)      [ %and-vector-reps            ] }
-        { \ (simd-vbitandn)     [ %andn-vector-reps           ] }
-        { \ (simd-vbitor)       [ %or-vector-reps             ] }
-        { \ (simd-vbitxor)      [ %xor-vector-reps            ] }
-        { \ (simd-vbitnot)      [ %not-vector-reps            ] }
-        { \ (simd-vand)         [ %and-vector-reps            ] }
-        { \ (simd-vandn)        [ %andn-vector-reps           ] }
-        { \ (simd-vor)          [ %or-vector-reps             ] }
-        { \ (simd-vxor)         [ %xor-vector-reps            ] }
-        { \ (simd-vnot)         [ %not-vector-reps            ] }
-        { \ (simd-vlshift)      [ %shl-vector-reps            ] }
-        { \ (simd-vrshift)      [ %shr-vector-reps            ] }
-        { \ (simd-hlshift)      [ %horizontal-shl-vector-reps ] }
-        { \ (simd-hrshift)      [ %horizontal-shr-vector-reps ] }
-        { \ (simd-vshuffle)     [ %shuffle-vector-reps        ] }
-        { \ (simd-vmerge-head)  [ %merge-vector-reps          ] }
-        { \ (simd-vmerge-tail)  [ %merge-vector-reps          ] }
-        { \ (simd-v<=)          [ cc<= %compare-vector-reps   ] }
-        { \ (simd-v<)           [ cc< %compare-vector-reps    ] }
-        { \ (simd-v=)           [ cc= %compare-vector-reps    ] }
-        { \ (simd-v>)           [ cc> %compare-vector-reps    ] }
-        { \ (simd-v>=)          [ cc>= %compare-vector-reps   ] }
-        { \ (simd-vunordered?)  [ cc/<>= %compare-vector-reps ] }
-        { \ (simd-gather-2)     [ %gather-vector-2-reps       ] }
-        { \ (simd-gather-4)     [ %gather-vector-4-reps       ] }
-        { \ (simd-vany?)        [ %test-vector-reps           ] }
-        { \ (simd-vall?)        [ %test-vector-reps           ] }
-        { \ (simd-vnone?)       [ %test-vector-reps           ] }
+        { \ (simd-v+)            [ %add-vector-reps            ] }
+        { \ (simd-vs+)           [ %saturated-add-vector-reps  ] }
+        { \ (simd-v+-)           [ %add-sub-vector-reps        ] }
+        { \ (simd-v-)            [ %sub-vector-reps            ] }
+        { \ (simd-vs-)           [ %saturated-sub-vector-reps  ] }
+        { \ (simd-v*)            [ %mul-vector-reps            ] }
+        { \ (simd-vs*)           [ %saturated-mul-vector-reps  ] }
+        { \ (simd-v/)            [ %div-vector-reps            ] }
+        { \ (simd-vmin)          [ %min-vector-reps            ] }
+        { \ (simd-vmax)          [ %max-vector-reps            ] }
+        { \ (simd-v.)            [ %dot-vector-reps            ] }
+        { \ (simd-vsqrt)         [ %sqrt-vector-reps           ] }
+        { \ (simd-sum)           [ %horizontal-add-vector-reps ] }
+        { \ (simd-vabs)          [ %abs-vector-reps            ] }
+        { \ (simd-vbitand)       [ %and-vector-reps            ] }
+        { \ (simd-vbitandn)      [ %andn-vector-reps           ] }
+        { \ (simd-vbitor)        [ %or-vector-reps             ] }
+        { \ (simd-vbitxor)       [ %xor-vector-reps            ] }
+        { \ (simd-vbitnot)       [ %xor-vector-reps            ] }
+        { \ (simd-vand)          [ %and-vector-reps            ] }
+        { \ (simd-vandn)         [ %andn-vector-reps           ] }
+        { \ (simd-vor)           [ %or-vector-reps             ] }
+        { \ (simd-vxor)          [ %xor-vector-reps            ] }
+        { \ (simd-vnot)          [ %xor-vector-reps            ] }
+        { \ (simd-vlshift)       [ %shl-vector-reps            ] }
+        { \ (simd-vrshift)       [ %shr-vector-reps            ] }
+        { \ (simd-hlshift)       [ %horizontal-shl-vector-reps ] }
+        { \ (simd-hrshift)       [ %horizontal-shr-vector-reps ] }
+        { \ (simd-vshuffle)      [ %shuffle-vector-reps        ] }
+        { \ (simd-(vmerge-head)) [ %merge-vector-reps          ] }
+        { \ (simd-(vmerge-tail)) [ %merge-vector-reps          ] }
+        { \ (simd-(v>float))        [ %integer>float-vector-reps ] }
+        { \ (simd-(v>integer))      [ %float>integer-vector-reps ] }
+        { \ (simd-(vpack-signed))   [ %signed-pack-vector-reps   ] }
+        { \ (simd-(vpack-unsigned)) [ %unsigned-pack-vector-reps ] }
+        { \ (simd-(vunpack-head))   [ (%unpack-reps)             ] }
+        { \ (simd-(vunpack-tail))   [ (%unpack-reps)             ] }
+        { \ (simd-v<=)           [ cc<= %compare-vector-reps   ] }
+        { \ (simd-v<)            [ cc< %compare-vector-reps    ] }
+        { \ (simd-v=)            [ cc= %compare-vector-reps    ] }
+        { \ (simd-v>)            [ cc> %compare-vector-reps    ] }
+        { \ (simd-v>=)           [ cc>= %compare-vector-reps   ] }
+        { \ (simd-vunordered?)   [ cc/<>= %compare-vector-reps ] }
+        { \ (simd-gather-2)      [ %gather-vector-2-reps       ] }
+        { \ (simd-gather-4)      [ %gather-vector-4-reps       ] }
+        { \ (simd-vany?)         [ %test-vector-reps           ] }
+        { \ (simd-vall?)         [ %test-vector-reps           ] }
+        { \ (simd-vnone?)        [ %test-vector-reps           ] }
    } case member? ;
--- a/basis/math/vectors/simd/simd-tests.factor
+++ b/basis/math/vectors/simd/simd-tests.factor
@ -4,6 +4,7 @@ math.private math.vectors math.vectors.simd
 math.vectors.simd.private prettyprint random sequences system
 tools.test vocabs assocs compiler.cfg.debugger words
 locals math.vectors.specialization combinators cpu.architecture
+math.vectors.conversion.backend
 math.vectors.simd.intrinsics namespaces byte-arrays alien
 specialized-arrays classes.struct eval classes.algebra sets
 quotations math.constants compiler.units ;
@ -128,6 +129,8 @@ CONSTANT: simd-classes

 [ HEX: ffffffff ] [ HEX: ffffffff [ uint-4-with ] compile-call first ] unit-test

+[ HEX: ffffffff ] [ [ HEX: ffffffff uint-4-with ] compile-call first ] unit-test
+
 "== Checking -boa constructors" print

 [ { } ] [
@ -181,6 +184,9 @@ CONSTANT: simd-classes
    {
        hlshift hrshift vshuffle vbroadcast
        vany? vall? vnone?
+        (v>float) (v>integer)
+        (vpack-signed) (vpack-unsigned)
+        (vunpack-head) (vunpack-tail)
    } unique assoc-diff ;

 : ops-to-check ( elt-class -- alist )
--- a/basis/math/vectors/specialization/specialization.factor
+++ b/basis/math/vectors/specialization/specialization.factor
@ -2,6 +2,7 @@
 ! See http://factorcode.org/license.txt for BSD license.
 USING: words kernel make sequences effects sets kernel.private
 accessors combinators math math.intervals math.vectors
+math.vectors.conversion.backend
 namespaces assocs fry splitting classes.algebra generalizations
 locals compiler.tree.propagation.info ;
 IN: math.vectors.specialization
@ -98,8 +99,14 @@ H{
    { hrshift { +vector+ +literal+ -> +vector+ } }
    { vshuffle { +vector+ +literal+ -> +vector+ } }
    { vbroadcast { +vector+ +literal+ -> +vector+ } }
-    { vmerge-head { +vector+ +vector+ -> +vector+ } }
-    { vmerge-tail { +vector+ +vector+ -> +vector+ } }
+    { (vmerge-head) { +vector+ +vector+ -> +vector+ } }
+    { (vmerge-tail) { +vector+ +vector+ -> +vector+ } }
+    { (v>float) { +vector+ +literal+ -> +vector+ } }
+    { (v>integer) { +vector+ +literal+ -> +vector+ } }
+    { (vpack-signed) { +vector+ +vector+ +literal+ -> +vector+ } }
+    { (vpack-unsigned) { +vector+ +vector+ +literal+ -> +vector+ } }
+    { (vunpack-head) { +vector+ +literal+ -> +vector+ } }
+    { (vunpack-tail) { +vector+ +literal+ -> +vector+ } }
    { v<= { +vector+ +vector+ -> +vector+ } }
    { v< { +vector+ +vector+ -> +vector+ } }
    { v= { +vector+ +vector+ -> +vector+ } }
@ -152,8 +159,13 @@ ERROR: bad-vector-word word ;
        { [ dup complex class<= ] [ vector-words keys { vsqrt } diff ] }
        [ { } ]
    } cond
-    ! Don't specialize horizontal shifts or shuffles at all, they're only for SIMD
-    { hlshift hrshift vshuffle vbroadcast } diff
+    ! Don't specialize horizontal shifts, shuffles, and conversions at all, they're only for SIMD
+    {
+        hlshift hrshift vshuffle vbroadcast
+        (v>integer) (v>float)
+        (vpack-signed) (vpack-unsigned)
+        (vunpack-head) (vunpack-tail)
+    } diff
    nip ;

 :: specialize-vector-words ( array-type elt-type simd -- )
--- a/basis/math/vectors/vectors-docs.factor
+++ b/basis/math/vectors/vectors-docs.factor
@ -59,7 +59,8 @@ $nl
 { $subsection vbroadcast }
 { $subsection hlshift } 
 { $subsection hrshift }
-{ $subsection vmerge } ;
+{ $subsection vmerge }
+{ $subsection (vmerge) } ;

 ARTICLE: "math-vectors-logic" "Vector component- and bit-wise logic"
 { $notes
@ -357,37 +358,50 @@ HELP: hrshift
 { $description "Shifts the entire SIMD array to the right by " { $snippet "n" } " bytes, filling the vacated left-hand bits with zeroes. This word may only be used in a context where the compiler can statically infer that the input is a SIMD array." } ;

 HELP: vmerge
-{ $values { "u" "a sequence" } { "v" "a sequence" } { "h" "a sequence" } { "t" "a sequence" } }
-{ $description "Creates two new sequences of the same type and size as " { $snippet "u" } " and " { $snippet "v" } " by interleaving the elements of " { $snippet "u" } " and " { $snippet "v" } "." }
+{ $values { "u" "a sequence" } { "v" "a sequence" } { "w" "a sequence" } }
+{ $description "Creates a new sequence of the same type as and twice the length of " { $snippet "u" } " and " { $snippet "v" } " by interleaving the elements of " { $snippet "u" } " and " { $snippet "v" } "." }
 { $examples
 { $example """USING: kernel math.vectors prettyprint ;

-{ "A" "B" "C" "D" } { "1" "2" "3" "4" } vmerge [ . ] bi@"""
+{ "A" "B" "C" "D" } { "1" "2" "3" "4" } vmerge ."""
+"""{ "A" "1" "B" "2" "C" "3" "D" "4" }"""
+} } ;
+
+HELP: (vmerge)
+{ $values { "u" "a sequence" } { "v" "a sequence" } { "h" "a sequence" } { "t" "a sequence" } }
+{ $description "Creates two new sequences of the same type and size as " { $snippet "u" } " and " { $snippet "v" } " by interleaving the elements of " { $snippet "u" } " and " { $snippet "v" } "." }
+{ $notes "For hardware-supported SIMD vector types this word compiles to a single instruction per output value." }
+{ $examples
+{ $example """USING: kernel math.vectors prettyprint ;
+
+{ "A" "B" "C" "D" } { "1" "2" "3" "4" } (vmerge) [ . ] bi@"""
 """{ "A" "1" "B" "2" }
 { "C" "3" "D" "4" }"""
 } } ;

-HELP: vmerge-head
+HELP: (vmerge-head)
 { $values { "u" "a sequence" } { "v" "a sequence" } { "h" "a sequence" } }
-{ $description "Creates two new sequences of the same type and size as " { $snippet "u" } " and " { $snippet "v" } " by interleaving the elements from the first half of " { $snippet "u" } " and " { $snippet "v" } "." }
+{ $description "Creates a new sequence of the same type and size as " { $snippet "u" } " and " { $snippet "v" } " by interleaving the elements from the first half of " { $snippet "u" } " and " { $snippet "v" } "." }
+{ $notes "For hardware-supported SIMD vector types this word compiles to a single instruction." }
 { $examples
 { $example """USING: kernel math.vectors prettyprint ;

-{ "A" "B" "C" "D" } { "1" "2" "3" "4" } vmerge-head ."""
+{ "A" "B" "C" "D" } { "1" "2" "3" "4" } (vmerge-head) ."""
 """{ "A" "1" "B" "2" }"""
 } } ;

-HELP: vmerge-tail
+HELP: (vmerge-tail)
 { $values { "u" "a sequence" } { "v" "a sequence" } { "t" "a sequence" } }
-{ $description "Creates two new sequences of the same type and size as " { $snippet "u" } " and " { $snippet "v" } " by interleaving the elements from the tail half of " { $snippet "u" } " and " { $snippet "v" } "." }
+{ $description "Creates a new sequence of the same type and size as " { $snippet "u" } " and " { $snippet "v" } " by interleaving the elements from the tail half of " { $snippet "u" } " and " { $snippet "v" } "." }
+{ $notes "For hardware-supported SIMD vector types this word compiles to a single instruction." }
 { $examples
 { $example """USING: kernel math.vectors prettyprint ;

-{ "A" "B" "C" "D" } { "1" "2" "3" "4" } vmerge-tail ."""
+{ "A" "B" "C" "D" } { "1" "2" "3" "4" } (vmerge-tail) ."""
 """{ "C" "3" "D" "4" }"""
 } } ;

-{ vmerge vmerge-head vmerge-tail } related-words
+{ vmerge (vmerge) (vmerge-head) (vmerge-tail) } related-words

 HELP: vbroadcast
 { $values { "u" "a SIMD array" } { "n" "a non-negative integer" } { "v" "a SIMD array" } }
--- a/basis/math/vectors/vectors.factor
+++ b/basis/math/vectors/vectors.factor
@ -91,10 +91,15 @@ PRIVATE>
 : hlshift ( u n -- w ) '[ _ <byte-array> prepend 16 head ] change-underlying ;
 : hrshift ( u n -- w ) '[ _ <byte-array> append 16 tail* ] change-underlying ;

-: vmerge-head ( u v -- h ) over length 2 / '[ _ head-slice ] bi@ [ zip ] keep concat-as ;
-: vmerge-tail ( u v -- t ) over length 2 / '[ _ tail-slice ] bi@ [ zip ] keep concat-as ;
+: (vmerge-head) ( u v -- h )
+    over length 2 /i '[ _ head-slice ] bi@ [ zip ] keep concat-as ;
+: (vmerge-tail) ( u v -- t )
+    over length 2 /i '[ _ tail-slice ] bi@ [ zip ] keep concat-as ;

-: vmerge ( u v -- h t ) [ vmerge-head ] [ vmerge-tail ] 2bi ; inline
+: (vmerge) ( u v -- h t )
+    [ (vmerge-head) ] [ (vmerge-tail) ] 2bi ; inline
+
+: vmerge ( u v -- w ) [ zip ] keep concat-as ;

 : vand ( u v -- w )  over '[ [ _ element>bool ] bi@ and ] 2map ;
 : vandn ( u v -- w ) over '[ [ _ element>bool ] bi@ [ not ] dip and ] 2map ;
--- a/basis/random/mersenne-twister/mersenne-twister.factor
+++ b/basis/random/mersenne-twister/mersenne-twister.factor
@ -80,3 +80,4 @@ M: mersenne-twister random-32* ( mt -- r )
 [
    default-mersenne-twister random-generator set-global
 ] "bootstrap.random" add-init-hook
+
--- a/basis/random/sfmt/sfmt-tests.factor
+++ b/basis/random/sfmt/sfmt-tests.factor
@ -4,14 +4,27 @@ USING: accessors kernel random random.sfmt random.sfmt.private
 sequences tools.test ;
 IN: random.sfmt.tests

-[ ] [ 100 <sfmt-19937> drop ] unit-test
+! Period certified by virtue of seed
+[ ] [ 5 <sfmt-19937> drop ] unit-test

-[ 1096298955 ]
-[ 100 <sfmt-19937> dup generate dup generate uint-array>> first ] unit-test
+[ 1331696015 ]
+[ 5 <sfmt-19937> dup generate dup generate uint-array>> first ] unit-test

-[ 2556114782 ]
-[ 100 <sfmt-19937> random-32* ] unit-test
+[ 1432875926 ]
+[ 5 <sfmt-19937> random-32* ] unit-test

+
+! Period certified by flipping a bit
+[ ] [ 7 <sfmt-19937> drop ] unit-test
+
+[ 1674111379 ]
+[ 7 <sfmt-19937> dup generate dup generate uint-array>> first ] unit-test
+
+[ 489955657 ]
+[ 7 <sfmt-19937> random-32* ] unit-test
+
+
+! Test re-seeding SFMT
 [ t ]
 [
    100 <sfmt-19937>
--- a/basis/random/sfmt/sfmt.factor
+++ b/basis/random/sfmt/sfmt.factor
@ -2,7 +2,8 @@
 ! See http://factorcode.org/license.txt for BSD license.
 USING: accessors alien.c-types kernel locals math math.ranges
 math.bitwise math.vectors math.vectors.simd random
-sequences specialized-arrays sequences.private classes.struct ;
+sequences specialized-arrays sequences.private classes.struct
+combinators.short-circuit fry ;
 SIMD: uint
 SPECIALIZED-ARRAY: uint
 SPECIALIZED-ARRAY: uint-4
@ -16,8 +17,9 @@ STRUCT: sfmt-state
    { seed uint }
    { n uint }
    { m uint }
-    { ix uint }
+    { index uint }
    { mask uint-4 }
+    { parity uint-4 }
    { r1 uint-4 }
    { r2 uint-4 } ;

@ -50,12 +52,12 @@ M:: sfmt generate ( sfmt -- )
    sfmt uint-4-array>> :> array
    state n>> 2 - array nth state (>>r1)
    state n>> 1 - array nth state (>>r2)
-    state m>> :> m 
-    state n>> :> n 
+    state m>> :> m
+    state n>> :> n
    state mask>> :> mask

    n m - >fixnum iota [| i |
-        i array nth-unsafe 
+        i array nth-unsafe
        i m + array nth-unsafe
        mask state r1>> state r2>> formula :> r

@ -75,48 +77,66 @@ M:: sfmt generate ( sfmt -- )
        state r2>> state (>>r1)
        r state (>>r2)
    ] each
-    
-    0 state (>>ix) ;
+
+    0 state (>>index) ;
+
+: period-certified? ( sfmt -- ? )
+    [ uint-4-array>> first ]
+    [ state>> parity>> ] bi vbitand odd-parity? ;
+
+: first-set-bit ( x -- n )
+    0 swap [
+        dup { [ 0 > ] [ 1 bitand 0 = ] } 1&&
+    ] [
+        [ 1 + ] [ -1 shift ] bi*
+    ] while drop ;
+
+: correct-period ( sfmt -- )
+    [ drop 0 ]
+    [ state>> parity>> first first-set-bit ]
+    [ uint-array>> swap '[ _ toggle-bit ] change-nth ] tri ;
+
+: certify-period ( sfmt -- sfmt )
+    dup period-certified? [ dup correct-period ] unless ;

 : <sfmt-array> ( sfmt -- uint-array uint-4-array )
-    state>> 
-    [ n>> 4 * iota >uint-array ] [ seed>> ] bi
+    state>>
+    [ n>> 4 * 1 swap [a,b] >uint-array ] [ seed>> ] bi
    [
        [
-            [
-                [ -30 shift ] [ ] bi bitxor
-                state-multiplier * 32 bits
-            ] dip +
-        ] unless-zero 32 bits
+            [ -30 shift ] [ ] bi bitxor
+            state-multiplier * 32 bits
+        ] dip + 32 bits
    ] uint-array{ } accumulate-as nip
    dup underlying>> byte-array>uint-4-array ;

-: <sfmt-state> ( seed n m mask -- sfmt )
+: <sfmt-state> ( seed n m mask parity -- sfmt )
    sfmt-state <struct>
+        swap >>parity
        swap >>mask
        swap >>m
        swap >>n
        swap >>seed
-        0 >>ix ;
+        0 >>index ;

 : init-sfmt ( sfmt -- sfmt' )
    dup <sfmt-array> [ >>uint-array ] [ >>uint-4-array ] bi*
-    [ generate ] keep ; inline
+    certify-period [ generate ] keep ; inline

-: <sfmt> ( seed n m mask -- sfmt )
+: <sfmt> ( seed n m mask parity -- sfmt )
    <sfmt-state>
    sfmt new
        swap >>state
        init-sfmt ; inline

 : refill-sfmt? ( sfmt -- ? )
-    state>> [ ix>> ] [ n>> 4 * ] bi >= ;
+    state>> [ index>> ] [ n>> 4 * ] bi >= ; inline

-: next-ix ( sfmt -- ix )
-    state>> [ dup 1 + ] change-ix drop ; inline
+: next-index ( sfmt -- index )
+    state>> [ dup 1 + ] change-index drop ; inline

 : next ( sfmt -- n )
-    [ next-ix ] [ uint-array>> ] bi nth-unsafe ; inline
+    [ next-index ] [ uint-array>> ] bi nth-unsafe ; inline

 PRIVATE>

@ -128,5 +148,10 @@ M: sfmt seed-random ( sfmt seed -- sfmt )
    [ drop init-sfmt ] 2bi ;

 : <sfmt-19937> ( seed -- sfmt )
-    348 330 uint-4{ HEX: BFFFFFF6 HEX: BFFAFFFF HEX: DDFECB7F HEX: DFFFFFEF }
+    156 122
+    uint-4{ HEX: dfffffef HEX: ddfecb7f HEX: bffaffff HEX: bffffff6 }
+    uint-4{ HEX: 1 HEX: 0 HEX: 0 HEX: 13c9e684 }
    <sfmt> ; inline
+
+: default-sfmt ( -- sfmt )
+    [ random-32 ] with-secure-random <sfmt-19937> ;
--- a/basis/ui/gadgets/buttons/buttons.factor
+++ b/basis/ui/gadgets/buttons/buttons.factor
@ -69,8 +69,8 @@ C: <button-pen> button-pen

 : button-pen ( button pen -- button pen )
    over find-button {
-        { [ dup { [ pressed?>> ] [ selected?>> ] } 1&& ]
-            [ drop pressed-selected>>
+        { [ dup { [ pressed?>> ] [ selected?>> ] } 1&& ] [
+            drop pressed-selected>>
        ] }
        { [ dup pressed?>> ] [ drop pressed>> ] }
        { [ dup selected?>> ] [ drop selected>> ] }
--- a/basis/ui/gadgets/editors/editors.factor
+++ b/basis/ui/gadgets/editors/editors.factor
@ -112,7 +112,7 @@ M: editor ungraft*
    } cond ;

 : clicked-loc ( editor -- loc )
-    [ hand-rel ] [ point>loc ] bi ;
+    [ hand-rel ] keep point>loc ;

 : click-loc ( editor model -- )
    [ clicked-loc ] dip set-model ;
@ -130,7 +130,7 @@ M: editor ungraft*
    [ loc>x ] [ [ first ] dip line>y ceiling ] 2bi 2array ;

 : caret-loc ( editor -- loc )
-    [ editor-caret ] [ loc>point ] bi ;
+    [ editor-caret ] keep loc>point ;

 : caret-dim ( editor -- dim )
    [ 0 ] dip line-height 2array ;
@ -139,7 +139,7 @@ M: editor ungraft*
    dup graft-state>> second [
        [
            [ caret-loc ] [ caret-dim { 2 1 } v+ ] bi <rect>
-        ] [ scroll>rect ] bi
+        ] keep scroll>rect
    ] [ drop ] if ;

 : draw-caret? ( editor -- ? )
@ -212,7 +212,7 @@ M: editor cap-height font>> font-metrics cap-height>> ;
    [ nip relayout ] 2tri ;

 : caret/mark-changed ( editor -- )
-    [ restart-blinking ] [ scroll>caret ] bi ;
+    [ restart-blinking ] keep scroll>caret ;

 M: editor model-changed
    {
--- a/basis/ui/gadgets/gadgets.factor
+++ b/basis/ui/gadgets/gadgets.factor
@ -115,7 +115,7 @@ M: gadget gadget-text-separator
    gadget-text-separator '[ _ % ] [ gadget-text* ] interleave ;

 M: gadget gadget-text*
-    [ children>> ] [ gadget-seq-text ] bi ;
+    [ children>> ] keep gadget-seq-text ;

 M: array gadget-text*
    [ gadget-text* ] each ;
@ -183,7 +183,7 @@ GENERIC: pref-dim* ( gadget -- dim )

 : pref-dim ( gadget -- dim )
    dup pref-dim>> [ ] [
-        [ pref-dim* ] [ dup layout-state>> ] bi
+        [ pref-dim* ] [ ] [ layout-state>> ] tri
        [ drop ] [ dupd (>>pref-dim) ] if
    ] ?if ;

@ -388,7 +388,7 @@ M: gadget request-focus-on parent>> request-focus-on ;
 M: f request-focus-on 2drop ;

 : request-focus ( gadget -- )
-    [ focusable-child ] [ request-focus-on ] bi ;
+    [ focusable-child ] keep request-focus-on ;

 : focus-path ( gadget -- seq )
    [ focus>> ] follow ;
--- a/core/layouts/layouts.factor
+++ b/core/layouts/layouts.factor
@ -27,6 +27,9 @@ SYMBOL: mega-cache-size
 : tag-fixnum ( n -- tagged )
    tag-bits get shift ;

+: untag-fixnum ( n -- tagged )
+    tag-bits get neg shift ;
+
 ! We do this in its own compilation unit so that they can be
 ! folded below
 <<
--- a/extra/benchmark/mt/authors.txt
+++ b/extra/benchmark/mt/authors.txt
@ -0,0 +1 @@
+Doug Coleman
--- a/extra/benchmark/mt/mt.factor
+++ b/extra/benchmark/mt/mt.factor
@ -0,0 +1,11 @@
+! Copyright (C) 2009 Doug Coleman.
+! See http://factorcode.org/license.txt for BSD license.
+USING: fry kernel math random random.mersenne-twister ;
+IN: benchmark.mt
+
+: mt-benchmark ( n -- )
+    >fixnum HEX: 533d <mersenne-twister> '[ _ random-32* drop ] times ;
+
+: mt-main ( -- ) 10000000 mt-benchmark ;
+
+MAIN: mt-main
--- a/extra/benchmark/sfmt/sfmt.factor
+++ b/extra/benchmark/sfmt/sfmt.factor
@ -6,6 +6,6 @@ IN: benchmark.sfmt
 : sfmt-benchmark ( n -- )
    >fixnum HEX: 533d <sfmt-19937> '[ _ random-32* drop ] times ;

-: sfmt-main ( -- ) 100000000 sfmt-benchmark ;
+: sfmt-main ( -- ) 10000000 sfmt-benchmark ;

 MAIN: sfmt-main
--- a/extra/crypto/xor/xor.factor
+++ b/extra/crypto/xor/xor.factor
@ -9,4 +9,4 @@ ERROR: empty-xor-key ;

 : xor-crypt ( seq key -- seq' )
    [ empty-xor-key ] when-empty
-    [ dup length ] dip '[ _ mod-nth bitxor ] 2map ;
+    [ dup length iota ] dip '[ _ mod-nth bitxor ] 2map ;
--- a/extra/math/matrices/simd/simd.factor
+++ b/extra/math/matrices/simd/simd.factor
@ -120,7 +120,7 @@ TYPED:: m4^n ( m: matrix4 n: fixnum -- m^n: matrix4 )
    identity-matrix4 n [ m m4. ] times ;

 : vmerge-diagonal* ( x y -- h t )
-    [ vmerge-head ] [ swap vmerge-tail ] 2bi ; inline
+    [ (vmerge-head) ] [ swap (vmerge-tail) ] 2bi ; inline
 : vmerge-diagonal ( x -- h t )
    0.0 float-4-with vmerge-diagonal* ; inline

@ -128,7 +128,7 @@ TYPED: diagonal-matrix4 ( diagonal: float-4 -- matrix: matrix4 )
    [ vmerge-diagonal [ vmerge-diagonal ] bi@ ] make-matrix4 ;

 : vmerge-transpose ( a b c d -- a' b' c' d' )
-    [ vmerge ] bi-curry@ bi* ; inline
+    [ (vmerge) ] bi-curry@ bi* ; inline

 TYPED: transpose-matrix4 ( matrix: matrix4 -- matrix: matrix4 )
    [ rows vmerge-transpose vmerge-transpose ] make-matrix4 ;
@ -144,8 +144,8 @@ TYPED:: translation-matrix4 ( offset: float-4 -- matrix: matrix4 )
    [
        float-4{ 1.0 1.0 1.0 1.0 } :> diagonal

-        offset 0 float-4-with vmerge
-        [ 0 float-4-with swap vmerge ] bi@ drop :> z :> y :> x
+        offset 0 float-4-with (vmerge)
+        [ 0 float-4-with swap (vmerge) ] bi@ drop :> z :> y :> x

        diagonal y vmerge-diagonal*
        [ x vmerge-diagonal* ]
@ -194,7 +194,7 @@ TYPED:: frustum-matrix4 ( xy: float-4 near: float far: float -- matrix: matrix4
        float-4{ t t f f } xy near far - float-4-with v? ! denom
        v/ :> fov
        
-        fov 0.0 float-4-with vmerge-head vmerge-diagonal
+        fov 0.0 float-4-with (vmerge-head) vmerge-diagonal
        fov float-4{ f f t t } vand
        float-4{ 0.0 0.0 -1.0 0.0 }
    ] make-matrix4 ;
--- a/extra/random/cmwc/authors.txt
+++ b/extra/random/cmwc/authors.txt
@ -0,0 +1 @@
+Doug Coleman
--- a/extra/random/cmwc/cmwc-tests.factor
+++ b/extra/random/cmwc/cmwc-tests.factor
@ -0,0 +1,42 @@
+! Copyright (C) 2009 Doug Coleman.
+! See http://factorcode.org/license.txt for BSD license.
+USING: arrays kernel random random.cmwc sequences tools.test ;
+IN: random.cmwc.tests
+
+[ ] [
+    cmwc-4096 [
+        random-32 drop
+    ] with-random
+] unit-test
+
+[
+{
+    4294604858
+    4294948512
+    4294929730
+    4294910948
+    4294892166
+    4294873384
+    4294854602
+    4294835820
+    4294817038
+    4294798256
+}
+] [
+    cmwc-4096
+    4096 iota >array 362436 <cmwc-seed> seed-random [
+        10 [ random-32 ] replicate
+    ] with-random
+] unit-test
+
+[ t ] [
+    cmwc-4096 [
+        4096 iota >array 362436 <cmwc-seed> seed-random [
+            10 [ random-32 ] replicate
+        ] with-random
+    ] [
+        4096 iota >array 362436 <cmwc-seed> seed-random [
+            10 [ random-32 ] replicate
+        ] with-random
+    ] bi =
+] unit-test
--- a/extra/random/cmwc/cmwc.factor
+++ b/extra/random/cmwc/cmwc.factor
@ -0,0 +1,54 @@
+! Copyright (C) 2009 Doug Coleman.
+! See http://factorcode.org/license.txt for BSD license.
+USING: accessors arrays fry kernel locals math math.bitwise
+random sequences ;
+IN: random.cmwc
+
+! Multiply-with-carry RNG
+
+TUPLE: cmwc Q a b c i r mod ;
+
+TUPLE: cmwc-seed Q c ;
+
+: <cmwc> ( length a b c -- cmwc )
+    cmwc new
+        swap >>c
+        swap >>b
+        swap >>a
+        swap [ 1 - >>i ] [ 0 <array> >>Q ] bi
+        dup b>> 1 - >>r
+        dup Q>> length 1 - >>mod ;
+
+: <cmwc-seed> ( Q c -- cmwc-seed )
+    cmwc-seed new
+        swap >>c
+        swap >>Q ; inline
+
+M: cmwc seed-random
+    [ Q>> >>Q ]
+    [ Q>> length 1 - >>i ]
+    [ c>> >>c ] tri ;
+
+M:: cmwc random-32* ( cmwc -- n )
+    cmwc dup mod>> '[ 1 + _ bitand ] change-i
+    [ a>> ]
+    [ [ i>> ] [ Q>> ] bi nth * ]
+    [ c>> + ] tri :> t!
+
+    t -32 shift cmwc (>>c)
+
+    t cmwc [ b>> bitand ] [ c>> + ] bi 64 bits t!
+    t cmwc r>> > [
+        cmwc [ 1 + ] change-c drop
+        t cmwc b>> - 64 bits t!
+    ] when
+
+    cmwc [ r>> t - 32 bits dup ] [ i>> ] [ Q>> ] tri set-nth ;
+
+: cmwc-4096 ( -- cmwc )
+    4096
+    [ 18782 4294967295 362436 <cmwc> ]
+    [
+        '[ [ random-32 ] replicate ] with-system-random
+        362436 <cmwc-seed> seed-random
+    ] bi ;
--- a/extra/random/lagged-fibonacci/authors.txt
+++ b/extra/random/lagged-fibonacci/authors.txt
@ -0,0 +1 @@
+Doug Coleman
--- a/extra/random/lagged-fibonacci/lagged-fibonacci-tests.factor
+++ b/extra/random/lagged-fibonacci/lagged-fibonacci-tests.factor
@ -0,0 +1,24 @@
+! Copyright (C) 2009 Doug Coleman.
+! See http://factorcode.org/license.txt for BSD license.
+USING: fry kernel math.functions random random.lagged-fibonacci
+sequences specialized-arrays.instances.double tools.test ;
+IN: random.lagged-fibonacci.tests
+
+[ t ] [
+    3 <lagged-fibonacci> [
+        1000 [ random-float ] double-array{ } replicate-as
+        999 swap nth 0.860072135925293 -.01 ~
+    ] with-random
+] unit-test
+
+[ t ] [
+    3 <lagged-fibonacci> [
+        [
+            1000 [ random-float ] double-array{ } replicate-as
+        ] with-random
+    ] [
+        3 seed-random [
+            1000 [ random-float ] double-array{ } replicate-as
+        ] with-random =
+    ] bi
+] unit-test
--- a/extra/random/lagged-fibonacci/lagged-fibonacci.factor
+++ b/extra/random/lagged-fibonacci/lagged-fibonacci.factor
@ -0,0 +1,71 @@
+! Copyright (C) 2009 Doug Coleman.
+! See http://factorcode.org/license.txt for BSD license.
+USING: accessors alien.c-types fry kernel literals locals math
+random sequences specialized-arrays namespaces ;
+SPECIALIZED-ARRAY: double
+IN: random.lagged-fibonacci
+
+TUPLE: lagged-fibonacci u pt0 pt1 ;
+
+<PRIVATE
+
+CONSTANT: p-r 1278
+CONSTANT: q-r 417
+
+CONSTANT: lagged-fibonacci 899999963
+CONSTANT: lagged-fibonacci-max-seed 900000000
+CONSTANT: lagged-fibonacci-sig-bits 24
+
+: normalize-seed ( seed -- seed' )
+    abs lagged-fibonacci-max-seed mod ;
+
+: adjust-ptr ( ptr -- ptr' )
+    1 - dup 0 < [ drop p-r ] when ;
+
+PRIVATE>
+
+M:: lagged-fibonacci seed-random ( lagged-fibonacci seed! -- lagged-fibonacci )
+    seed normalize-seed seed!
+    seed 30082 /i :> ij
+    seed 30082 ij * - :> kl
+    ij 177 /i 177 mod 2 + :> i!
+    ij 177 mod 2 + :> j!
+    kl 169 /i 178 mod 1 + :> k!
+    kl 169 mod :> l!
+
+    lagged-fibonacci u>> [
+        drop
+        0.0 :> s!
+        0.5 :> t!
+        0.0 :> m!
+        lagged-fibonacci-sig-bits [
+            i j * 179 mod k * 179 mod m!
+            j i!
+            k j!
+            m k!
+            53 l * 1 + 169 mod l!
+            l m * 64 mod 31 > [ s t + s! ] when
+            t 0.5 * t!
+        ] times
+        s
+    ] change-each
+    lagged-fibonacci p-r >>pt0
+        q-r >>pt1 ;
+
+: <lagged-fibonacci> ( seed -- lagged-fibonacci )
+    lagged-fibonacci new
+        p-r 1 + <double-array> >>u
+        swap seed-random ;
+
+GENERIC: random-float* ( tuple -- r )
+ 
+: random-float ( -- n ) random-generator get random-float* ;
+
+M:: lagged-fibonacci random-float* ( lagged-fibonacci -- x )
+    lagged-fibonacci [ pt0>> ] [ u>> ] bi nth
+    lagged-fibonacci [ pt1>> ] [ u>> ] bi nth - :> uni!
+    uni 0.0 < [ uni 1.0 + uni! ] when
+    uni lagged-fibonacci [ pt0>> ] [ u>> ] bi set-nth
+    lagged-fibonacci [ adjust-ptr ] change-pt0 drop
+    lagged-fibonacci [ adjust-ptr ] change-pt1 drop
+    uni ; inline
--- a/vm/code_block.cpp
+++ b/vm/code_block.cpp
@ -533,9 +533,13 @@ code_block *factor_vm::add_code_block(cell type, cell code_, cell labels_, cell
 	compiled->type = type;
 	compiled->last_scan = data->nursery();
 	compiled->needs_fixup = true;
-	compiled->relocation = relocation.value();

 	/* slight space optimization */
+	if(relocation.type() == BYTE_ARRAY_TYPE && array_capacity(relocation.untagged()) == 0)
+		compiled->relocation = F;
+	else
+		compiled->relocation = relocation.value();
+
 	if(literals.type() == ARRAY_TYPE && array_capacity(literals.untagged()) == 0)
 		compiled->literals = F;
 	else
--- a/vm/os-genunix.cpp
+++ b/vm/os-genunix.cpp
@ -30,6 +30,7 @@ const char *default_image_path()
 	char *new_path = new char[PATH_MAX + SUFFIX_LEN + 1];
 	memcpy(new_path,path,len + 1);
 	memcpy(new_path + len,SUFFIX,SUFFIX_LEN + 1);
+	free(const_cast<char *>(path));
 	return new_path;
 }

--- a/vm/os-linux.cpp
+++ b/vm/os-linux.cpp
@ -3,7 +3,7 @@
 namespace factor
 {

-/* Snarfed from SBCL linux-so.c. You must delete[] the result yourself. */
+/* Snarfed from SBCL linux-so.c. You must free() the result yourself. */
 const char *vm_executable_path()
 {
 	char *path = new char[PATH_MAX + 1];
@ -17,7 +17,10 @@ const char *vm_executable_path()
 	else
 	{
 		path[size] = '\0';
-		return safe_strdup(path);
+
+		const char *ret = safe_strdup(path);
+		delete[] path;
+		return ret;
 	}
 }

--- a/vm/quotations.cpp
+++ b/vm/quotations.cpp
@ -103,6 +103,28 @@ bool quotation_jit::stack_frame_p()
 	return false;
 }

+bool quotation_jit::trivial_quotation_p(array *elements)
+{
+	return array_capacity(elements) == 1 && tagged<object>(array_nth(elements,0)).type_p(WORD_TYPE);
+}
+
+void quotation_jit::emit_quot(cell quot_)
+{
+	gc_root<quotation> quot(quot_,parent_vm);
+
+	array *elements = parent_vm->untag<array>(quot->array);
+
+	/* If the quotation consists of a single word, compile a direct call
+	to the word. */
+	if(trivial_quotation_p(elements))
+		literal(array_nth(elements,0));
+	else
+	{
+		if(compiling) parent_vm->jit_compile(quot.value(),relocate);
+		literal(quot.value());
+	}
+}
+
 /* Allocates memory */
 void quotation_jit::iterate_quotation()
 {
@ -194,14 +216,8 @@ void quotation_jit::iterate_quotation()
 				if(stack_frame) emit(parent_vm->userenv[JIT_EPILOG]);
 				tail_call = true;

-				if(compiling)
-				{
-					parent_vm->jit_compile(array_nth(elements.untagged(),i),relocate);
-					parent_vm->jit_compile(array_nth(elements.untagged(),i + 1),relocate);
-				}
-
-				literal(array_nth(elements.untagged(),i));
-				literal(array_nth(elements.untagged(),i + 1));
+				emit_quot(array_nth(elements.untagged(),i));
+				emit_quot(array_nth(elements.untagged(),i + 1));
 				emit(parent_vm->userenv[JIT_IF]);

 				i += 2;
@ -209,25 +225,22 @@ void quotation_jit::iterate_quotation()
 			/* dip */
 			else if(fast_dip_p(i,length))
 			{
-				if(compiling)
-					parent_vm->jit_compile(obj.value(),relocate);
-				emit_with(parent_vm->userenv[JIT_DIP],obj.value());
+				emit_quot(obj.value());
+				emit(parent_vm->userenv[JIT_DIP]);
 				i++;
 			}
 			/* 2dip */
 			else if(fast_2dip_p(i,length))
 			{
-				if(compiling)
-					parent_vm->jit_compile(obj.value(),relocate);
-				emit_with(parent_vm->userenv[JIT_2DIP],obj.value());
+				emit_quot(obj.value());
+				emit(parent_vm->userenv[JIT_2DIP]);
 				i++;
 			}
 			/* 3dip */
 			else if(fast_3dip_p(i,length))
 			{
-				if(compiling)
-					parent_vm->jit_compile(obj.value(),relocate);
-				emit_with(parent_vm->userenv[JIT_3DIP],obj.value());
+				emit_quot(obj.value());
+				emit(parent_vm->userenv[JIT_3DIP]);
 				i++;
 			}
 			else
--- a/vm/quotations.hpp
+++ b/vm/quotations.hpp
@ -13,6 +13,8 @@ struct quotation_jit : public jit {

 	void emit_mega_cache_lookup(cell methods, fixnum index, cell cache);
 	bool primitive_call_p(cell i, cell length);
+	bool trivial_quotation_p(array *elements);
+	void emit_quot(cell quot);
 	bool fast_if_p(cell i, cell length);
 	bool fast_dip_p(cell i, cell length);
 	bool fast_2dip_p(cell i, cell length);
				`@ -0,0 +1 @@`
				`Conversion, packing, and unpacking of SIMD vectors`