diff --git a/basis/compiler/cfg/instructions/instructions.factor b/basis/compiler/cfg/instructions/instructions.factor index 6b61cb53cb..57d88a2d86 100644 --- a/basis/compiler/cfg/instructions/instructions.factor +++ b/basis/compiler/cfg/instructions/instructions.factor @@ -280,6 +280,11 @@ def: dst use: src literal: shuffle rep ; +PURE-INSN: ##tail>head-vector +def: dst +use: src +literal: rep ; + PURE-INSN: ##merge-vector-head def: dst use: src1 src2 @@ -290,10 +295,39 @@ def: dst use: src1 src2 literal: rep ; +PURE-INSN: ##signed-pack-vector +def: dst +use: src1 src2 +literal: rep ; + +PURE-INSN: ##unsigned-pack-vector +def: dst +use: src1 src2 +literal: rep ; + +PURE-INSN: ##unpack-vector-head +def: dst +use: src +literal: rep ; + +PURE-INSN: ##unpack-vector-tail +def: dst +use: src +literal: rep ; + +PURE-INSN: ##integer>float-vector +def: dst +use: src +literal: rep ; + +PURE-INSN: ##float>integer-vector +def: dst +use: src +literal: rep ; + PURE-INSN: ##compare-vector def: dst use: src1 src2 -temp: temp literal: rep cc ; PURE-INSN: ##test-vector @@ -781,8 +815,6 @@ UNION: kill-vreg-insn UNION: def-is-use-insn ##box-alien ##box-displaced-alien -##compare-vector -##not-vector ##string-nth ##unbox-any-c-ptr ; diff --git a/basis/compiler/cfg/intrinsics/intrinsics.factor b/basis/compiler/cfg/intrinsics/intrinsics.factor index 124aac5b18..2dcd6d4b45 100644 --- a/basis/compiler/cfg/intrinsics/intrinsics.factor +++ b/basis/compiler/cfg/intrinsics/intrinsics.factor @@ -171,18 +171,18 @@ IN: compiler.cfg.intrinsics { math.vectors.simd.intrinsics:(simd-vbitandn) [ [ ^^andn-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-vbitor) [ [ ^^or-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-vbitxor) [ [ ^^xor-vector ] emit-binary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-vbitnot) [ [ ^^not-vector ] emit-unary-vector-op ] } + { math.vectors.simd.intrinsics:(simd-vbitnot) [ [ generate-not-vector ] emit-unary-vector-op ] } { math.vectors.simd.intrinsics:(simd-vand) [ [ ^^and-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-vandn) [ [ ^^andn-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-vor) [ [ ^^or-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-vxor) [ [ ^^xor-vector ] emit-binary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-vnot) [ [ ^^not-vector ] emit-unary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-v<=) [ [ cc<= ^^compare-vector ] emit-binary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-v<) [ [ cc< ^^compare-vector ] emit-binary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-v=) [ [ cc= ^^compare-vector ] emit-binary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-v>) [ [ cc> ^^compare-vector ] emit-binary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-v>=) [ [ cc>= ^^compare-vector ] emit-binary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-vunordered?) [ [ cc/<>= ^^compare-vector ] emit-binary-vector-op ] } + { math.vectors.simd.intrinsics:(simd-vnot) [ [ generate-not-vector ] emit-unary-vector-op ] } + { math.vectors.simd.intrinsics:(simd-v<=) [ [ cc<= generate-compare-vector ] emit-binary-vector-op ] } + { math.vectors.simd.intrinsics:(simd-v<) [ [ cc< generate-compare-vector ] emit-binary-vector-op ] } + { math.vectors.simd.intrinsics:(simd-v=) [ [ cc= generate-compare-vector ] emit-binary-vector-op ] } + { math.vectors.simd.intrinsics:(simd-v>) [ [ cc> generate-compare-vector ] emit-binary-vector-op ] } + { math.vectors.simd.intrinsics:(simd-v>=) [ [ cc>= generate-compare-vector ] emit-binary-vector-op ] } + { math.vectors.simd.intrinsics:(simd-vunordered?) [ [ cc/<>= generate-compare-vector ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-vany?) [ [ vcc-any ^^test-vector ] emit-unary-vector-op ] } { math.vectors.simd.intrinsics:(simd-vall?) [ [ vcc-all ^^test-vector ] emit-unary-vector-op ] } { math.vectors.simd.intrinsics:(simd-vnone?) [ [ vcc-none ^^test-vector ] emit-unary-vector-op ] } @@ -194,8 +194,14 @@ IN: compiler.cfg.intrinsics { math.vectors.simd.intrinsics:(simd-gather-2) [ emit-gather-vector-2 ] } { math.vectors.simd.intrinsics:(simd-gather-4) [ emit-gather-vector-4 ] } { math.vectors.simd.intrinsics:(simd-vshuffle) [ emit-shuffle-vector ] } - { math.vectors.simd.intrinsics:(simd-vmerge-head) [ [ ^^merge-vector-head ] emit-binary-vector-op ] } - { math.vectors.simd.intrinsics:(simd-vmerge-tail) [ [ ^^merge-vector-tail ] emit-binary-vector-op ] } + { math.vectors.simd.intrinsics:(simd-(vmerge-head)) [ [ ^^merge-vector-head ] emit-binary-vector-op ] } + { math.vectors.simd.intrinsics:(simd-(vmerge-tail)) [ [ ^^merge-vector-tail ] emit-binary-vector-op ] } + { math.vectors.simd.intrinsics:(simd-(v>float)) [ [ ^^integer>float-vector ] emit-unary-vector-op ] } + { math.vectors.simd.intrinsics:(simd-(v>integer)) [ [ ^^float>integer-vector ] emit-unary-vector-op ] } + { math.vectors.simd.intrinsics:(simd-(vpack-signed)) [ [ ^^signed-pack-vector ] emit-binary-vector-op ] } + { math.vectors.simd.intrinsics:(simd-(vpack-unsigned)) [ [ ^^unsigned-pack-vector ] emit-binary-vector-op ] } + { math.vectors.simd.intrinsics:(simd-(vunpack-head)) [ [ generate-unpack-vector-head ] emit-unary-vector-op ] } + { math.vectors.simd.intrinsics:(simd-(vunpack-tail)) [ [ generate-unpack-vector-tail ] emit-unary-vector-op ] } { math.vectors.simd.intrinsics:(simd-select) [ emit-select-vector ] } { math.vectors.simd.intrinsics:(simd-sum) [ [ ^^horizontal-add-vector ] emit-unary-vector-op ] } { math.vectors.simd.intrinsics:alien-vector [ emit-alien-vector ] } diff --git a/basis/compiler/cfg/intrinsics/simd/simd.factor b/basis/compiler/cfg/intrinsics/simd/simd.factor index 62ee1cf019..9986588e3e 100644 --- a/basis/compiler/cfg/intrinsics/simd/simd.factor +++ b/basis/compiler/cfg/intrinsics/simd/simd.factor @@ -2,8 +2,9 @@ ! See http://factorcode.org/license.txt for BSD license. USING: accessors byte-arrays fry cpu.architecture kernel math sequences math.vectors.simd.intrinsics macros generalizations -combinators combinators.short-circuit arrays +combinators combinators.short-circuit arrays locals compiler.tree.propagation.info compiler.cfg.builder.blocks +compiler.cfg.comparisons compiler.cfg.stacks compiler.cfg.stacks.local compiler.cfg.hats compiler.cfg.instructions compiler.cfg.registers compiler.cfg.intrinsics.alien ; @@ -115,3 +116,64 @@ MACRO: if-literals-match ( quots -- ) [ byte-array inline-alien-setter? ] inline-alien ] with emit-vector-op ; + +: generate-not-vector ( src rep -- dst ) + dup %not-vector-reps member? + [ ^^not-vector ] + [ [ ^^fill-vector ] [ ^^xor-vector ] bi ] if ; + +:: (generate-compare-vector) ( src1 src2 rep {cc,swap} -- dst ) + {cc,swap} first2 :> swap? :> cc + swap? + [ src2 src1 rep cc ^^compare-vector ] + [ src1 src2 rep cc ^^compare-vector ] if ; + +:: generate-compare-vector ( src1 src2 rep orig-cc -- dst ) + rep orig-cc %compare-vector-ccs :> not? :> ccs + + ccs empty? + [ rep not? [ ^^fill-vector ] [ ^^zero-vector ] if ] + [ + ccs unclip :> first-cc :> rest-ccs + src1 src2 rep first-cc (generate-compare-vector) :> first-dst + + rest-ccs first-dst + [ [ src1 src2 rep ] dip (generate-compare-vector) rep ^^or-vector ] + reduce + + not? [ rep generate-not-vector ] when + ] if ; + +:: generate-unpack-vector-head ( src rep -- dst ) + { + { + [ rep %unpack-vector-head-reps member? ] + [ src rep ^^unpack-vector-head ] + } + [ + rep ^^zero-vector :> zero + zero src rep cc> ^^compare-vector :> sign + src sign rep ^^merge-vector-head + ] + } cond ; + +:: generate-unpack-vector-tail ( src rep -- dst ) + { + { + [ rep %unpack-vector-tail-reps member? ] + [ src rep ^^unpack-vector-tail ] + } + { + [ rep %unpack-vector-head-reps member? ] + [ + src rep ^^tail>head-vector :> tail + tail rep ^^unpack-vector-head + ] + } + [ + rep ^^zero-vector :> zero + zero src rep cc> ^^compare-vector :> sign + src sign rep ^^merge-vector-tail + ] + } cond ; + diff --git a/basis/compiler/cfg/linear-scan/linear-scan-tests.factor b/basis/compiler/cfg/linear-scan/linear-scan-tests.factor index 2f49bf7fae..3a9a7ac0a1 100644 --- a/basis/compiler/cfg/linear-scan/linear-scan-tests.factor +++ b/basis/compiler/cfg/linear-scan/linear-scan-tests.factor @@ -1465,7 +1465,7 @@ V{ [ ] [ { 1 2 3 } test-linear-scan-on-cfg ] unit-test -[ { 1 } ] [ 1 get instructions>> first tagged-values>> ] unit-test +[ { { 0 1 } } ] [ 1 get instructions>> first tagged-values>> ] unit-test V{ T{ ##peek f 0 D 0 } @@ -1487,4 +1487,4 @@ V{ [ ] [ { 1 2 3 } test-linear-scan-on-cfg ] unit-test -[ { 1 } ] [ 1 get instructions>> first tagged-values>> ] unit-test +[ { { 0 1 } } ] [ 1 get instructions>> first tagged-values>> ] unit-test diff --git a/basis/compiler/cfg/representations/representations.factor b/basis/compiler/cfg/representations/representations.factor index a2311ca964..42059f4152 100644 --- a/basis/compiler/cfg/representations/representations.factor +++ b/basis/compiler/cfg/representations/representations.factor @@ -226,24 +226,40 @@ SYMBOL: phi-mappings M: ##phi conversions-for-insn [ , ] [ [ inputs>> values ] [ dst>> ] bi phi-mappings get set-at ] bi ; -! When a literal zero vector is unboxed, we replace the ##load-reference -! with a ##zero-vector instruction since this is more efficient. +! When a literal zeroes/ones vector is unboxed, we replace the ##load-reference +! with a ##zero-vector or ##fill-vector instruction since this is more efficient. : convert-to-zero-vector? ( insn -- ? ) { [ dst>> rep-of vector-rep? ] - [ obj>> B{ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 } = ] + [ obj>> B{ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 } = ] + } 1&& ; +: convert-to-fill-vector? ( insn -- ? ) + { + [ dst>> rep-of vector-rep? ] + [ obj>> B{ 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 } = ] } 1&& ; -: convert-to-zero-vector ( insn -- ) - dst>> dup rep-of ##zero-vector ; +: (convert-to-zero/fill-vector) ( insn -- dst rep ) + dst>> dup rep-of ; inline + +: conversions-for-load-insn ( insn -- ?insn ) + { + { + [ dup convert-to-zero-vector? ] + [ (convert-to-zero/fill-vector) ##zero-vector f ] + } + { + [ dup convert-to-fill-vector? ] + [ (convert-to-zero/fill-vector) ##fill-vector f ] + } + [ ] + } cond ; M: ##load-reference conversions-for-insn - dup convert-to-zero-vector? - [ convert-to-zero-vector ] [ call-next-method ] if ; + conversions-for-load-insn [ call-next-method ] when* ; M: ##load-constant conversions-for-insn - dup convert-to-zero-vector? - [ convert-to-zero-vector ] [ call-next-method ] if ; + conversions-for-load-insn [ call-next-method ] when* ; M: vreg-insn conversions-for-insn [ compute-renaming-set ] [ perform-renaming ] bi ; @@ -312,4 +328,4 @@ PRIVATE> [ insert-conversions ] [ ] } cleave - representations get cfg get (>>reps) ; \ No newline at end of file + representations get cfg get (>>reps) ; diff --git a/basis/compiler/cfg/value-numbering/rewrite/rewrite.factor b/basis/compiler/cfg/value-numbering/rewrite/rewrite.factor index 9827e02bf5..56ec16eed6 100755 --- a/basis/compiler/cfg/value-numbering/rewrite/rewrite.factor +++ b/basis/compiler/cfg/value-numbering/rewrite/rewrite.factor @@ -459,7 +459,7 @@ M: ##shuffle-vector rewrite value>> over rep>> { { float-4-rep [ float>bits 4 >le (fold-scalar>vector) ] } { double-2-rep [ double>bits 8 >le (fold-scalar>vector) ] } - [ rep-component-type heap-size >le (fold-scalar>vector) ] + [ [ untag-fixnum ] dip rep-component-type heap-size >le (fold-scalar>vector) ] } case ; M: ##scalar>vector rewrite diff --git a/basis/compiler/cfg/value-numbering/value-numbering-tests.factor b/basis/compiler/cfg/value-numbering/value-numbering-tests.factor index b959a09e19..5f8eda2c08 100644 --- a/basis/compiler/cfg/value-numbering/value-numbering-tests.factor +++ b/basis/compiler/cfg/value-numbering/value-numbering-tests.factor @@ -4,7 +4,7 @@ cpu.architecture tools.test kernel math combinators.short-circuit accessors sequences compiler.cfg.predecessors locals compiler.cfg.dce compiler.cfg.ssa.destruction compiler.cfg.loop-detection compiler.cfg.representations compiler.cfg assocs vectors arrays -layouts namespaces alien ; +layouts literals namespaces alien ; IN: compiler.cfg.value-numbering.tests : trim-temps ( insns -- insns ) @@ -1215,6 +1215,20 @@ cell 8 = [ } value-numbering-step ] unit-test +[ + { + T{ ##load-constant f 0 $[ 55 tag-fixnum ] } + T{ ##load-constant f 1 B{ 55 0 0 0 55 0 0 0 55 0 0 0 55 0 0 0 } } + T{ ##copy f 2 1 any-rep } + } +] [ + { + T{ ##load-constant f 0 $[ 55 tag-fixnum ] } + T{ ##scalar>vector f 1 0 int-4-rep } + T{ ##shuffle-vector f 2 1 { 0 0 0 0 } float-4-rep } + } value-numbering-step +] unit-test + [ { T{ ##load-constant f 0 1.25 } diff --git a/basis/compiler/codegen/codegen.factor b/basis/compiler/codegen/codegen.factor index 121f09a5a8..7c7f9a696c 100755 --- a/basis/compiler/codegen/codegen.factor +++ b/basis/compiler/codegen/codegen.factor @@ -160,11 +160,19 @@ CODEGEN: ##double>single-float %double>single-float CODEGEN: ##integer>float %integer>float CODEGEN: ##float>integer %float>integer CODEGEN: ##zero-vector %zero-vector +CODEGEN: ##fill-vector %fill-vector CODEGEN: ##gather-vector-2 %gather-vector-2 CODEGEN: ##gather-vector-4 %gather-vector-4 CODEGEN: ##shuffle-vector %shuffle-vector +CODEGEN: ##tail>head-vector %tail>head-vector CODEGEN: ##merge-vector-head %merge-vector-head CODEGEN: ##merge-vector-tail %merge-vector-tail +CODEGEN: ##signed-pack-vector %signed-pack-vector +CODEGEN: ##unsigned-pack-vector %unsigned-pack-vector +CODEGEN: ##unpack-vector-head %unpack-vector-head +CODEGEN: ##unpack-vector-tail %unpack-vector-tail +CODEGEN: ##integer>float-vector %integer>float-vector +CODEGEN: ##float>integer-vector %float>integer-vector CODEGEN: ##compare-vector %compare-vector CODEGEN: ##test-vector %test-vector CODEGEN: ##add-vector %add-vector diff --git a/basis/compiler/tree/propagation/simd/simd.factor b/basis/compiler/tree/propagation/simd/simd.factor index 462e5d6e0b..4e9734693b 100644 --- a/basis/compiler/tree/propagation/simd/simd.factor +++ b/basis/compiler/tree/propagation/simd/simd.factor @@ -31,8 +31,14 @@ IN: compiler.tree.propagation.simd (simd-hlshift) (simd-hrshift) (simd-vshuffle) - (simd-vmerge-head) - (simd-vmerge-tail) + (simd-(vmerge-head)) + (simd-(vmerge-tail)) + (simd-(v>float)) + (simd-(v>integer)) + (simd-(vpack-signed)) + (simd-(vpack-unsigned)) + (simd-(vunpack-head)) + (simd-(vunpack-tail)) (simd-v<=) (simd-v<) (simd-v=) diff --git a/basis/compression/huffman/huffman.factor b/basis/compression/huffman/huffman.factor index 2df4dce916..9922048009 100755 --- a/basis/compression/huffman/huffman.factor +++ b/basis/compression/huffman/huffman.factor @@ -2,31 +2,35 @@ ! See http://factorcode.org/license.txt for BSD license. USING: accessors arrays assocs fry hashtables io kernel locals math math.order math.parser -math.ranges multiline sequences ; +math.ranges multiline sequences bitstreams bit-arrays ; IN: compression.huffman QUALIFIED-WITH: bitstreams bs ( -- code ) 0 0 0 huffman-code boa ; -: next-size ( code -- ) [ 1 + ] change-size [ 2 * ] change-code drop ; -: next-code ( code -- ) [ 1 + ] change-code drop ; +: ( -- huffman-code ) + 0 0 0 huffman-code boa ; inline -:: all-patterns ( huff n -- seq ) - n log2 huff size>> - :> free-bits +: next-size ( huffman-code -- ) + [ 1 + ] change-size + [ 2 * ] change-code drop ; inline + +: next-code ( huffman-code -- ) + [ 1 + ] change-code drop ; inline + +:: all-patterns ( huffman-code n -- seq ) + n log2 huffman-code size>> - :> free-bits free-bits 0 > - [ free-bits 2^ [0,b) [ huff code>> free-bits 2^ * + ] map ] - [ huff code>> free-bits neg 2^ /i 1array ] if ; + [ free-bits 2^ iota [ huffman-code code>> free-bits 2^ * + ] map ] + [ huffman-code code>> free-bits neg 2^ /i 1array ] if ; -:: huffman-each ( tdesc quot: ( huff -- ) -- ) +:: huffman-each ( tdesc quot: ( huffman-code -- ) -- ) :> code tdesc [ @@ -34,7 +38,7 @@ TUPLE: huffman-code [ code (>>value) code clone quot call code next-code ] each ] each ; inline -: update-reverse-table ( huff n table -- ) +: update-reverse-table ( huffman-code n table -- ) [ drop all-patterns ] [ nip '[ _ swap _ set-at ] each ] 3bi ; @@ -43,49 +47,29 @@ TUPLE: huffman-code tdesc [ n table update-reverse-table ] huffman-each table seq>> ; -:: huffman-table ( tdesc max -- table ) - max f :> table - tdesc [ [ ] [ value>> ] bi table set-nth ] huffman-each - table ; - PRIVATE> -! decoder - TUPLE: huffman-decoder - { bs } - { tdesc } - { rtable } - { bits/level } ; + { bs bit-reader } + { tdesc array } + { rtable array } + { bits/level fixnum } ; -: ( bs tdesc -- decoder ) +: ( bs tdesc -- huffman-decoder ) huffman-decoder new - swap >>tdesc - swap >>bs - 16 >>bits/level - [ ] [ tdesc>> ] [ bits/level>> 2^ ] tri reverse-table >>rtable ; + swap >>tdesc + swap >>bs + 16 >>bits/level + dup [ tdesc>> ] [ bits/level>> 2^ ] bi reverse-table >>rtable ; inline -: read1-huff ( decoder -- elt ) - 16 over [ bs>> bs:peek ] [ rtable>> nth ] bi ! first/last - [ size>> swap bs>> bs:seek ] [ value>> ] bi ; +: read1-huff ( huffman-decoder -- elt ) + 16 over [ bs>> bs:peek ] [ rtable>> nth ] bi + [ size>> swap bs>> bs:seek ] [ value>> ] bi ; inline -! %remove : reverse-bits ( value bits -- value' ) - [ >bin ] [ CHAR: 0 pad-head bin> ] bi* ; + [ integer>bit-array ] dip + f pad-tail reverse bit-array>integer ; inline -: read1-huff2 ( decoder -- elt ) - 16 over [ bs>> bs:peek 16 reverse-bits ] [ rtable>> nth ] bi ! first/last - [ size>> swap bs>> bs:seek ] [ value>> ] bi ; - -/* -: huff>string ( code -- str ) - [ value>> number>string ] - [ [ code>> ] [ size>> bits>string ] bi ] bi - " = " glue ; - -: huff. ( code -- ) huff>string print ; - -:: rtable. ( rtable -- ) - rtable length>> log2 :> n - rtable [ swap n bits. [ huff. ] each ] assoc-each ; -*/ +: read1-huff2 ( huffman-decoder -- elt ) + 16 over [ bs>> bs:peek 16 reverse-bits ] [ rtable>> nth ] bi + [ size>> swap bs>> bs:seek ] [ value>> ] bi ; inline diff --git a/basis/compression/inflate/inflate-tests.factor b/basis/compression/inflate/inflate-tests.factor new file mode 100644 index 0000000000..e2beefb9b2 --- /dev/null +++ b/basis/compression/inflate/inflate-tests.factor @@ -0,0 +1,102 @@ +! Copyright (C) 2009 Doug Coleman. +! See http://factorcode.org/license.txt for BSD license. +USING: tools.test compression.inflate ; +IN: compression.inflate.tests + +[ +BV{ + 1 255 255 255 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 122 121 94 119 + 239 237 227 88 16 16 10 5 16 17 26 172 3 20 19 245 22 54 55 + 70 245 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 139 138 112 127 12 6 234 132 254 250 9 + 24 16 19 38 182 25 27 40 154 2 240 239 235 25 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 163 163 154 57 223 218 192 128 6 4 39 87 13 9 22 63 245 239 + 239 242 240 240 242 243 4 17 17 25 21 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 223 219 + 197 140 26 21 26 221 108 117 136 170 0 0 0 0 0 0 0 194 148 + 147 138 6 4 4 5 4 33 176 175 161 5 80 81 95 251 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 122 121 105 33 246 246 234 80 241 240 + 226 77 28 25 4 58 29 30 68 108 0 0 0 0 0 0 0 0 0 0 0 0 108 + 109 118 250 2 24 24 39 230 225 221 203 107 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 103 102 80 101 249 245 214 208 13 6 240 142 + 44 37 29 65 11 13 22 250 11 15 30 180 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 1 200 201 196 1 208 195 176 132 224 223 207 50 + 253 6 15 181 251 253 0 6 240 241 239 77 14 10 246 64 33 24 + 13 0 7 252 20 0 247 1 249 0 241 253 1 205 129 132 173 52 + 124 123 107 32 17 16 6 15 115 117 143 209 0 0 0 0 1 255 255 + 255 0 0 0 0 0 128 118 95 119 221 222 204 136 1 3 0 0 22 27 + 35 0 249 239 239 0 30 22 3 0 247 4 18 0 250 248 247 0 29 26 + 31 222 239 249 6 164 241 241 230 48 19 19 28 209 29 30 35 + 154 87 88 109 228 1 255 255 255 0 0 0 0 0 0 0 0 0 136 136 + 116 39 227 224 218 110 245 245 242 61 238 238 237 36 11 1 + 254 9 32 37 20 213 7 14 40 151 2 0 246 36 6 8 20 210 8 8 5 + 4 33 32 41 184 10 11 17 232 69 70 80 251 0 255 255 255 0 + 255 255 255 0 255 255 255 0 255 255 255 0 255 255 255 0 255 + 255 255 0 107 104 82 144 88 81 34 255 162 159 134 122 255 + 255 255 0 255 255 255 0 255 255 255 0 255 255 255 0 195 194 + 184 2 255 255 255 0 255 255 255 0 0 255 255 255 0 255 255 + 255 0 255 255 255 0 255 255 255 0 255 255 255 0 174 171 167 + 15 102 99 63 233 132 129 99 133 255 255 255 0 255 255 255 0 + 255 255 255 0 255 255 255 0 255 255 255 0 255 255 255 0 255 + 255 255 0 255 255 255 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 119 119 116 4 240 239 217 143 28 28 30 228 34 36 45 232 0 0 + 0 0 0 0 0 0 38 38 38 4 28 28 28 2 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 4 0 0 0 0 0 0 0 0 33 33 33 3 38 38 38 9 243 243 243 + 252 14 12 44 24 233 235 4 89 250 251 216 126 92 91 76 241 8 + 9 21 235 69 69 70 2 250 250 249 214 0 0 0 223 0 0 0 0 0 0 0 + 0 0 0 0 0 2 0 0 0 0 0 0 0 0 247 247 247 255 25 25 25 11 45 + 46 48 26 239 239 251 219 3 4 1 114 233 236 1 254 21 21 20 + 113 12 11 2 54 1 2 2 215 206 206 206 230 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 1 255 255 255 0 0 0 0 0 0 0 0 0 0 0 0 0 46 46 + 47 8 56 56 49 70 23 21 9 145 237 239 248 180 247 247 2 148 + 225 225 224 234 241 241 240 248 205 205 205 247 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 255 255 255 0 255 255 255 0 255 255 + 255 0 255 255 255 0 255 255 255 0 255 255 255 0 107 106 96 + 75 90 89 73 75 255 255 255 0 255 255 255 0 255 255 255 0 + 255 255 255 0 255 255 255 0 255 255 255 0 255 255 255 0 255 + 255 255 0 +} +] [ +B{ + 56 141 99 252 255 255 63 3 41 160 170 50 174 252 253 219 + 199 17 2 2 92 172 2 130 82 107 152 69 132 191 138 153 153 + 187 125 37 70 115 119 87 65 61 15 219 171 150 127 191 56 37 + 4 132 213 182 73 74 107 204 98 250 240 254 181 36 49 154 23 + 47 158 101 121 255 214 129 6 54 22 245 112 94 78 49 251 175 + 239 223 127 250 240 225 211 103 22 65 65 73 81 98 12 184 + 127 251 104 143 148 168 212 221 156 210 142 85 80 161 67 83 + 38 119 177 177 176 176 178 40 110 88 191 144 53 32 48 254 + 55 166 127 51 21 191 125 123 21 240 241 195 35 95 25 73 22 + 43 89 57 151 28 100 249 156 220 178 95 76 18 18 234 207 30 + 222 61 157 141 174 57 61 45 32 245 231 215 107 23 120 217 + 62 244 233 168 202 58 114 243 138 253 226 230 151 219 130 + 174 142 241 196 201 35 140 23 14 111 104 121 112 255 188 + 209 95 54 254 173 191 255 50 176 125 248 248 222 151 143 + 235 155 131 162 4 47 3 251 31 17 134 239 140 63 25 62 254 + 101 60 219 216 178 214 164 166 58 91 65 80 128 141 191 184 + 180 255 34 3 3 3 3 35 44 26 27 202 226 203 239 222 59 211 + 193 200 204 192 32 38 173 204 240 243 253 123 6 57 49 102 + 134 239 44 66 12 191 126 124 103 144 149 146 191 247 254 39 + 219 146 143 31 159 25 8 11 203 92 148 149 83 158 21 30 145 + 251 132 17 57 29 116 116 148 168 63 126 112 43 239 235 215 + 79 182 239 222 189 85 225 102 252 199 169 160 42 114 149 + 157 79 99 58 19 195 55 21 54 14 145 75 28 28 172 44 138 10 + 154 59 184 184 5 95 184 186 5 252 102 248 255 255 63 86 156 + 157 17 52 33 34 80 233 255 162 249 109 85 232 114 135 15 + 237 96 130 177 177 106 94 183 122 57 127 90 178 253 203 150 + 198 228 86 92 22 192 48 19 122 168 150 151 151 176 124 120 + 127 179 95 70 70 238 137 146 138 238 11 152 184 154 154 26 + 139 140 140 12 134 122 22 24 67 81 81 145 89 77 77 141 243 + 243 231 207 127 248 120 116 36 94 190 102 137 252 245 251 + 70 93 76 180 207 71 14 78 209 215 174 174 110 76 191 126 + 253 188 198 192 192 112 31 217 0 184 137 223 191 127 255 47 + 41 41 201 173 171 103 32 245 254 253 239 219 204 44 140 69 + 47 223 48 254 19 21 21 41 228 225 102 50 99 100 98 186 126 + 238 220 185 103 24 233 0 61 55 234 233 233 115 88 88 24 186 + 137 139 114 78 124 251 254 199 150 239 223 153 166 60 124 + 248 224 213 199 143 31 126 156 61 123 246 59 186 1 184 99 + 33 43 193 59 42 210 211 155 80 32 2 0 2 32 94 128 +} zlib-inflate +] unit-test diff --git a/basis/compression/inflate/inflate.factor b/basis/compression/inflate/inflate.factor index 26b851cc1e..ab27c70ac0 100644 --- a/basis/compression/inflate/inflate.factor +++ b/basis/compression/inflate/inflate.factor @@ -1,59 +1,47 @@ ! Copyright (C) 2009 Marc Fauconneau. ! See http://factorcode.org/license.txt for BSD license. USING: accessors arrays assocs byte-vectors combinators -compression.huffman fry hashtables io.binary kernel locals math -math.bitwise math.order math.ranges sequences sorting ; +combinators.smart compression.huffman fry hashtables io.binary +kernel literals locals math math.bitwise math.order math.ranges +sequences sorting memoize combinators.short-circuit ; QUALIFIED-WITH: bitstreams bs IN: compression.inflate -QUALIFIED-WITH: bitstreams bs - seq ( assoc -- seq ) - dup keys [ ] [ max ] map-reduce 1 + f - [ '[ swap _ set-nth ] assoc-each ] keep ; - ERROR: zlib-unimplemented ; ERROR: bad-zlib-data ; ERROR: bad-zlib-header ; - + :: check-zlib-header ( data -- ) 16 data bs:peek 2 >le be> 31 mod ! checksum - 0 assert= + 0 assert= 4 data bs:read 8 assert= ! compression method: deflate 4 data bs:read ! log2(max length)-8, 32K max - 7 <= [ bad-zlib-header ] unless - 5 data bs:seek ! drop check bits - 1 data bs:read 0 assert= ! dictionnary - not allowed in png + 7 <= [ bad-zlib-header ] unless + 5 data bs:seek ! drop check bits + 1 data bs:read 0 assert= ! dictionary - not allowed in png 2 data bs:seek ! compression level; ignore ; -:: default-table ( -- table ) - 0 :> table - 0 143 [a,b] 280 287 [a,b] append 8 table set-at - 144 255 [a,b] >array 9 table set-at - 256 279 [a,b] >array 7 table set-at - table enum>seq 1 tail ; - CONSTANT: clen-shuffle { 16 17 18 0 8 7 9 6 10 5 11 4 12 3 13 2 14 1 15 } -: get-table ( values size -- table ) - 16 f clone - [ '[ _ push-at ] 2each ] keep seq>> 1 tail [ natural-sort ] map ; +: get-table ( values size -- table ) + 16 f + [ '[ _ push-at ] 2each ] keep + seq>> rest-slice [ natural-sort ] map ; inline :: decode-huffman-tables ( bitstream -- tables ) 5 bitstream bs:read 257 + 5 bitstream bs:read 1 + - 4 bitstream bs:read 4 + - clen-shuffle swap head - dup [ drop 3 bitstream bs:read ] map + 4 bitstream bs:read 4 + clen-shuffle swap head + + dup length iota [ 3 bitstream bs:read ] replicate get-table - bitstream swap + bitstream swap [ 2dup + ] dip swap :> k! '[ - _ read1-huff2 - { + _ read1-huff2 { { [ dup 16 = ] [ 2 bitstream bs:read 3 + 2array ] } { [ dup 17 = ] [ 3 bitstream bs:read 3 + 2array ] } { [ dup 18 = ] [ 7 bitstream bs:read 11 + 2array ] } @@ -61,121 +49,118 @@ CONSTANT: clen-shuffle { 16 17 18 0 8 7 9 6 10 5 11 4 12 3 13 2 14 1 15 } } cond dup array? [ dup second ] [ 1 ] if k swap - dup k! 0 > - ] - [ ] produce swap suffix - { } [ dup array? [ dup first 16 = ] [ f ] if [ [ unclip-last ] [ second 1 + swap append ] bi* ] [ suffix ] if ] reduce + ] [ ] produce swap suffix + { } [ + dup { [ array? ] [ first 16 = ] } 1&& [ + [ unclip-last-slice ] + [ second 1 + swap append ] bi* + ] [ + suffix + ] if + ] reduce [ dup array? [ second 0 ] [ 1array ] if ] map concat - nip swap cut 2array [ [ length>> [0,b) ] [ ] bi get-table ] map ; - + nip swap cut 2array + [ [ length>> iota ] [ ] bi get-table ] map ; + +MEMO: static-huffman-tables ( -- obj ) + [ + 0 143 [a,b] [ 8 ] replicate + 144 255 [a,b] [ 9 ] replicate append + 256 279 [a,b] [ 7 ] replicate append + 280 287 [a,b] [ 8 ] replicate append + ] append-outputs + 0 31 [a,b] [ 5 ] replicate 2array + [ [ length>> [0,b) ] [ ] bi get-table ] map ; + CONSTANT: length-table { - 3 4 5 6 7 8 9 10 - 11 13 15 17 - 19 23 27 31 - 35 43 51 59 - 67 83 99 115 - 131 163 195 227 258 + 3 4 5 6 7 8 9 10 11 13 15 17 19 23 27 31 + 35 43 51 59 67 83 99 115 131 163 195 227 258 } CONSTANT: dist-table { - 1 2 3 4 - 5 7 9 13 - 17 25 33 49 - 65 97 129 193 - 257 385 513 769 - 1025 1537 2049 3073 - 4097 6145 8193 12289 - 16385 24577 + 1 2 3 4 5 7 9 13 17 25 33 49 + 65 97 129 193 257 385 513 769 1025 1537 2049 3073 + 4097 6145 8193 12289 16385 24577 } : nth* ( n seq -- elt ) - [ length 1 - swap - ] [ nth ] bi ; + [ length 1 - swap - ] [ nth ] bi ; inline :: inflate-lz77 ( seq -- bytes ) 1000 :> bytes - seq - [ + seq [ dup array? [ first2 '[ _ 1 - bytes nth* bytes push ] times ] [ bytes push ] if - ] each + ] each bytes ; -:: inflate-dynamic ( bitstream -- bytes ) - bitstream decode-huffman-tables - bitstream '[ _ swap ] map :> tables +:: inflate-huffman ( bitstream tables -- bytes ) + bitstream tables [ ] with map :> tables [ tables first read1-huff2 - dup 256 > - [ - dup 285 = - [ ] - [ - dup 264 > - [ - dup 261 - 4 /i dup 5 > - [ bad-zlib-data ] when - bitstream bs:read 2array - ] - when - ] if - ! 5 bitstream read-bits ! distance + dup 256 > [ + dup 285 = [ + dup 264 > [ + dup 261 - 4 /i + dup 5 > [ bad-zlib-data ] when + bitstream bs:read 2array + ] when + ] unless + tables second read1-huff2 - dup 3 > - [ + + dup 3 > [ dup 2 - 2 /i dup 13 > [ bad-zlib-data ] when bitstream bs:read 2array - ] - when - 2array - ] - when - dup 256 = not - ] - [ ] produce nip + ] when 2array + ] when dup 256 = not + ] [ ] produce nip [ dup array? [ - first2 - [ + first2 [ dup array? [ first2 ] [ 0 ] if [ 257 - length-table nth ] [ + ] bi* - ] - [ + ] [ dup array? [ first2 ] [ 0 ] if [ dist-table nth ] [ + ] bi* - ] bi* - 2array + ] bi* 2array ] when ] map ; - -:: inflate-raw ( bitstream -- bytes ) - 8 bitstream bs:align + +:: inflate-raw ( bitstream -- bytes ) + 8 bitstream bs:align 16 bitstream bs:read :> len 16 bitstream bs:read :> nlen - len nlen + 16 >signed -1 assert= ! len + ~len = -1 + + ! len + ~len = -1 + len nlen + 16 >signed -1 assert= + bitstream byte-pos>> bitstream byte-pos>> len + bitstream bytes>> len 8 * bitstream bs:seek ; -: inflate-static ( bitstream -- bytes ) zlib-unimplemented ; +: inflate-dynamic ( bitstream -- array ) + dup decode-huffman-tables inflate-huffman ; -:: inflate-loop ( bitstream -- bytes ) - [ 1 bitstream bs:read 0 = ] - [ +: inflate-static ( bitstream -- array ) + static-huffman-tables inflate-huffman ; + +:: inflate-loop ( bitstream -- array ) + [ 1 bitstream bs:read 0 = ] [ bitstream 2 bitstream bs:read - { + { { 0 [ inflate-raw ] } { 1 [ inflate-static ] } { 2 [ inflate-dynamic ] } { 3 [ bad-zlib-data f ] } - } - case - ] - [ produce ] keep call suffix concat ; + } case + ] [ produce ] keep call suffix concat ; PRIVATE> diff --git a/basis/cords/cords.factor b/basis/cords/cords.factor index 915744491f..a50de60c45 100644 --- a/basis/cords/cords.factor +++ b/basis/cords/cords.factor @@ -6,35 +6,37 @@ IN: cords > length ] [ second>> length ] bi + ; + [ first>> length ] [ second>> length ] bi + ; inline -M: simple-cord virtual-seq first>> ; +M: simple-cord virtual-seq first>> ; inline M: simple-cord virtual@ 2dup first>> length < - [ first>> ] [ [ first>> length - ] [ second>> ] bi ] if ; + [ first>> ] [ [ first>> length - ] [ second>> ] bi ] if ; inline -TUPLE: multi-cord count seqs ; +TUPLE: multi-cord + { count read-only } { seqs read-only } ; -M: multi-cord length count>> ; +M: multi-cord length count>> ; inline M: multi-cord virtual@ dupd seqs>> [ first <=> ] with search nip - [ first - ] [ second ] bi ; + [ first - ] [ second ] bi ; inline M: multi-cord virtual-seq - seqs>> [ f ] [ first second ] if-empty ; + seqs>> [ f ] [ first second ] if-empty ; inline : ( seqs -- cord ) dup length 2 = [ first2 simple-cord boa ] [ [ 0 [ length + ] accumulate ] keep zip multi-cord boa - ] if ; + ] if ; inline PRIVATE> @@ -52,7 +54,7 @@ INSTANCE: multi-cord virtual-sequence { [ over cord? ] [ [ seqs>> values ] dip suffix ] } { [ dup cord? ] [ seqs>> values swap prefix ] } [ 2array ] - } cond ; + } cond ; inline : cord-concat ( seqs -- cord ) { @@ -67,4 +69,4 @@ INSTANCE: multi-cord virtual-sequence } cond ] map concat ] - } cond ; + } cond ; inline diff --git a/basis/cpu/architecture/architecture.factor b/basis/cpu/architecture/architecture.factor index 5ce16ad731..c7a7f0c5ef 100644 --- a/basis/cpu/architecture/architecture.factor +++ b/basis/cpu/architecture/architecture.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2006, 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: accessors arrays generic kernel kernel.private math -memory namespaces make sequences layouts system hashtables +USING: accessors arrays assocs generic kernel kernel.private +math memory namespaces make sequences layouts system hashtables classes alien byte-arrays combinators words sets fry ; IN: cpu.architecture @@ -56,6 +56,18 @@ uint-4-rep longlong-2-rep ulonglong-2-rep ; +UNION: signed-int-vector-rep +char-16-rep +short-8-rep +int-4-rep +longlong-2-rep ; + +UNION: unsigned-int-vector-rep +uchar-16-rep +ushort-8-rep +uint-4-rep +ulonglong-2-rep ; + UNION: scalar-rep char-scalar-rep uchar-scalar-rep @@ -83,6 +95,18 @@ double-rep vector-rep scalar-rep ; +: unsign-rep ( rep -- rep' ) + { + { uint-4-rep int-4-rep } + { ulonglong-2-rep longlong-2-rep } + { ushort-8-rep short-8-rep } + { uchar-16-rep char-16-rep } + { uchar-scalar-rep char-scalar-rep } + { ushort-scalar-rep short-scalar-rep } + { uint-scalar-rep int-scalar-rep } + { ulonglong-scalar-rep longlong-scalar-rep } + } ?at drop ; + ! Register classes SINGLETONS: int-regs float-regs ; @@ -218,9 +242,16 @@ HOOK: %fill-vector cpu ( dst rep -- ) HOOK: %gather-vector-2 cpu ( dst src1 src2 rep -- ) HOOK: %gather-vector-4 cpu ( dst src1 src2 src3 src4 rep -- ) HOOK: %shuffle-vector cpu ( dst src shuffle rep -- ) +HOOK: %tail>head-vector cpu ( dst src rep -- ) HOOK: %merge-vector-head cpu ( dst src1 src2 rep -- ) HOOK: %merge-vector-tail cpu ( dst src1 src2 rep -- ) -HOOK: %compare-vector cpu ( dst src1 src2 temp rep cc -- ) +HOOK: %signed-pack-vector cpu ( dst src1 src2 rep -- ) +HOOK: %unsigned-pack-vector cpu ( dst src1 src2 rep -- ) +HOOK: %unpack-vector-head cpu ( dst src rep -- ) +HOOK: %unpack-vector-tail cpu ( dst src rep -- ) +HOOK: %integer>float-vector cpu ( dst src rep -- ) +HOOK: %float>integer-vector cpu ( dst src rep -- ) +HOOK: %compare-vector cpu ( dst src1 src2 rep cc -- ) HOOK: %test-vector cpu ( dst src1 temp rep vcc -- ) HOOK: %test-vector-branch cpu ( label src1 temp rep vcc -- ) HOOK: %add-vector cpu ( dst src1 src2 rep -- ) @@ -259,7 +290,14 @@ HOOK: %gather-vector-2-reps cpu ( -- reps ) HOOK: %gather-vector-4-reps cpu ( -- reps ) HOOK: %shuffle-vector-reps cpu ( -- reps ) HOOK: %merge-vector-reps cpu ( -- reps ) +HOOK: %signed-pack-vector-reps cpu ( -- reps ) +HOOK: %unsigned-pack-vector-reps cpu ( -- reps ) +HOOK: %unpack-vector-head-reps cpu ( -- reps ) +HOOK: %unpack-vector-tail-reps cpu ( -- reps ) +HOOK: %integer>float-vector-reps cpu ( -- reps ) +HOOK: %float>integer-vector-reps cpu ( -- reps ) HOOK: %compare-vector-reps cpu ( cc -- reps ) +HOOK: %compare-vector-ccs cpu ( rep cc -- {cc,swap?}s not? ) HOOK: %test-vector-reps cpu ( -- reps ) HOOK: %add-vector-reps cpu ( -- reps ) HOOK: %saturated-add-vector-reps cpu ( -- reps ) diff --git a/basis/cpu/ppc/ppc.factor b/basis/cpu/ppc/ppc.factor index 9394e864f0..32c92a8da0 100644 --- a/basis/cpu/ppc/ppc.factor +++ b/basis/cpu/ppc/ppc.factor @@ -263,6 +263,11 @@ M: ppc %gather-vector-2-reps { } ; M: ppc %gather-vector-4-reps { } ; M: ppc %shuffle-vector-reps { } ; M: ppc %merge-vector-reps { } ; +M: ppc %signed-pack-vector-reps { } ; +M: ppc %unsigned-pack-vector-reps { } ; +M: ppc %unpack-vector-reps { } ; +M: ppc %integer>float-vector-reps { } ; +M: ppc %float>integer-vector-reps { } ; M: ppc %compare-vector-reps drop { } ; M: ppc %test-vector-reps { } ; M: ppc %add-vector-reps { } ; diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index fa5d99101b..13727bdc61 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -588,14 +588,6 @@ M: x86 %fill-vector-reps { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } } } available-reps ; -: unsign-rep ( rep -- rep' ) - { - { uint-4-rep int-4-rep } - { ulonglong-2-rep longlong-2-rep } - { ushort-8-rep short-8-rep } - { uchar-16-rep char-16-rep } - } ?at drop ; - ! M:: x86 %broadcast-vector ( dst src rep -- ) ! rep unsign-rep { ! { float-4-rep [ @@ -749,14 +741,81 @@ M: x86 %merge-vector-reps { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } } } available-reps ; -:: compare-float-v-operands ( dst src1 src2 temp rep cc -- dst' src' rep cc' ) - cc { cc> cc>= cc/> cc/>= } member? - [ dst src2 src1 rep two-operand rep cc swap-cc ] - [ dst src1 src2 rep two-operand rep cc ] if ; +M: x86 %signed-pack-vector + [ two-operand ] keep + { + { int-4-rep [ PACKSSDW ] } + { short-8-rep [ PACKSSWB ] } + } case ; + +M: x86 %signed-pack-vector-reps + { + { sse2? { short-8-rep int-4-rep } } + } available-reps ; + +M: x86 %unsigned-pack-vector + [ two-operand ] keep + unsign-rep { + { int-4-rep [ PACKUSDW ] } + { short-8-rep [ PACKUSWB ] } + } case ; + +M: x86 %unsigned-pack-vector-reps + { + { sse2? { short-8-rep } } + { sse4.1? { int-4-rep } } + } available-reps ; + +M: x86 %tail>head-vector ( dst src rep -- ) + dup { + { float-4-rep [ drop MOVHLPS ] } + { double-2-rep [ [ %copy ] [ drop UNPCKHPD ] 3bi ] } + [ drop [ %copy ] [ drop PUNPCKHQDQ ] 3bi ] + } case ; + +M: x86 %unpack-vector-head ( dst src rep -- ) + { + { char-16-rep [ PMOVSXBW ] } + { uchar-16-rep [ PMOVZXBW ] } + { short-8-rep [ PMOVSXWD ] } + { ushort-8-rep [ PMOVZXWD ] } + { int-4-rep [ PMOVSXDQ ] } + { uint-4-rep [ PMOVZXDQ ] } + { float-4-rep [ CVTPS2PD ] } + } case ; + +M: x86 %unpack-vector-head-reps ( -- reps ) + { + { sse2? { float-4-rep } } + { sse4.1? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } } + } available-reps ; + +M: x86 %unpack-vector-tail-reps ( -- reps ) { } ; + +M: x86 %integer>float-vector ( dst src rep -- ) + { + { int-4-rep [ CVTDQ2PS ] } + } case ; + +M: x86 %integer>float-vector-reps + { + { sse2? { int-4-rep } } + } available-reps ; + +M: x86 %float>integer-vector ( dst src rep -- ) + { + { float-4-rep [ CVTTPS2DQ ] } + } case ; + +M: x86 %float>integer-vector-reps + { + { sse2? { float-4-rep } } + } available-reps ; + : (%compare-float-vector) ( dst src rep double single -- ) [ double-2-rep eq? ] 2dip if ; inline -: %compare-float-vector ( dst src1 src2 temp rep cc -- ) - compare-float-v-operands { +: %compare-float-vector ( dst src rep cc -- ) + { { cc< [ [ CMPLTPD ] [ CMPLTPS ] (%compare-float-vector) ] } { cc<= [ [ CMPLEPD ] [ CMPLEPS ] (%compare-float-vector) ] } { cc= [ [ CMPEQPD ] [ CMPEQPS ] (%compare-float-vector) ] } @@ -767,16 +826,6 @@ M: x86 %merge-vector-reps { cc/<>= [ [ CMPUNORDPD ] [ CMPUNORDPS ] (%compare-float-vector) ] } } case ; -:: compare-int-v-operands ( dst src1 src2 temp rep cc -- not-dst/f cmp-dst src' rep cc' ) - cc order-cc :> occ - occ { - { cc= [ f dst src1 src2 rep two-operand rep cc= ] } - { cc/= [ dst temp src1 src2 rep two-operand rep cc= ] } - { cc<= [ dst temp src1 src2 rep two-operand rep cc> ] } - { cc< [ f dst src2 src1 rep two-operand rep cc> ] } - { cc> [ f dst src1 src2 rep two-operand rep cc> ] } - { cc>= [ dst temp src2 src1 rep two-operand rep cc> ] } - } case ; :: (%compare-int-vector) ( dst src rep int64 int32 int16 int8 -- ) rep unsign-rep :> rep' dst src rep' { @@ -785,15 +834,14 @@ M: x86 %merge-vector-reps { short-8-rep [ int16 call ] } { char-16-rep [ int8 call ] } } case ; inline -:: %compare-int-vector ( dst src1 src2 temp rep cc -- ) - dst src1 src2 temp rep cc compare-int-v-operands :> cc' :> rep :> src' :> cmp-dst :> not-dst - cmp-dst src' rep cc' { +: %compare-int-vector ( dst src rep cc -- ) + { { cc= [ [ PCMPEQQ ] [ PCMPEQD ] [ PCMPEQW ] [ PCMPEQB ] (%compare-int-vector) ] } { cc> [ [ PCMPGTQ ] [ PCMPGTD ] [ PCMPGTW ] [ PCMPGTB ] (%compare-int-vector) ] } - } case - not-dst [ cmp-dst rep %not-vector ] when* ; + } case ; -M: x86 %compare-vector ( dst src1 src2 temp rep cc -- ) +M: x86 %compare-vector ( dst src1 src2 rep cc -- ) + [ [ two-operand ] keep ] dip over float-vector-rep? [ %compare-float-vector ] [ %compare-int-vector ] if ; @@ -804,11 +852,6 @@ M: x86 %compare-vector ( dst src1 src2 temp rep cc -- ) { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } } { sse4.1? { longlong-2-rep ulonglong-2-rep } } } available-reps ; -: %compare-vector-unord-reps ( -- reps ) - { - { sse? { float-4-rep } } - { sse2? { double-2-rep } } - } available-reps ; : %compare-vector-ord-reps ( -- reps ) { { sse? { float-4-rep } } @@ -819,10 +862,44 @@ M: x86 %compare-vector ( dst src1 src2 temp rep cc -- ) M: x86 %compare-vector-reps { { [ dup { cc= cc/= } memq? ] [ drop %compare-vector-eq-reps ] } - { [ dup { cc<>= cc/<>= } memq? ] [ drop %compare-vector-unord-reps ] } [ drop %compare-vector-ord-reps ] } cond ; +: %compare-float-vector-ccs ( cc -- ccs not? ) + { + { cc< [ { { cc< f } } f ] } + { cc<= [ { { cc<= f } } f ] } + { cc> [ { { cc< t } } f ] } + { cc>= [ { { cc<= t } } f ] } + { cc= [ { { cc= f } } f ] } + { cc<> [ { { cc< f } { cc< t } } f ] } + { cc<>= [ { { cc<>= f } } f ] } + { cc/< [ { { cc/< f } } f ] } + { cc/<= [ { { cc/<= f } } f ] } + { cc/> [ { { cc/< t } } f ] } + { cc/>= [ { { cc/<= t } } f ] } + { cc/= [ { { cc/= f } } f ] } + { cc/<> [ { { cc/= f } { cc/<>= f } } f ] } + { cc/<>= [ { { cc/<>= f } } f ] } + } case ; + +: %compare-int-vector-ccs ( cc -- ccs not? ) + order-cc { + { cc< [ { { cc> t } } f ] } + { cc<= [ { { cc> f } } t ] } + { cc> [ { { cc> f } } f ] } + { cc>= [ { { cc> t } } t ] } + { cc= [ { { cc= f } } f ] } + { cc/= [ { { cc= f } } t ] } + { t [ { } t ] } + { f [ { } f ] } + } case ; + +M: x86 %compare-vector-ccs + swap float-vector-rep? + [ %compare-float-vector-ccs ] + [ %compare-int-vector-ccs ] if ; + :: %test-vector-mask ( dst temp mask vcc -- ) vcc { { vcc-any [ dst dst TEST dst temp \ CMOVNE %boolean ] } @@ -1146,15 +1223,7 @@ M: x86 %xor-vector-reps { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } } } available-reps ; -M:: x86 %not-vector ( dst src rep -- ) - dst rep %fill-vector - dst dst src rep %xor-vector ; - -M: x86 %not-vector-reps - { - { sse? { float-4-rep } } - { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } } - } available-reps ; +M: x86 %not-vector-reps { } ; M: x86 %shl-vector ( dst src1 src2 rep -- ) [ two-operand ] keep diff --git a/basis/images/png/png.factor b/basis/images/png/png.factor index 2af44e4e1d..08d8c56667 100755 --- a/basis/images/png/png.factor +++ b/basis/images/png/png.factor @@ -26,6 +26,9 @@ CONSTANT: indexed-color 3 CONSTANT: greyscale-alpha 4 CONSTANT: truecolor-alpha 6 +CONSTANT: interlace-none 0 +CONSTANT: interlace-adam7 1 + : ( -- image ) loading-png new V{ } clone >>chunks ; @@ -86,8 +89,8 @@ ERROR: unimplemented-color-type image ; : png-bytes-per-pixel ( loading-png -- n ) dup color-type>> { - { 2 [ scale-bit-depth 3 * ] } - { 6 [ scale-bit-depth 4 * ] } + { truecolor [ scale-bit-depth 3 * ] } + { truecolor-alpha [ scale-bit-depth 4 * ] } [ unknown-color-type ] } case ; inline @@ -118,20 +121,41 @@ ERROR: unimplemented-color-type image ; lines dup first length 0 prefix [ n 1 - 0 prepend ] map 2 clump [ - n swap first2 [ ] [ n 1 - swap nth ] [ [ 0 n 1 - ] dip set-nth ] tri + n swap first2 + [ ] + [ n 1 - swap nth ] + [ [ 0 n 1 - ] dip set-nth ] tri png-unfilter-line ] map B{ } concat-as ; +ERROR: unimplemented-interlace ; + +: reverse-interlace ( byte-array loading-png -- byte-array ) + { + { interlace-none [ ] } + { interlace-adam7 [ unimplemented-interlace ] } + [ unimplemented-interlace ] + } case ; + : png-image-bytes ( loading-png -- byte-array ) [ png-bytes-per-pixel ] - [ inflate-data ] + [ [ inflate-data ] [ interlace-method>> ] bi reverse-interlace ] [ png-group-width ] tri group reverse-png-filter ; +ERROR: unknown-component-type n ; + +: png-component ( loading-png -- obj ) + bit-depth>> { + { 8 [ ubyte-components ] } + { 16 [ ushort-components ] } + [ unknown-component-type ] + } case ; + : loading-png>image ( loading-png -- image ) [ image new ] dip { [ png-image-bytes >>bitmap ] [ [ width>> ] [ height>> ] bi 2array >>dim ] - [ drop ubyte-components >>component-type ] + [ png-component >>component-type ] } cleave ; : decode-greyscale ( loading-png -- image ) diff --git a/basis/io/streams/limited/limited-docs.factor b/basis/io/streams/limited/limited-docs.factor index 833e53820e..6c1806ff38 100755 --- a/basis/io/streams/limited/limited-docs.factor +++ b/basis/io/streams/limited/limited-docs.factor @@ -88,7 +88,7 @@ HELP: stream-throws { stream-eofs stream-throws } related-words ARTICLE: "io.streams.limited" "Limited input streams" -"The " { $vocab-link "io.streams.limited" } " vocabulary wraps a stream to behave as if it had only a limited number of bytes, either throwing an error or returning " { $link f } " upon reaching the end." $nl +"The " { $vocab-link "io.streams.limited" } " vocabulary wraps a stream to behave as if it had only a limited number of bytes, either throwing an error or returning " { $link f } " upon reaching the end. Limiting a non-seekable stream keeps a byte count and triggers the end-of-stream behavior when this byte count has been reached. However, limiting a seekable stream creates a window of bytes that supports seeking and re-reading of bytes in that window." $nl "Wrap a stream in a limited stream:" { $subsections limit } "Wrap the current " { $link input-stream } " in a limited stream:" diff --git a/basis/math/bitwise/bitwise-docs.factor b/basis/math/bitwise/bitwise-docs.factor index 2d487a621a..5dce9646f4 100755 --- a/basis/math/bitwise/bitwise-docs.factor +++ b/basis/math/bitwise/bitwise-docs.factor @@ -1,6 +1,6 @@ ! Copyright (C) 2008 Doug Coleman. ! See http://factorcode.org/license.txt for BSD license. -USING: assocs help.markup help.syntax math sequences ; +USING: assocs help.markup help.syntax math sequences kernel ; IN: math.bitwise HELP: bitfield @@ -67,17 +67,21 @@ HELP: bit-clear? HELP: bit-count { $values - { "x" integer } + { "obj" object } { "n" integer } } -{ $description "Returns the number of set bits as an integer." } +{ $description "Returns the number of set bits as an object. This word only works on non-negative integers or objects that can be represented as a byte-array." } { $examples { $example "USING: math.bitwise prettyprint ;" "HEX: f0 bit-count ." "4" } { $example "USING: math.bitwise prettyprint ;" - "-7 bit-count ." + "-1 32 bits bit-count ." + "32" + } + { $example "USING: math.bitwise prettyprint ;" + "B{ 1 0 1 } bit-count ." "2" } } ; @@ -206,6 +210,20 @@ HELP: mask? } } ; +HELP: even-parity? +{ $values + { "obj" object } + { "?" boolean } +} +{ $description "Returns true if the number of set bits in an object is even." } ; + +HELP: odd-parity? +{ $values + { "obj" object } + { "?" boolean } +} +{ $description "Returns true if the number of set bits in an object is odd." } ; + HELP: on-bits { $values { "n" integer } @@ -368,6 +386,8 @@ $nl { $subsections on-bits } "Counting the number of set bits:" { $subsections bit-count } +"Testing the parity of an object:" +{ $subsections even-parity? odd-parity? } "More efficient modding by powers of two:" { $subsections wrap } "Bit-rolling:" diff --git a/basis/math/bitwise/bitwise-tests.factor b/basis/math/bitwise/bitwise-tests.factor index d1e6c11b6c..d10e4ccc87 100644 --- a/basis/math/bitwise/bitwise-tests.factor +++ b/basis/math/bitwise/bitwise-tests.factor @@ -1,4 +1,7 @@ -USING: accessors math math.bitwise tools.test kernel words ; +USING: accessors math math.bitwise tools.test kernel words +specialized-arrays alien.c-types math.vectors.simd +sequences destructors libc ; +SPECIALIZED-ARRAY: int IN: math.bitwise.tests [ 0 ] [ 1 0 0 bitroll ] unit-test @@ -37,3 +40,23 @@ CONSTANT: b 2 [ 4 ] [ BIN: 1010101 bit-count ] unit-test [ 0 ] [ BIN: 0 bit-count ] unit-test [ 1 ] [ BIN: 1 bit-count ] unit-test + +SIMD: uint +SPECIALIZED-ARRAY: uint +SPECIALIZED-ARRAY: uint-4 + +[ 1 ] [ uint-4{ 1 0 0 0 } bit-count ] unit-test + +[ 1 ] [ + [ + 2 malloc-int-array &free 1 0 pick set-nth bit-count + ] with-destructors +] unit-test + +[ 1 ] [ B{ 1 0 0 } bit-count ] unit-test +[ 3 ] [ B{ 1 1 1 } bit-count ] unit-test + +[ t ] [ BIN: 0 even-parity? ] unit-test +[ f ] [ BIN: 1 even-parity? ] unit-test +[ f ] [ BIN: 0 odd-parity? ] unit-test +[ t ] [ BIN: 1 odd-parity? ] unit-test diff --git a/basis/math/bitwise/bitwise.factor b/basis/math/bitwise/bitwise.factor index bed065a800..204f295944 100755 --- a/basis/math/bitwise/bitwise.factor +++ b/basis/math/bitwise/bitwise.factor @@ -1,7 +1,9 @@ ! Copyright (C) 2007, 2008 Slava Pestov, Doug Coleman. ! See http://factorcode.org/license.txt for BSD license. USING: arrays assocs combinators combinators.smart fry kernel -macros math math.bits sequences sequences.private words ; +macros math math.bits sequences sequences.private words +byte-arrays alien alien.c-types specialized-arrays ; +SPECIALIZED-ARRAY: uchar IN: math.bitwise ! utilities @@ -84,24 +86,36 @@ DEFER: byte-bit-count GENERIC: (bit-count) ( x -- n ) M: fixnum (bit-count) - [ - { - [ byte-bit-count ] - [ -8 shift byte-bit-count ] - [ -16 shift byte-bit-count ] - [ -24 shift byte-bit-count ] - } cleave - ] sum-outputs ; + 0 swap [ + dup 0 > + ] [ + [ 8 bits byte-bit-count ] [ -8 shift ] bi + [ + ] dip + ] while drop ; M: bignum (bit-count) dup 0 = [ drop 0 ] [ [ byte-bit-count ] [ -8 shift (bit-count) ] bi + ] if ; +: byte-array-bit-count ( byte-array -- n ) + 0 [ byte-bit-count + ] reduce ; + PRIVATE> -: bit-count ( x -- n ) - dup 0 < [ bitnot ] when (bit-count) ; inline +ERROR: invalid-bit-count-target object ; + +GENERIC: bit-count ( obj -- n ) + +M: integer bit-count + dup 0 < [ invalid-bit-count-target ] when (bit-count) ; inline + +M: byte-array bit-count + byte-array-bit-count ; + +M: object bit-count + [ >c-ptr ] [ byte-length ] bi + byte-array-bit-count ; : >signed ( x n -- y ) 2dup neg 1 + shift 1 = [ 2^ - ] [ drop ] if ; @@ -113,3 +127,7 @@ PRIVATE> : next-even ( m -- n ) >even 2 + ; foldable : next-odd ( m -- n ) dup even? [ 1 + ] [ 2 + ] if ; foldable + +: even-parity? ( obj -- ? ) bit-count even? ; + +: odd-parity? ( obj -- ? ) bit-count odd? ; diff --git a/basis/math/vectors/conversion/authors.txt b/basis/math/vectors/conversion/authors.txt new file mode 100644 index 0000000000..f13c9c1e77 --- /dev/null +++ b/basis/math/vectors/conversion/authors.txt @@ -0,0 +1 @@ +Joe Groff diff --git a/basis/math/vectors/conversion/backend/backend.factor b/basis/math/vectors/conversion/backend/backend.factor new file mode 100644 index 0000000000..d47fab1b0e --- /dev/null +++ b/basis/math/vectors/conversion/backend/backend.factor @@ -0,0 +1,21 @@ +! (c)Joe Groff bsd license +USING: accessors alien.c-types arrays assocs classes combinators +cords fry kernel math math.vectors sequences ; +IN: math.vectors.conversion.backend + +: saturate-map-as ( v quot result -- w ) + [ element-type '[ @ _ c-type-clamp ] ] keep map-as ; inline + +: (v>float) ( i to-type -- f ) + [ >float ] swap new map-as ; +: (v>integer) ( f to-type -- i ) + [ >integer ] swap new map-as ; +: (vpack-signed) ( a b to-type -- ab ) + [ cord-append [ ] ] dip new saturate-map-as ; +: (vpack-unsigned) ( a b to-type -- ab ) + [ cord-append [ ] ] dip new saturate-map-as ; +: (vunpack-head) ( ab to-type -- a ) + [ dup length 2 /i head-slice ] dip new like ; +: (vunpack-tail) ( ab to-type -- b ) + [ dup length 2 /i tail-slice ] dip new like ; + diff --git a/basis/math/vectors/conversion/conversion-tests.factor b/basis/math/vectors/conversion/conversion-tests.factor new file mode 100644 index 0000000000..d6c16c8518 --- /dev/null +++ b/basis/math/vectors/conversion/conversion-tests.factor @@ -0,0 +1,149 @@ +! (c)Joe Groff bsd license +USING: accessors arrays compiler continuations generalizations +kernel kernel.private locals math.vectors.conversion math.vectors.simd +sequences stack-checker tools.test ; +FROM: alien.c-types => char uchar short ushort int uint longlong ulonglong float double ; +SIMD: uchar +SIMD: char +SIMD: ushort +SIMD: short +SIMD: uint +SIMD: int +SIMD: ulonglong +SIMD: longlong +SIMD: float +SIMD: double +IN: math.vectors.conversion.tests + +ERROR: optimized-vconvert-inconsistent + unoptimized-result + optimized-result ; + +MACRO:: test-vconvert ( from-type to-type -- ) + [ from-type to-type vconvert ] :> quot + quot infer :> effect + effect in>> length :> inputs + effect out>> length :> outputs + + inputs from-type :> declaration + + [ + inputs narray + [ quot with-datastack ] + [ [ [ declaration declare quot call ] compile-call ] with-datastack ] bi + 2dup = [ optimized-vconvert-inconsistent ] unless + drop outputs firstn + ] ; + +[ uint-4{ 5 1 2 6 } int-4 float-4 vconvert ] +[ bad-vconvert-input? ] must-fail-with + +[ int-4{ 1 2 3 4 } uint-4{ 5 1 2 6 } int-4 short-8 vconvert ] +[ bad-vconvert-input? ] must-fail-with + +[ uint-4{ 1 2 3 4 } int-4{ 5 1 2 6 } int-4 short-8 vconvert ] +[ bad-vconvert-input? ] must-fail-with + +[ uint-4{ 5 1 2 6 } int-4 longlong-2 vconvert ] +[ bad-vconvert-input? ] must-fail-with + +[ float-4{ -5.0 1.0 2.0 6.0 } ] +[ int-4{ -5 1 2 6 } int-4 float-4 test-vconvert ] unit-test + +[ int-4{ -5 1 2 6 } ] +[ float-4{ -5.0 1.0 2.0 6.0 } float-4 int-4 test-vconvert ] unit-test + +[ int-4{ -5 1 2 6 } ] +[ float-4{ -5.0 1.0 2.3 6.7 } float-4 int-4 test-vconvert ] unit-test + +[ double-2{ -5.0 1.0 } ] +[ longlong-2{ -5 1 } longlong-2 double-2 test-vconvert ] unit-test + +[ longlong-4{ -5 1 2 6 } ] +[ double-4{ -5.0 1.0 2.3 6.7 } double-4 longlong-4 test-vconvert ] unit-test + +! TODO we should be able to do double->int pack +! [ int-8{ -5 1 2 6 12 34 -56 78 } ] +[ double-4{ -5.0 1.0 2.0 6.0 } double-4{ 12.0 34.0 -56.0 78.0 } double-4 int-8 test-vconvert ] +[ error>> bad-vconvert? ] must-fail-with + +[ float-4{ -1.25 2.0 3.0 -4.0 } ] +[ double-2{ -1.25 2.0 } double-2{ 3.0 -4.0 } double-2 float-4 test-vconvert ] unit-test + +[ int-4{ -1 2 3 -4 } ] +[ longlong-2{ -1 2 } longlong-2{ 3 -4 } longlong-2 int-4 test-vconvert ] unit-test + +[ short-8{ -1 2 3 -32768 5 32767 -7 32767 } ] +[ int-4{ -1 2 3 -40000 } int-4{ 5 60000 -7 80000 } int-4 short-8 test-vconvert ] unit-test + +[ short-16{ -1 2 3 -32768 3 2 1 0 5 32767 -7 32767 7 6 5 4 } ] +[ + int-8{ -1 2 3 -40000 3 2 1 0 } + int-8{ 5 60000 -7 80000 7 6 5 4 } int-8 short-16 test-vconvert +] unit-test + +[ ushort-8{ 0 2 3 0 5 60000 0 65535 } ] +[ int-4{ -1 2 3 -40000 } int-4{ 5 60000 -7 80000 } int-4 ushort-8 test-vconvert ] unit-test + +[ ushort-8{ 65535 2 3 65535 5 60000 65535 65535 } ] +[ uint-4{ -1 2 3 -40000 } uint-4{ 5 60000 -7 80000 } uint-4 ushort-8 test-vconvert ] unit-test + +[ uint-4{ -1 2 3 -40000 } uint-4{ 5 60000 -7 80000 } uint-4 short-8 test-vconvert ] +[ error>> bad-vconvert? ] must-fail-with + +! TODO we should be able to do 256->128 pack +! [ float-4{ -1.25 2.0 3.0 -4.0 } ] +[ double-4{ -1.25 2.0 3.0 -4.0 } double-4 float-4 test-vconvert ] +[ error>> bad-vconvert? ] must-fail-with + +! [ int-4{ -1 2 3 -4 } ] +[ longlong-4{ -1 2 3 -4 } longlong-4 int-4 test-vconvert ] +[ error>> bad-vconvert? ] must-fail-with + +[ double-2{ -1.25 2.0 } double-2{ 3.0 -4.0 } ] +[ float-4{ -1.25 2.0 3.0 -4.0 } float-4 double-2 test-vconvert ] unit-test + +[ int-4{ -1 2 3 -4 } ] +[ int-4{ -1 2 3 -4 } int-4 int-4 test-vconvert ] unit-test + +[ longlong-2{ -1 2 } longlong-2{ 3 -4 } ] +[ int-4{ -1 2 3 -4 } int-4 longlong-2 test-vconvert ] unit-test + +[ int-4{ -1 2 3 -4 } int-4 ulonglong-2 test-vconvert ] +[ error>> bad-vconvert? ] must-fail-with + +[ ulonglong-2{ 1 2 } ulonglong-2{ 3 4 } ] +[ uint-4{ 1 2 3 4 } uint-4 ulonglong-2 test-vconvert ] unit-test + +[ longlong-4{ 1 2 3 4 } longlong-4{ 3 4 5 6 } ] +[ uint-8{ 1 2 3 4 3 4 5 6 } uint-8 longlong-4 test-vconvert ] unit-test + +[ int-4{ 1 2 -3 -4 } int-4{ 5 -6 7 -8 } ] +[ short-8{ 1 2 -3 -4 5 -6 7 -8 } short-8 int-4 test-vconvert ] unit-test + +[ uint-4{ 1 2 3 4 } uint-4{ 5 6 7 8 } ] +[ ushort-8{ 1 2 3 4 5 6 7 8 } ushort-8 uint-4 test-vconvert ] unit-test + +[ longlong-4{ 1 2 3 4 } longlong-4{ 3 4 5 6 } ] +[ uint-8{ 1 2 3 4 3 4 5 6 } uint-8 longlong-4 test-vconvert ] unit-test + +! TODO we should be able to do 128->256 unpack +! [ longlong-4{ 1 2 3 4 } ] +[ uint-4{ 1 2 3 4 } uint-4 longlong-4 test-vconvert ] +[ error>> bad-vconvert? ] must-fail-with + +! TODO we should be able to do multi-tier pack/unpack +! [ longlong-2{ 1 2 } longlong-2{ 3 4 } longlong-2{ 5 6 } longlong-2{ 7 8 } ] +[ ushort-8{ 1 2 3 4 5 6 7 8 } ushort-8 longlong-2 test-vconvert ] +[ error>> bad-vconvert? ] must-fail-with + +! [ ushort-8{ 1 2 3 4 5 6 7 8 } ] +[ + longlong-2{ 1 2 } + longlong-2{ 3 4 } + longlong-2{ 5 6 } + longlong-2{ 7 8 } + longlong-2 ushort-8 test-vconvert +] +[ error>> bad-vconvert? ] must-fail-with + diff --git a/basis/math/vectors/conversion/conversion.factor b/basis/math/vectors/conversion/conversion.factor new file mode 100644 index 0000000000..863cb9fea5 --- /dev/null +++ b/basis/math/vectors/conversion/conversion.factor @@ -0,0 +1,83 @@ +! (c)Joe Groff bsd license +USING: accessors alien.c-types arrays assocs classes combinators +combinators.short-circuit cords fry kernel locals math +math.vectors math.vectors.conversion.backend sequences ; +FROM: alien.c-types => char uchar short ushort int uint longlong ulonglong float double ; +IN: math.vectors.conversion + +ERROR: bad-vconvert from-type to-type ; +ERROR: bad-vconvert-input value expected-type ; + +> to-type boa ] ] + } + { + [ from-element float-type? ] + [ [ to-type (v>integer) ] ] + } + { + [ to-element float-type? ] + [ [ to-type (v>float) ] ] + } + } cond + [ from-type check-vconvert-type ] prepose ; + +:: [vpack] ( from-element to-element from-size to-size from-type to-type -- quot ) + from-size to-size /i log2 :> steps + + { + [ steps 1 = not ] + [ from-element to-element [ float-type? ] bi@ xor ] + [ from-element unsigned-type? to-element unsigned-type? not and ] + } 0|| [ from-type to-type bad-vconvert ] when + + to-element unsigned-type? [ to-type (vpack-unsigned) ] [ to-type (vpack-signed) ] ? + [ [ from-type check-vconvert-type ] bi@ ] prepose ; + +:: [vunpack] ( from-element to-element from-size to-size from-type to-type -- quot ) + to-size from-size /i log2 :> steps + + { + [ steps 1 = not ] + [ from-element to-element [ float-type? ] bi@ xor ] + [ from-element unsigned-type? not to-element unsigned-type? and ] + } 0|| [ from-type to-type bad-vconvert ] when + + [ + from-type check-vconvert-type + [ to-type (vunpack-head) ] [ to-type (vunpack-tail) ] bi + ] ; + +PRIVATE> + +MACRO:: vconvert ( from-type to-type -- ) + from-type new [ element-type ] [ byte-length ] bi :> from-length :> from-element + to-type new [ element-type ] [ byte-length ] bi :> to-length :> to-element + from-element heap-size :> from-size + to-element heap-size :> to-size + + from-length to-length = [ from-type to-type bad-vconvert ] unless + + from-element to-element from-size to-size from-type to-type { + { [ from-size to-size < ] [ [vunpack] ] } + { [ from-size to-size = ] [ [vconvert] ] } + { [ from-size to-size > ] [ [vpack] ] } + } cond ; + diff --git a/basis/math/vectors/conversion/summary.txt b/basis/math/vectors/conversion/summary.txt new file mode 100644 index 0000000000..15f4f0d396 --- /dev/null +++ b/basis/math/vectors/conversion/summary.txt @@ -0,0 +1 @@ +Conversion, packing, and unpacking of SIMD vectors diff --git a/basis/math/vectors/simd/functor/functor.factor b/basis/math/vectors/simd/functor/functor.factor index fb8326fde2..15e37bbcd9 100644 --- a/basis/math/vectors/simd/functor/functor.factor +++ b/basis/math/vectors/simd/functor/functor.factor @@ -3,6 +3,7 @@ USING: accessors assocs byte-arrays classes classes.algebra effects fry functors generalizations kernel literals locals math math.functions math.vectors math.vectors.private math.vectors.simd.intrinsics +math.vectors.conversion.backend math.vectors.specialization parser prettyprint.custom sequences sequences.private strings words definitions macros cpu.architecture namespaces arrays quotations combinators combinators.short-circuit sets @@ -174,6 +175,8 @@ A-vn->v-op DEFINES-PRIVATE ${A}-vn->v-op A-vv->n-op DEFINES-PRIVATE ${A}-vv->n-op A-v->v-op DEFINES-PRIVATE ${A}-v->v-op A-v->n-op DEFINES-PRIVATE ${A}-v->n-op +A-v-conversion-op DEFINES-PRIVATE ${A}-v-conversion-op +A-vv-conversion-op DEFINES-PRIVATE ${A}-vv-conversion-op A-element-class [ A-rep rep-component-type c:c-type-boxed-class ] @@ -251,10 +254,29 @@ INSTANCE: A sequence : A-v->n-op ( v quot -- n ) [ underlying>> A-rep ] dip call ; inline +: A-v-conversion-op ( v1 to-type quot -- v2 ) + swap [ underlying>> A-rep ] [ call ] [ '[ _ boa ] call( u -- v ) ] tri* ; inline + +: A-vv-conversion-op ( v1 v2 to-type quot -- v2 ) + swap { + [ underlying>> ] + [ underlying>> A-rep ] + [ call ] + [ '[ _ boa ] call( u -- v ) ] + } spread ; inline + simd new \ A >>class \ A-with >>ctor \ A-rep >>rep + { + { (v>float) A-v-conversion-op } + { (v>integer) A-v-conversion-op } + { (vpack-signed) A-vv-conversion-op } + { (vpack-unsigned) A-vv-conversion-op } + { (vunpack-head) A-v-conversion-op } + { (vunpack-tail) A-v-conversion-op } + } >>special-wrappers { { { +vector+ +vector+ -> +vector+ } A-vv->v-op } { { +vector+ +scalar+ -> +vector+ } A-vn->v-op } @@ -327,6 +349,10 @@ A-vany-op DEFINES-PRIVATE ${A}-vany-op A-vall-op DEFINES-PRIVATE ${A}-vall-op A-vmerge-head-op DEFINES-PRIVATE ${A}-vmerge-head-op A-vmerge-tail-op DEFINES-PRIVATE ${A}-vmerge-tail-op +A-v-conversion-op DEFINES-PRIVATE ${A}-v-conversion-op +A-vpack-op DEFINES-PRIVATE ${A}-vpack-op +A-vunpack-head-op DEFINES-PRIVATE ${A}-vunpack-head-op +A-vunpack-tail-op DEFINES-PRIVATE ${A}-vunpack-tail-op WHERE @@ -424,16 +450,41 @@ INSTANCE: A sequence : A-vmerge-head-op ( v1 v2 quot -- v ) drop [ underlying1>> ] bi@ - [ A-rep (simd-vmerge-head) ] - [ A-rep (simd-vmerge-tail) ] 2bi - \ A boa ; + [ A-rep (simd-(vmerge-head)) ] + [ A-rep (simd-(vmerge-tail)) ] 2bi + \ A boa ; inline : A-vmerge-tail-op ( v1 v2 quot -- v ) drop [ underlying2>> ] bi@ - [ A-rep (simd-vmerge-head) ] - [ A-rep (simd-vmerge-tail) ] 2bi - \ A boa ; + [ A-rep (simd-(vmerge-head)) ] + [ A-rep (simd-(vmerge-tail)) ] 2bi + \ A boa ; inline + +: A-v-conversion-op ( v1 to-type quot -- v ) + swap [ + [ [ underlying1>> A-rep ] dip call ] + [ [ underlying2>> A-rep ] dip call ] 2bi + ] dip '[ _ boa ] call( u1 u2 -- v ) ; inline + +: A-vpack-op ( v1 v2 to-type quot -- v ) + swap [ + '[ [ underlying1>> ] [ underlying2>> ] bi A-rep @ ] bi* + ] dip '[ _ boa ] call( u1 u2 -- v ) ; inline + +: A-vunpack-head-op ( v1 to-type quot -- v ) + '[ + underlying1>> + [ A-rep @ ] + [ A-rep (simd-(vunpack-tail)) ] bi + ] dip '[ _ boa ] call( u1 u2 -- v ) ; inline + +: A-vunpack-tail-op ( v1 to-type quot -- v ) + '[ + underlying2>> + [ A-rep (simd-(vunpack-head)) ] + [ A-rep @ ] bi + ] dip '[ _ boa ] call( u1 u2 -- v ) ; inline simd new \ A >>class @@ -445,8 +496,14 @@ simd new { vnone? A-vany-op } { vany? A-vany-op } { vall? A-vall-op } - { vmerge-head A-vmerge-head-op } - { vmerge-tail A-vmerge-tail-op } + { (vmerge-head) A-vmerge-head-op } + { (vmerge-tail) A-vmerge-tail-op } + { (v>integer) A-v-conversion-op } + { (v>float) A-v-conversion-op } + { (vpack-signed) A-vpack-op } + { (vpack-unsigned) A-vpack-op } + { (vunpack-head) A-vunpack-head-op } + { (vunpack-tail) A-vunpack-tail-op } } >>special-wrappers { { { +vector+ +vector+ -> +vector+ } A-vv->v-op } diff --git a/basis/math/vectors/simd/intrinsics/intrinsics.factor b/basis/math/vectors/simd/intrinsics/intrinsics.factor index dd87d4aaa9..0efb0c2417 100644 --- a/basis/math/vectors/simd/intrinsics/intrinsics.factor +++ b/basis/math/vectors/simd/intrinsics/intrinsics.factor @@ -2,7 +2,9 @@ ! See http://factorcode.org/license.txt for BSD license. USING: alien alien.c-types alien.data assocs combinators cpu.architecture compiler.cfg.comparisons fry generalizations -kernel libc macros math sequences effects accessors namespaces +kernel libc macros math +math.vectors.conversion.backend +sequences sets effects accessors namespaces lexer parser vocabs.parser words arrays math.vectors ; IN: math.vectors.simd.intrinsics @@ -12,17 +14,27 @@ ERROR: bad-simd-call ; : simd-effect ( word -- effect ) stack-effect [ in>> "rep" suffix ] [ out>> ] bi ; +: simd-conversion-effect ( word -- effect ) + stack-effect [ in>> but-last "rep" suffix ] [ out>> ] bi ; SYMBOL: simd-ops V{ } clone simd-ops set-global -SYNTAX: SIMD-OP: - scan-word dup name>> "(simd-" ")" surround create-in - [ nip [ bad-simd-call ] define ] - [ [ simd-effect ] dip set-stack-effect ] +: (SIMD-OP:) ( accum quot -- accum ) + [ + scan-word dup name>> "(simd-" ")" surround create-in + [ nip [ bad-simd-call ] define ] + ] dip + '[ _ dip set-stack-effect ] [ 2array simd-ops get push ] - 2tri ; + 2tri ; inline + +SYNTAX: SIMD-OP: + [ simd-effect ] (SIMD-OP:) ; + +SYNTAX: SIMD-CONVERSION-OP: + [ simd-conversion-effect ] (SIMD-OP:) ; >> @@ -55,8 +67,8 @@ SIMD-OP: vrshift SIMD-OP: hlshift SIMD-OP: hrshift SIMD-OP: vshuffle -SIMD-OP: vmerge-head -SIMD-OP: vmerge-tail +SIMD-OP: (vmerge-head) +SIMD-OP: (vmerge-tail) SIMD-OP: v<= SIMD-OP: v< SIMD-OP: v= @@ -67,6 +79,13 @@ SIMD-OP: vany? SIMD-OP: vall? SIMD-OP: vnone? +SIMD-CONVERSION-OP: (v>float) +SIMD-CONVERSION-OP: (v>integer) +SIMD-CONVERSION-OP: (vpack-signed) +SIMD-CONVERSION-OP: (vpack-unsigned) +SIMD-CONVERSION-OP: (vunpack-head) +SIMD-CONVERSION-OP: (vunpack-tail) + : (simd-with) ( x rep -- v ) bad-simd-call ; : (simd-gather-2) ( a b rep -- v ) bad-simd-call ; : (simd-gather-4) ( a b c d rep -- v ) bad-simd-call ; @@ -118,48 +137,58 @@ MACRO: (simd-boa) ( rep -- quot ) GENERIC# supported-simd-op? 1 ( rep intrinsic -- ? ) +: (%unpack-reps) ( -- reps ) + %merge-vector-reps [ int-vector-rep? ] filter + %unpack-vector-head-reps union ; + M: vector-rep supported-simd-op? { - { \ (simd-v+) [ %add-vector-reps ] } - { \ (simd-vs+) [ %saturated-add-vector-reps ] } - { \ (simd-v+-) [ %add-sub-vector-reps ] } - { \ (simd-v-) [ %sub-vector-reps ] } - { \ (simd-vs-) [ %saturated-sub-vector-reps ] } - { \ (simd-v*) [ %mul-vector-reps ] } - { \ (simd-vs*) [ %saturated-mul-vector-reps ] } - { \ (simd-v/) [ %div-vector-reps ] } - { \ (simd-vmin) [ %min-vector-reps ] } - { \ (simd-vmax) [ %max-vector-reps ] } - { \ (simd-v.) [ %dot-vector-reps ] } - { \ (simd-vsqrt) [ %sqrt-vector-reps ] } - { \ (simd-sum) [ %horizontal-add-vector-reps ] } - { \ (simd-vabs) [ %abs-vector-reps ] } - { \ (simd-vbitand) [ %and-vector-reps ] } - { \ (simd-vbitandn) [ %andn-vector-reps ] } - { \ (simd-vbitor) [ %or-vector-reps ] } - { \ (simd-vbitxor) [ %xor-vector-reps ] } - { \ (simd-vbitnot) [ %not-vector-reps ] } - { \ (simd-vand) [ %and-vector-reps ] } - { \ (simd-vandn) [ %andn-vector-reps ] } - { \ (simd-vor) [ %or-vector-reps ] } - { \ (simd-vxor) [ %xor-vector-reps ] } - { \ (simd-vnot) [ %not-vector-reps ] } - { \ (simd-vlshift) [ %shl-vector-reps ] } - { \ (simd-vrshift) [ %shr-vector-reps ] } - { \ (simd-hlshift) [ %horizontal-shl-vector-reps ] } - { \ (simd-hrshift) [ %horizontal-shr-vector-reps ] } - { \ (simd-vshuffle) [ %shuffle-vector-reps ] } - { \ (simd-vmerge-head) [ %merge-vector-reps ] } - { \ (simd-vmerge-tail) [ %merge-vector-reps ] } - { \ (simd-v<=) [ cc<= %compare-vector-reps ] } - { \ (simd-v<) [ cc< %compare-vector-reps ] } - { \ (simd-v=) [ cc= %compare-vector-reps ] } - { \ (simd-v>) [ cc> %compare-vector-reps ] } - { \ (simd-v>=) [ cc>= %compare-vector-reps ] } - { \ (simd-vunordered?) [ cc/<>= %compare-vector-reps ] } - { \ (simd-gather-2) [ %gather-vector-2-reps ] } - { \ (simd-gather-4) [ %gather-vector-4-reps ] } - { \ (simd-vany?) [ %test-vector-reps ] } - { \ (simd-vall?) [ %test-vector-reps ] } - { \ (simd-vnone?) [ %test-vector-reps ] } + { \ (simd-v+) [ %add-vector-reps ] } + { \ (simd-vs+) [ %saturated-add-vector-reps ] } + { \ (simd-v+-) [ %add-sub-vector-reps ] } + { \ (simd-v-) [ %sub-vector-reps ] } + { \ (simd-vs-) [ %saturated-sub-vector-reps ] } + { \ (simd-v*) [ %mul-vector-reps ] } + { \ (simd-vs*) [ %saturated-mul-vector-reps ] } + { \ (simd-v/) [ %div-vector-reps ] } + { \ (simd-vmin) [ %min-vector-reps ] } + { \ (simd-vmax) [ %max-vector-reps ] } + { \ (simd-v.) [ %dot-vector-reps ] } + { \ (simd-vsqrt) [ %sqrt-vector-reps ] } + { \ (simd-sum) [ %horizontal-add-vector-reps ] } + { \ (simd-vabs) [ %abs-vector-reps ] } + { \ (simd-vbitand) [ %and-vector-reps ] } + { \ (simd-vbitandn) [ %andn-vector-reps ] } + { \ (simd-vbitor) [ %or-vector-reps ] } + { \ (simd-vbitxor) [ %xor-vector-reps ] } + { \ (simd-vbitnot) [ %xor-vector-reps ] } + { \ (simd-vand) [ %and-vector-reps ] } + { \ (simd-vandn) [ %andn-vector-reps ] } + { \ (simd-vor) [ %or-vector-reps ] } + { \ (simd-vxor) [ %xor-vector-reps ] } + { \ (simd-vnot) [ %xor-vector-reps ] } + { \ (simd-vlshift) [ %shl-vector-reps ] } + { \ (simd-vrshift) [ %shr-vector-reps ] } + { \ (simd-hlshift) [ %horizontal-shl-vector-reps ] } + { \ (simd-hrshift) [ %horizontal-shr-vector-reps ] } + { \ (simd-vshuffle) [ %shuffle-vector-reps ] } + { \ (simd-(vmerge-head)) [ %merge-vector-reps ] } + { \ (simd-(vmerge-tail)) [ %merge-vector-reps ] } + { \ (simd-(v>float)) [ %integer>float-vector-reps ] } + { \ (simd-(v>integer)) [ %float>integer-vector-reps ] } + { \ (simd-(vpack-signed)) [ %signed-pack-vector-reps ] } + { \ (simd-(vpack-unsigned)) [ %unsigned-pack-vector-reps ] } + { \ (simd-(vunpack-head)) [ (%unpack-reps) ] } + { \ (simd-(vunpack-tail)) [ (%unpack-reps) ] } + { \ (simd-v<=) [ cc<= %compare-vector-reps ] } + { \ (simd-v<) [ cc< %compare-vector-reps ] } + { \ (simd-v=) [ cc= %compare-vector-reps ] } + { \ (simd-v>) [ cc> %compare-vector-reps ] } + { \ (simd-v>=) [ cc>= %compare-vector-reps ] } + { \ (simd-vunordered?) [ cc/<>= %compare-vector-reps ] } + { \ (simd-gather-2) [ %gather-vector-2-reps ] } + { \ (simd-gather-4) [ %gather-vector-4-reps ] } + { \ (simd-vany?) [ %test-vector-reps ] } + { \ (simd-vall?) [ %test-vector-reps ] } + { \ (simd-vnone?) [ %test-vector-reps ] } } case member? ; diff --git a/basis/math/vectors/simd/simd-tests.factor b/basis/math/vectors/simd/simd-tests.factor index 88e5d5f1ea..78c9389591 100644 --- a/basis/math/vectors/simd/simd-tests.factor +++ b/basis/math/vectors/simd/simd-tests.factor @@ -4,6 +4,7 @@ math.private math.vectors math.vectors.simd math.vectors.simd.private prettyprint random sequences system tools.test vocabs assocs compiler.cfg.debugger words locals math.vectors.specialization combinators cpu.architecture +math.vectors.conversion.backend math.vectors.simd.intrinsics namespaces byte-arrays alien specialized-arrays classes.struct eval classes.algebra sets quotations math.constants compiler.units ; @@ -128,6 +129,8 @@ CONSTANT: simd-classes [ HEX: ffffffff ] [ HEX: ffffffff [ uint-4-with ] compile-call first ] unit-test +[ HEX: ffffffff ] [ [ HEX: ffffffff uint-4-with ] compile-call first ] unit-test + "== Checking -boa constructors" print [ { } ] [ @@ -181,6 +184,9 @@ CONSTANT: simd-classes { hlshift hrshift vshuffle vbroadcast vany? vall? vnone? + (v>float) (v>integer) + (vpack-signed) (vpack-unsigned) + (vunpack-head) (vunpack-tail) } unique assoc-diff ; : ops-to-check ( elt-class -- alist ) diff --git a/basis/math/vectors/specialization/specialization.factor b/basis/math/vectors/specialization/specialization.factor index 8d9d1b49cb..be959a2a2e 100644 --- a/basis/math/vectors/specialization/specialization.factor +++ b/basis/math/vectors/specialization/specialization.factor @@ -2,6 +2,7 @@ ! See http://factorcode.org/license.txt for BSD license. USING: words kernel make sequences effects sets kernel.private accessors combinators math math.intervals math.vectors +math.vectors.conversion.backend namespaces assocs fry splitting classes.algebra generalizations locals compiler.tree.propagation.info ; IN: math.vectors.specialization @@ -98,8 +99,14 @@ H{ { hrshift { +vector+ +literal+ -> +vector+ } } { vshuffle { +vector+ +literal+ -> +vector+ } } { vbroadcast { +vector+ +literal+ -> +vector+ } } - { vmerge-head { +vector+ +vector+ -> +vector+ } } - { vmerge-tail { +vector+ +vector+ -> +vector+ } } + { (vmerge-head) { +vector+ +vector+ -> +vector+ } } + { (vmerge-tail) { +vector+ +vector+ -> +vector+ } } + { (v>float) { +vector+ +literal+ -> +vector+ } } + { (v>integer) { +vector+ +literal+ -> +vector+ } } + { (vpack-signed) { +vector+ +vector+ +literal+ -> +vector+ } } + { (vpack-unsigned) { +vector+ +vector+ +literal+ -> +vector+ } } + { (vunpack-head) { +vector+ +literal+ -> +vector+ } } + { (vunpack-tail) { +vector+ +literal+ -> +vector+ } } { v<= { +vector+ +vector+ -> +vector+ } } { v< { +vector+ +vector+ -> +vector+ } } { v= { +vector+ +vector+ -> +vector+ } } @@ -152,8 +159,13 @@ ERROR: bad-vector-word word ; { [ dup complex class<= ] [ vector-words keys { vsqrt } diff ] } [ { } ] } cond - ! Don't specialize horizontal shifts or shuffles at all, they're only for SIMD - { hlshift hrshift vshuffle vbroadcast } diff + ! Don't specialize horizontal shifts, shuffles, and conversions at all, they're only for SIMD + { + hlshift hrshift vshuffle vbroadcast + (v>integer) (v>float) + (vpack-signed) (vpack-unsigned) + (vunpack-head) (vunpack-tail) + } diff nip ; :: specialize-vector-words ( array-type elt-type simd -- ) diff --git a/basis/math/vectors/vectors-docs.factor b/basis/math/vectors/vectors-docs.factor index 82bb037186..7a9aff49b6 100644 --- a/basis/math/vectors/vectors-docs.factor +++ b/basis/math/vectors/vectors-docs.factor @@ -59,7 +59,8 @@ $nl { $subsection vbroadcast } { $subsection hlshift } { $subsection hrshift } -{ $subsection vmerge } ; +{ $subsection vmerge } +{ $subsection (vmerge) } ; ARTICLE: "math-vectors-logic" "Vector component- and bit-wise logic" { $notes @@ -357,37 +358,50 @@ HELP: hrshift { $description "Shifts the entire SIMD array to the right by " { $snippet "n" } " bytes, filling the vacated left-hand bits with zeroes. This word may only be used in a context where the compiler can statically infer that the input is a SIMD array." } ; HELP: vmerge -{ $values { "u" "a sequence" } { "v" "a sequence" } { "h" "a sequence" } { "t" "a sequence" } } -{ $description "Creates two new sequences of the same type and size as " { $snippet "u" } " and " { $snippet "v" } " by interleaving the elements of " { $snippet "u" } " and " { $snippet "v" } "." } +{ $values { "u" "a sequence" } { "v" "a sequence" } { "w" "a sequence" } } +{ $description "Creates a new sequence of the same type as and twice the length of " { $snippet "u" } " and " { $snippet "v" } " by interleaving the elements of " { $snippet "u" } " and " { $snippet "v" } "." } { $examples { $example """USING: kernel math.vectors prettyprint ; -{ "A" "B" "C" "D" } { "1" "2" "3" "4" } vmerge [ . ] bi@""" +{ "A" "B" "C" "D" } { "1" "2" "3" "4" } vmerge .""" +"""{ "A" "1" "B" "2" "C" "3" "D" "4" }""" +} } ; + +HELP: (vmerge) +{ $values { "u" "a sequence" } { "v" "a sequence" } { "h" "a sequence" } { "t" "a sequence" } } +{ $description "Creates two new sequences of the same type and size as " { $snippet "u" } " and " { $snippet "v" } " by interleaving the elements of " { $snippet "u" } " and " { $snippet "v" } "." } +{ $notes "For hardware-supported SIMD vector types this word compiles to a single instruction per output value." } +{ $examples +{ $example """USING: kernel math.vectors prettyprint ; + +{ "A" "B" "C" "D" } { "1" "2" "3" "4" } (vmerge) [ . ] bi@""" """{ "A" "1" "B" "2" } { "C" "3" "D" "4" }""" } } ; -HELP: vmerge-head +HELP: (vmerge-head) { $values { "u" "a sequence" } { "v" "a sequence" } { "h" "a sequence" } } -{ $description "Creates two new sequences of the same type and size as " { $snippet "u" } " and " { $snippet "v" } " by interleaving the elements from the first half of " { $snippet "u" } " and " { $snippet "v" } "." } +{ $description "Creates a new sequence of the same type and size as " { $snippet "u" } " and " { $snippet "v" } " by interleaving the elements from the first half of " { $snippet "u" } " and " { $snippet "v" } "." } +{ $notes "For hardware-supported SIMD vector types this word compiles to a single instruction." } { $examples { $example """USING: kernel math.vectors prettyprint ; -{ "A" "B" "C" "D" } { "1" "2" "3" "4" } vmerge-head .""" +{ "A" "B" "C" "D" } { "1" "2" "3" "4" } (vmerge-head) .""" """{ "A" "1" "B" "2" }""" } } ; -HELP: vmerge-tail +HELP: (vmerge-tail) { $values { "u" "a sequence" } { "v" "a sequence" } { "t" "a sequence" } } -{ $description "Creates two new sequences of the same type and size as " { $snippet "u" } " and " { $snippet "v" } " by interleaving the elements from the tail half of " { $snippet "u" } " and " { $snippet "v" } "." } +{ $description "Creates a new sequence of the same type and size as " { $snippet "u" } " and " { $snippet "v" } " by interleaving the elements from the tail half of " { $snippet "u" } " and " { $snippet "v" } "." } +{ $notes "For hardware-supported SIMD vector types this word compiles to a single instruction." } { $examples { $example """USING: kernel math.vectors prettyprint ; -{ "A" "B" "C" "D" } { "1" "2" "3" "4" } vmerge-tail .""" +{ "A" "B" "C" "D" } { "1" "2" "3" "4" } (vmerge-tail) .""" """{ "C" "3" "D" "4" }""" } } ; -{ vmerge vmerge-head vmerge-tail } related-words +{ vmerge (vmerge) (vmerge-head) (vmerge-tail) } related-words HELP: vbroadcast { $values { "u" "a SIMD array" } { "n" "a non-negative integer" } { "v" "a SIMD array" } } diff --git a/basis/math/vectors/vectors.factor b/basis/math/vectors/vectors.factor index 3f4fe4c7b6..c65009950d 100644 --- a/basis/math/vectors/vectors.factor +++ b/basis/math/vectors/vectors.factor @@ -91,10 +91,15 @@ PRIVATE> : hlshift ( u n -- w ) '[ _ prepend 16 head ] change-underlying ; : hrshift ( u n -- w ) '[ _ append 16 tail* ] change-underlying ; -: vmerge-head ( u v -- h ) over length 2 / '[ _ head-slice ] bi@ [ zip ] keep concat-as ; -: vmerge-tail ( u v -- t ) over length 2 / '[ _ tail-slice ] bi@ [ zip ] keep concat-as ; +: (vmerge-head) ( u v -- h ) + over length 2 /i '[ _ head-slice ] bi@ [ zip ] keep concat-as ; +: (vmerge-tail) ( u v -- t ) + over length 2 /i '[ _ tail-slice ] bi@ [ zip ] keep concat-as ; -: vmerge ( u v -- h t ) [ vmerge-head ] [ vmerge-tail ] 2bi ; inline +: (vmerge) ( u v -- h t ) + [ (vmerge-head) ] [ (vmerge-tail) ] 2bi ; inline + +: vmerge ( u v -- w ) [ zip ] keep concat-as ; : vand ( u v -- w ) over '[ [ _ element>bool ] bi@ and ] 2map ; : vandn ( u v -- w ) over '[ [ _ element>bool ] bi@ [ not ] dip and ] 2map ; diff --git a/basis/random/mersenne-twister/mersenne-twister.factor b/basis/random/mersenne-twister/mersenne-twister.factor index 51112ae980..a0e40e5c38 100644 --- a/basis/random/mersenne-twister/mersenne-twister.factor +++ b/basis/random/mersenne-twister/mersenne-twister.factor @@ -80,3 +80,4 @@ M: mersenne-twister random-32* ( mt -- r ) [ default-mersenne-twister random-generator set-global ] "bootstrap.random" add-init-hook + diff --git a/basis/random/sfmt/sfmt-tests.factor b/basis/random/sfmt/sfmt-tests.factor index 9f3fea0480..f7b75c3f13 100644 --- a/basis/random/sfmt/sfmt-tests.factor +++ b/basis/random/sfmt/sfmt-tests.factor @@ -4,14 +4,27 @@ USING: accessors kernel random random.sfmt random.sfmt.private sequences tools.test ; IN: random.sfmt.tests -[ ] [ 100 drop ] unit-test +! Period certified by virtue of seed +[ ] [ 5 drop ] unit-test -[ 1096298955 ] -[ 100 dup generate dup generate uint-array>> first ] unit-test +[ 1331696015 ] +[ 5 dup generate dup generate uint-array>> first ] unit-test -[ 2556114782 ] -[ 100 random-32* ] unit-test +[ 1432875926 ] +[ 5 random-32* ] unit-test + +! Period certified by flipping a bit +[ ] [ 7 drop ] unit-test + +[ 1674111379 ] +[ 7 dup generate dup generate uint-array>> first ] unit-test + +[ 489955657 ] +[ 7 random-32* ] unit-test + + +! Test re-seeding SFMT [ t ] [ 100 diff --git a/basis/random/sfmt/sfmt.factor b/basis/random/sfmt/sfmt.factor index 6b0fc66be2..28883838ce 100644 --- a/basis/random/sfmt/sfmt.factor +++ b/basis/random/sfmt/sfmt.factor @@ -2,7 +2,8 @@ ! See http://factorcode.org/license.txt for BSD license. USING: accessors alien.c-types kernel locals math math.ranges math.bitwise math.vectors math.vectors.simd random -sequences specialized-arrays sequences.private classes.struct ; +sequences specialized-arrays sequences.private classes.struct +combinators.short-circuit fry ; SIMD: uint SPECIALIZED-ARRAY: uint SPECIALIZED-ARRAY: uint-4 @@ -16,8 +17,9 @@ STRUCT: sfmt-state { seed uint } { n uint } { m uint } - { ix uint } + { index uint } { mask uint-4 } + { parity uint-4 } { r1 uint-4 } { r2 uint-4 } ; @@ -50,12 +52,12 @@ M:: sfmt generate ( sfmt -- ) sfmt uint-4-array>> :> array state n>> 2 - array nth state (>>r1) state n>> 1 - array nth state (>>r2) - state m>> :> m - state n>> :> n + state m>> :> m + state n>> :> n state mask>> :> mask n m - >fixnum iota [| i | - i array nth-unsafe + i array nth-unsafe i m + array nth-unsafe mask state r1>> state r2>> formula :> r @@ -75,48 +77,66 @@ M:: sfmt generate ( sfmt -- ) state r2>> state (>>r1) r state (>>r2) ] each - - 0 state (>>ix) ; + + 0 state (>>index) ; + +: period-certified? ( sfmt -- ? ) + [ uint-4-array>> first ] + [ state>> parity>> ] bi vbitand odd-parity? ; + +: first-set-bit ( x -- n ) + 0 swap [ + dup { [ 0 > ] [ 1 bitand 0 = ] } 1&& + ] [ + [ 1 + ] [ -1 shift ] bi* + ] while drop ; + +: correct-period ( sfmt -- ) + [ drop 0 ] + [ state>> parity>> first first-set-bit ] + [ uint-array>> swap '[ _ toggle-bit ] change-nth ] tri ; + +: certify-period ( sfmt -- sfmt ) + dup period-certified? [ dup correct-period ] unless ; : ( sfmt -- uint-array uint-4-array ) - state>> - [ n>> 4 * iota >uint-array ] [ seed>> ] bi + state>> + [ n>> 4 * 1 swap [a,b] >uint-array ] [ seed>> ] bi [ [ - [ - [ -30 shift ] [ ] bi bitxor - state-multiplier * 32 bits - ] dip + - ] unless-zero 32 bits + [ -30 shift ] [ ] bi bitxor + state-multiplier * 32 bits + ] dip + 32 bits ] uint-array{ } accumulate-as nip dup underlying>> byte-array>uint-4-array ; -: ( seed n m mask -- sfmt ) +: ( seed n m mask parity -- sfmt ) sfmt-state + swap >>parity swap >>mask swap >>m swap >>n swap >>seed - 0 >>ix ; + 0 >>index ; : init-sfmt ( sfmt -- sfmt' ) dup [ >>uint-array ] [ >>uint-4-array ] bi* - [ generate ] keep ; inline + certify-period [ generate ] keep ; inline -: ( seed n m mask -- sfmt ) +: ( seed n m mask parity -- sfmt ) sfmt new swap >>state init-sfmt ; inline : refill-sfmt? ( sfmt -- ? ) - state>> [ ix>> ] [ n>> 4 * ] bi >= ; + state>> [ index>> ] [ n>> 4 * ] bi >= ; inline -: next-ix ( sfmt -- ix ) - state>> [ dup 1 + ] change-ix drop ; inline +: next-index ( sfmt -- index ) + state>> [ dup 1 + ] change-index drop ; inline : next ( sfmt -- n ) - [ next-ix ] [ uint-array>> ] bi nth-unsafe ; inline + [ next-index ] [ uint-array>> ] bi nth-unsafe ; inline PRIVATE> @@ -128,5 +148,10 @@ M: sfmt seed-random ( sfmt seed -- sfmt ) [ drop init-sfmt ] 2bi ; : ( seed -- sfmt ) - 348 330 uint-4{ HEX: BFFFFFF6 HEX: BFFAFFFF HEX: DDFECB7F HEX: DFFFFFEF } + 156 122 + uint-4{ HEX: dfffffef HEX: ddfecb7f HEX: bffaffff HEX: bffffff6 } + uint-4{ HEX: 1 HEX: 0 HEX: 0 HEX: 13c9e684 } ; inline + +: default-sfmt ( -- sfmt ) + [ random-32 ] with-secure-random ; diff --git a/basis/ui/gadgets/buttons/buttons.factor b/basis/ui/gadgets/buttons/buttons.factor index d5e8dff3b1..061fd8d364 100644 --- a/basis/ui/gadgets/buttons/buttons.factor +++ b/basis/ui/gadgets/buttons/buttons.factor @@ -69,8 +69,8 @@ C: button-pen : button-pen ( button pen -- button pen ) over find-button { - { [ dup { [ pressed?>> ] [ selected?>> ] } 1&& ] - [ drop pressed-selected>> + { [ dup { [ pressed?>> ] [ selected?>> ] } 1&& ] [ + drop pressed-selected>> ] } { [ dup pressed?>> ] [ drop pressed>> ] } { [ dup selected?>> ] [ drop selected>> ] } diff --git a/basis/ui/gadgets/editors/editors.factor b/basis/ui/gadgets/editors/editors.factor index 80e15d8a98..f83c5d710a 100755 --- a/basis/ui/gadgets/editors/editors.factor +++ b/basis/ui/gadgets/editors/editors.factor @@ -112,7 +112,7 @@ M: editor ungraft* } cond ; : clicked-loc ( editor -- loc ) - [ hand-rel ] [ point>loc ] bi ; + [ hand-rel ] keep point>loc ; : click-loc ( editor model -- ) [ clicked-loc ] dip set-model ; @@ -130,7 +130,7 @@ M: editor ungraft* [ loc>x ] [ [ first ] dip line>y ceiling ] 2bi 2array ; : caret-loc ( editor -- loc ) - [ editor-caret ] [ loc>point ] bi ; + [ editor-caret ] keep loc>point ; : caret-dim ( editor -- dim ) [ 0 ] dip line-height 2array ; @@ -139,7 +139,7 @@ M: editor ungraft* dup graft-state>> second [ [ [ caret-loc ] [ caret-dim { 2 1 } v+ ] bi - ] [ scroll>rect ] bi + ] keep scroll>rect ] [ drop ] if ; : draw-caret? ( editor -- ? ) @@ -212,7 +212,7 @@ M: editor cap-height font>> font-metrics cap-height>> ; [ nip relayout ] 2tri ; : caret/mark-changed ( editor -- ) - [ restart-blinking ] [ scroll>caret ] bi ; + [ restart-blinking ] keep scroll>caret ; M: editor model-changed { diff --git a/basis/ui/gadgets/gadgets.factor b/basis/ui/gadgets/gadgets.factor index a6d9028a46..e4a0e672d2 100644 --- a/basis/ui/gadgets/gadgets.factor +++ b/basis/ui/gadgets/gadgets.factor @@ -115,7 +115,7 @@ M: gadget gadget-text-separator gadget-text-separator '[ _ % ] [ gadget-text* ] interleave ; M: gadget gadget-text* - [ children>> ] [ gadget-seq-text ] bi ; + [ children>> ] keep gadget-seq-text ; M: array gadget-text* [ gadget-text* ] each ; @@ -183,7 +183,7 @@ GENERIC: pref-dim* ( gadget -- dim ) : pref-dim ( gadget -- dim ) dup pref-dim>> [ ] [ - [ pref-dim* ] [ dup layout-state>> ] bi + [ pref-dim* ] [ ] [ layout-state>> ] tri [ drop ] [ dupd (>>pref-dim) ] if ] ?if ; @@ -388,7 +388,7 @@ M: gadget request-focus-on parent>> request-focus-on ; M: f request-focus-on 2drop ; : request-focus ( gadget -- ) - [ focusable-child ] [ request-focus-on ] bi ; + [ focusable-child ] keep request-focus-on ; : focus-path ( gadget -- seq ) [ focus>> ] follow ; diff --git a/core/layouts/layouts.factor b/core/layouts/layouts.factor index 4aa806c81f..be6276a684 100644 --- a/core/layouts/layouts.factor +++ b/core/layouts/layouts.factor @@ -27,6 +27,9 @@ SYMBOL: mega-cache-size : tag-fixnum ( n -- tagged ) tag-bits get shift ; +: untag-fixnum ( n -- tagged ) + tag-bits get neg shift ; + ! We do this in its own compilation unit so that they can be ! folded below << diff --git a/extra/benchmark/mt/authors.txt b/extra/benchmark/mt/authors.txt new file mode 100644 index 0000000000..b4bd0e7b35 --- /dev/null +++ b/extra/benchmark/mt/authors.txt @@ -0,0 +1 @@ +Doug Coleman \ No newline at end of file diff --git a/extra/benchmark/mt/mt.factor b/extra/benchmark/mt/mt.factor new file mode 100644 index 0000000000..b2f907ba68 --- /dev/null +++ b/extra/benchmark/mt/mt.factor @@ -0,0 +1,11 @@ +! Copyright (C) 2009 Doug Coleman. +! See http://factorcode.org/license.txt for BSD license. +USING: fry kernel math random random.mersenne-twister ; +IN: benchmark.mt + +: mt-benchmark ( n -- ) + >fixnum HEX: 533d '[ _ random-32* drop ] times ; + +: mt-main ( -- ) 10000000 mt-benchmark ; + +MAIN: mt-main diff --git a/extra/benchmark/sfmt/sfmt.factor b/extra/benchmark/sfmt/sfmt.factor index 9b4c6e43c8..e9c69303bd 100644 --- a/extra/benchmark/sfmt/sfmt.factor +++ b/extra/benchmark/sfmt/sfmt.factor @@ -6,6 +6,6 @@ IN: benchmark.sfmt : sfmt-benchmark ( n -- ) >fixnum HEX: 533d '[ _ random-32* drop ] times ; -: sfmt-main ( -- ) 100000000 sfmt-benchmark ; +: sfmt-main ( -- ) 10000000 sfmt-benchmark ; MAIN: sfmt-main diff --git a/extra/crypto/xor/xor.factor b/extra/crypto/xor/xor.factor index 662881f8cc..45bbe55d6e 100644 --- a/extra/crypto/xor/xor.factor +++ b/extra/crypto/xor/xor.factor @@ -9,4 +9,4 @@ ERROR: empty-xor-key ; : xor-crypt ( seq key -- seq' ) [ empty-xor-key ] when-empty - [ dup length ] dip '[ _ mod-nth bitxor ] 2map ; + [ dup length iota ] dip '[ _ mod-nth bitxor ] 2map ; diff --git a/extra/math/matrices/simd/simd.factor b/extra/math/matrices/simd/simd.factor index 1fdb4f6d56..da1149dfec 100644 --- a/extra/math/matrices/simd/simd.factor +++ b/extra/math/matrices/simd/simd.factor @@ -120,7 +120,7 @@ TYPED:: m4^n ( m: matrix4 n: fixnum -- m^n: matrix4 ) identity-matrix4 n [ m m4. ] times ; : vmerge-diagonal* ( x y -- h t ) - [ vmerge-head ] [ swap vmerge-tail ] 2bi ; inline + [ (vmerge-head) ] [ swap (vmerge-tail) ] 2bi ; inline : vmerge-diagonal ( x -- h t ) 0.0 float-4-with vmerge-diagonal* ; inline @@ -128,7 +128,7 @@ TYPED: diagonal-matrix4 ( diagonal: float-4 -- matrix: matrix4 ) [ vmerge-diagonal [ vmerge-diagonal ] bi@ ] make-matrix4 ; : vmerge-transpose ( a b c d -- a' b' c' d' ) - [ vmerge ] bi-curry@ bi* ; inline + [ (vmerge) ] bi-curry@ bi* ; inline TYPED: transpose-matrix4 ( matrix: matrix4 -- matrix: matrix4 ) [ rows vmerge-transpose vmerge-transpose ] make-matrix4 ; @@ -144,8 +144,8 @@ TYPED:: translation-matrix4 ( offset: float-4 -- matrix: matrix4 ) [ float-4{ 1.0 1.0 1.0 1.0 } :> diagonal - offset 0 float-4-with vmerge - [ 0 float-4-with swap vmerge ] bi@ drop :> z :> y :> x + offset 0 float-4-with (vmerge) + [ 0 float-4-with swap (vmerge) ] bi@ drop :> z :> y :> x diagonal y vmerge-diagonal* [ x vmerge-diagonal* ] @@ -194,7 +194,7 @@ TYPED:: frustum-matrix4 ( xy: float-4 near: float far: float -- matrix: matrix4 float-4{ t t f f } xy near far - float-4-with v? ! denom v/ :> fov - fov 0.0 float-4-with vmerge-head vmerge-diagonal + fov 0.0 float-4-with (vmerge-head) vmerge-diagonal fov float-4{ f f t t } vand float-4{ 0.0 0.0 -1.0 0.0 } ] make-matrix4 ; diff --git a/extra/random/cmwc/authors.txt b/extra/random/cmwc/authors.txt new file mode 100644 index 0000000000..b4bd0e7b35 --- /dev/null +++ b/extra/random/cmwc/authors.txt @@ -0,0 +1 @@ +Doug Coleman \ No newline at end of file diff --git a/extra/random/cmwc/cmwc-tests.factor b/extra/random/cmwc/cmwc-tests.factor new file mode 100644 index 0000000000..6e3f4ac178 --- /dev/null +++ b/extra/random/cmwc/cmwc-tests.factor @@ -0,0 +1,42 @@ +! Copyright (C) 2009 Doug Coleman. +! See http://factorcode.org/license.txt for BSD license. +USING: arrays kernel random random.cmwc sequences tools.test ; +IN: random.cmwc.tests + +[ ] [ + cmwc-4096 [ + random-32 drop + ] with-random +] unit-test + +[ +{ + 4294604858 + 4294948512 + 4294929730 + 4294910948 + 4294892166 + 4294873384 + 4294854602 + 4294835820 + 4294817038 + 4294798256 +} +] [ + cmwc-4096 + 4096 iota >array 362436 seed-random [ + 10 [ random-32 ] replicate + ] with-random +] unit-test + +[ t ] [ + cmwc-4096 [ + 4096 iota >array 362436 seed-random [ + 10 [ random-32 ] replicate + ] with-random + ] [ + 4096 iota >array 362436 seed-random [ + 10 [ random-32 ] replicate + ] with-random + ] bi = +] unit-test diff --git a/extra/random/cmwc/cmwc.factor b/extra/random/cmwc/cmwc.factor new file mode 100644 index 0000000000..00258257be --- /dev/null +++ b/extra/random/cmwc/cmwc.factor @@ -0,0 +1,54 @@ +! Copyright (C) 2009 Doug Coleman. +! See http://factorcode.org/license.txt for BSD license. +USING: accessors arrays fry kernel locals math math.bitwise +random sequences ; +IN: random.cmwc + +! Multiply-with-carry RNG + +TUPLE: cmwc Q a b c i r mod ; + +TUPLE: cmwc-seed Q c ; + +: ( length a b c -- cmwc ) + cmwc new + swap >>c + swap >>b + swap >>a + swap [ 1 - >>i ] [ 0 >>Q ] bi + dup b>> 1 - >>r + dup Q>> length 1 - >>mod ; + +: ( Q c -- cmwc-seed ) + cmwc-seed new + swap >>c + swap >>Q ; inline + +M: cmwc seed-random + [ Q>> >>Q ] + [ Q>> length 1 - >>i ] + [ c>> >>c ] tri ; + +M:: cmwc random-32* ( cmwc -- n ) + cmwc dup mod>> '[ 1 + _ bitand ] change-i + [ a>> ] + [ [ i>> ] [ Q>> ] bi nth * ] + [ c>> + ] tri :> t! + + t -32 shift cmwc (>>c) + + t cmwc [ b>> bitand ] [ c>> + ] bi 64 bits t! + t cmwc r>> > [ + cmwc [ 1 + ] change-c drop + t cmwc b>> - 64 bits t! + ] when + + cmwc [ r>> t - 32 bits dup ] [ i>> ] [ Q>> ] tri set-nth ; + +: cmwc-4096 ( -- cmwc ) + 4096 + [ 18782 4294967295 362436 ] + [ + '[ [ random-32 ] replicate ] with-system-random + 362436 seed-random + ] bi ; diff --git a/extra/random/lagged-fibonacci/authors.txt b/extra/random/lagged-fibonacci/authors.txt new file mode 100644 index 0000000000..b4bd0e7b35 --- /dev/null +++ b/extra/random/lagged-fibonacci/authors.txt @@ -0,0 +1 @@ +Doug Coleman \ No newline at end of file diff --git a/extra/random/lagged-fibonacci/lagged-fibonacci-tests.factor b/extra/random/lagged-fibonacci/lagged-fibonacci-tests.factor new file mode 100644 index 0000000000..e830c466c2 --- /dev/null +++ b/extra/random/lagged-fibonacci/lagged-fibonacci-tests.factor @@ -0,0 +1,24 @@ +! Copyright (C) 2009 Doug Coleman. +! See http://factorcode.org/license.txt for BSD license. +USING: fry kernel math.functions random random.lagged-fibonacci +sequences specialized-arrays.instances.double tools.test ; +IN: random.lagged-fibonacci.tests + +[ t ] [ + 3 [ + 1000 [ random-float ] double-array{ } replicate-as + 999 swap nth 0.860072135925293 -.01 ~ + ] with-random +] unit-test + +[ t ] [ + 3 [ + [ + 1000 [ random-float ] double-array{ } replicate-as + ] with-random + ] [ + 3 seed-random [ + 1000 [ random-float ] double-array{ } replicate-as + ] with-random = + ] bi +] unit-test diff --git a/extra/random/lagged-fibonacci/lagged-fibonacci.factor b/extra/random/lagged-fibonacci/lagged-fibonacci.factor new file mode 100644 index 0000000000..45a4b132dd --- /dev/null +++ b/extra/random/lagged-fibonacci/lagged-fibonacci.factor @@ -0,0 +1,71 @@ +! Copyright (C) 2009 Doug Coleman. +! See http://factorcode.org/license.txt for BSD license. +USING: accessors alien.c-types fry kernel literals locals math +random sequences specialized-arrays namespaces ; +SPECIALIZED-ARRAY: double +IN: random.lagged-fibonacci + +TUPLE: lagged-fibonacci u pt0 pt1 ; + + + +M:: lagged-fibonacci seed-random ( lagged-fibonacci seed! -- lagged-fibonacci ) + seed normalize-seed seed! + seed 30082 /i :> ij + seed 30082 ij * - :> kl + ij 177 /i 177 mod 2 + :> i! + ij 177 mod 2 + :> j! + kl 169 /i 178 mod 1 + :> k! + kl 169 mod :> l! + + lagged-fibonacci u>> [ + drop + 0.0 :> s! + 0.5 :> t! + 0.0 :> m! + lagged-fibonacci-sig-bits [ + i j * 179 mod k * 179 mod m! + j i! + k j! + m k! + 53 l * 1 + 169 mod l! + l m * 64 mod 31 > [ s t + s! ] when + t 0.5 * t! + ] times + s + ] change-each + lagged-fibonacci p-r >>pt0 + q-r >>pt1 ; + +: ( seed -- lagged-fibonacci ) + lagged-fibonacci new + p-r 1 + >>u + swap seed-random ; + +GENERIC: random-float* ( tuple -- r ) + +: random-float ( -- n ) random-generator get random-float* ; + +M:: lagged-fibonacci random-float* ( lagged-fibonacci -- x ) + lagged-fibonacci [ pt0>> ] [ u>> ] bi nth + lagged-fibonacci [ pt1>> ] [ u>> ] bi nth - :> uni! + uni 0.0 < [ uni 1.0 + uni! ] when + uni lagged-fibonacci [ pt0>> ] [ u>> ] bi set-nth + lagged-fibonacci [ adjust-ptr ] change-pt0 drop + lagged-fibonacci [ adjust-ptr ] change-pt1 drop + uni ; inline diff --git a/vm/code_block.cpp b/vm/code_block.cpp index 54fd455ae4..f61be7ab1c 100755 --- a/vm/code_block.cpp +++ b/vm/code_block.cpp @@ -533,9 +533,13 @@ code_block *factor_vm::add_code_block(cell type, cell code_, cell labels_, cell compiled->type = type; compiled->last_scan = data->nursery(); compiled->needs_fixup = true; - compiled->relocation = relocation.value(); /* slight space optimization */ + if(relocation.type() == BYTE_ARRAY_TYPE && array_capacity(relocation.untagged()) == 0) + compiled->relocation = F; + else + compiled->relocation = relocation.value(); + if(literals.type() == ARRAY_TYPE && array_capacity(literals.untagged()) == 0) compiled->literals = F; else diff --git a/vm/os-genunix.cpp b/vm/os-genunix.cpp index 7bbe388ff2..065f0dfd40 100644 --- a/vm/os-genunix.cpp +++ b/vm/os-genunix.cpp @@ -30,6 +30,7 @@ const char *default_image_path() char *new_path = new char[PATH_MAX + SUFFIX_LEN + 1]; memcpy(new_path,path,len + 1); memcpy(new_path + len,SUFFIX,SUFFIX_LEN + 1); + free(const_cast(path)); return new_path; } diff --git a/vm/os-linux.cpp b/vm/os-linux.cpp index 2d26fba390..352467d379 100644 --- a/vm/os-linux.cpp +++ b/vm/os-linux.cpp @@ -3,7 +3,7 @@ namespace factor { -/* Snarfed from SBCL linux-so.c. You must delete[] the result yourself. */ +/* Snarfed from SBCL linux-so.c. You must free() the result yourself. */ const char *vm_executable_path() { char *path = new char[PATH_MAX + 1]; @@ -17,7 +17,10 @@ const char *vm_executable_path() else { path[size] = '\0'; - return safe_strdup(path); + + const char *ret = safe_strdup(path); + delete[] path; + return ret; } } diff --git a/vm/quotations.cpp b/vm/quotations.cpp index c1ab60b43d..f8a9a7183b 100755 --- a/vm/quotations.cpp +++ b/vm/quotations.cpp @@ -103,6 +103,28 @@ bool quotation_jit::stack_frame_p() return false; } +bool quotation_jit::trivial_quotation_p(array *elements) +{ + return array_capacity(elements) == 1 && tagged(array_nth(elements,0)).type_p(WORD_TYPE); +} + +void quotation_jit::emit_quot(cell quot_) +{ + gc_root quot(quot_,parent_vm); + + array *elements = parent_vm->untag(quot->array); + + /* If the quotation consists of a single word, compile a direct call + to the word. */ + if(trivial_quotation_p(elements)) + literal(array_nth(elements,0)); + else + { + if(compiling) parent_vm->jit_compile(quot.value(),relocate); + literal(quot.value()); + } +} + /* Allocates memory */ void quotation_jit::iterate_quotation() { @@ -194,14 +216,8 @@ void quotation_jit::iterate_quotation() if(stack_frame) emit(parent_vm->userenv[JIT_EPILOG]); tail_call = true; - if(compiling) - { - parent_vm->jit_compile(array_nth(elements.untagged(),i),relocate); - parent_vm->jit_compile(array_nth(elements.untagged(),i + 1),relocate); - } - - literal(array_nth(elements.untagged(),i)); - literal(array_nth(elements.untagged(),i + 1)); + emit_quot(array_nth(elements.untagged(),i)); + emit_quot(array_nth(elements.untagged(),i + 1)); emit(parent_vm->userenv[JIT_IF]); i += 2; @@ -209,25 +225,22 @@ void quotation_jit::iterate_quotation() /* dip */ else if(fast_dip_p(i,length)) { - if(compiling) - parent_vm->jit_compile(obj.value(),relocate); - emit_with(parent_vm->userenv[JIT_DIP],obj.value()); + emit_quot(obj.value()); + emit(parent_vm->userenv[JIT_DIP]); i++; } /* 2dip */ else if(fast_2dip_p(i,length)) { - if(compiling) - parent_vm->jit_compile(obj.value(),relocate); - emit_with(parent_vm->userenv[JIT_2DIP],obj.value()); + emit_quot(obj.value()); + emit(parent_vm->userenv[JIT_2DIP]); i++; } /* 3dip */ else if(fast_3dip_p(i,length)) { - if(compiling) - parent_vm->jit_compile(obj.value(),relocate); - emit_with(parent_vm->userenv[JIT_3DIP],obj.value()); + emit_quot(obj.value()); + emit(parent_vm->userenv[JIT_3DIP]); i++; } else diff --git a/vm/quotations.hpp b/vm/quotations.hpp index 10d2a96f66..aee4468102 100755 --- a/vm/quotations.hpp +++ b/vm/quotations.hpp @@ -13,6 +13,8 @@ struct quotation_jit : public jit { void emit_mega_cache_lookup(cell methods, fixnum index, cell cache); bool primitive_call_p(cell i, cell length); + bool trivial_quotation_p(array *elements); + void emit_quot(cell quot); bool fast_if_p(cell i, cell length); bool fast_dip_p(cell i, cell length); bool fast_2dip_p(cell i, cell length);