Merge branch 'master' of git://factorcode.org/git/factor into new_gc
commit
e81e076935
|
@ -1,5 +1,5 @@
|
|||
USING: calendar.format calendar kernel math tools.test
|
||||
io.streams.string accessors io math.order ;
|
||||
io.streams.string accessors io math.order sequences ;
|
||||
IN: calendar.format.tests
|
||||
|
||||
[ 0 ] [
|
||||
|
@ -81,3 +81,5 @@ IN: calendar.format.tests
|
|||
] [ "Thursday, 02-Oct-2008 23:59:59 GMT" cookie-string>timestamp ] unit-test
|
||||
|
||||
|
||||
[ ]
|
||||
[ { 2008 2009 } [ year. ] each ] unit-test
|
||||
|
|
|
@ -66,7 +66,7 @@ M: array month. ( pair -- )
|
|||
[ month-name write bl number>string print ]
|
||||
[ 1 zeller-congruence ]
|
||||
[ (days-in-month) day-abbreviations2 " " join print ] 2tri
|
||||
over " " <repetition> concat write
|
||||
over " " <repetition> "" concat-as write
|
||||
[
|
||||
[ 1 + day. ] keep
|
||||
1 + + 7 mod zero? [ nl ] [ bl ] if
|
||||
|
|
|
@ -277,6 +277,11 @@ literal: rep ;
|
|||
|
||||
PURE-INSN: ##shuffle-vector
|
||||
def: dst
|
||||
use: src shuffle
|
||||
literal: rep ;
|
||||
|
||||
PURE-INSN: ##shuffle-vector-imm
|
||||
def: dst
|
||||
use: src
|
||||
literal: shuffle rep ;
|
||||
|
||||
|
|
|
@ -194,7 +194,8 @@ IN: compiler.cfg.intrinsics
|
|||
{ math.vectors.simd.intrinsics:(simd-with) [ [ ^^with-vector ] emit-unary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-gather-2) [ emit-gather-vector-2 ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-gather-4) [ emit-gather-vector-4 ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vshuffle) [ emit-shuffle-vector ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vshuffle-elements) [ emit-shuffle-vector ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-vshuffle-bytes) [ emit-shuffle-vector-var ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-(vmerge-head)) [ [ ^^merge-vector-head ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-(vmerge-tail)) [ [ ^^merge-vector-tail ] emit-binary-vector-op ] }
|
||||
{ math.vectors.simd.intrinsics:(simd-(v>float)) [ [ ^^integer>float-vector ] emit-unary-vector-op ] }
|
||||
|
|
|
@ -1,15 +1,16 @@
|
|||
! Copyright (C) 2009 Slava Pestov.
|
||||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors byte-arrays fry cpu.architecture kernel math
|
||||
sequences math.vectors.simd.intrinsics macros generalizations
|
||||
combinators combinators.short-circuit arrays locals
|
||||
USING: accessors alien byte-arrays fry classes.algebra
|
||||
cpu.architecture kernel math sequences math.vectors
|
||||
math.vectors.simd.intrinsics macros generalizations combinators
|
||||
combinators.short-circuit arrays locals
|
||||
compiler.tree.propagation.info compiler.cfg.builder.blocks
|
||||
compiler.cfg.comparisons
|
||||
compiler.cfg.stacks compiler.cfg.stacks.local compiler.cfg.hats
|
||||
compiler.cfg.instructions compiler.cfg.registers
|
||||
compiler.cfg.intrinsics.alien
|
||||
specialized-arrays ;
|
||||
FROM: alien.c-types => float double ;
|
||||
FROM: alien.c-types => heap-size char uchar float double ;
|
||||
SPECIALIZED-ARRAYS: float double ;
|
||||
IN: compiler.cfg.intrinsics.simd
|
||||
|
||||
|
@ -21,7 +22,7 @@ MACRO: check-elements ( quots -- )
|
|||
|
||||
MACRO: if-literals-match ( quots -- )
|
||||
[ length ] [ ] [ length ] tri
|
||||
! n quots n n
|
||||
! n quots n
|
||||
'[
|
||||
! node quot
|
||||
[
|
||||
|
@ -77,15 +78,41 @@ MACRO: if-literals-match ( quots -- )
|
|||
|
||||
: shuffle? ( obj -- ? ) { [ array? ] [ [ integer? ] all? ] } 1&& ;
|
||||
|
||||
: emit-shuffle-vector ( node -- )
|
||||
! Pad the permutation with zeroes if its too short, since we
|
||||
: >variable-shuffle ( shuffle rep -- shuffle' )
|
||||
rep-component-type heap-size
|
||||
[ dup <repetition> >byte-array ]
|
||||
[ iota >byte-array ] bi
|
||||
'[ _ n*v _ v+ ] map concat ;
|
||||
|
||||
: generate-shuffle-vector-imm ( src shuffle rep -- dst )
|
||||
dup %shuffle-vector-imm-reps member?
|
||||
[ ^^shuffle-vector-imm ]
|
||||
[
|
||||
[ >variable-shuffle ^^load-constant ] keep
|
||||
^^shuffle-vector
|
||||
] if ;
|
||||
|
||||
: emit-shuffle-vector-imm ( node -- )
|
||||
! Pad the permutation with zeroes if it's too short, since we
|
||||
! can't throw an error at this point.
|
||||
[ [ rep-components 0 pad-tail ] keep ^^shuffle-vector ] [unary/param]
|
||||
[ [ rep-components 0 pad-tail ] keep generate-shuffle-vector-imm ] [unary/param]
|
||||
{ [ shuffle? ] [ representation? ] } if-literals-match ;
|
||||
|
||||
: emit-shuffle-vector-var ( node -- )
|
||||
[ ^^shuffle-vector ] [binary]
|
||||
{ [ %shuffle-vector-reps member? ] } if-literals-match ;
|
||||
|
||||
: emit-shuffle-vector ( node -- )
|
||||
dup node-input-infos {
|
||||
[ length 3 = ]
|
||||
[ first class>> byte-array class<= ]
|
||||
[ second class>> byte-array class<= ]
|
||||
[ third literal>> representation? ]
|
||||
} 1&& [ emit-shuffle-vector-var ] [ emit-shuffle-vector-imm ] if ;
|
||||
|
||||
: ^^broadcast-vector ( src n rep -- dst )
|
||||
[ rep-components swap <array> ] keep
|
||||
^^shuffle-vector ;
|
||||
generate-shuffle-vector-imm ;
|
||||
|
||||
: emit-broadcast-vector ( node -- )
|
||||
[ ^^broadcast-vector ] [unary/param]
|
||||
|
@ -101,6 +128,9 @@ MACRO: if-literals-match ( quots -- )
|
|||
[ ^^select-vector ] [unary/param]
|
||||
{ [ integer? ] [ representation? ] } if-literals-match ; inline
|
||||
|
||||
: emit-alien-vector-op ( node quot: ( rep -- ) -- )
|
||||
{ [ %alien-vector-reps member? ] } if-literals-match ; inline
|
||||
|
||||
: emit-alien-vector ( node -- )
|
||||
dup [
|
||||
'[
|
||||
|
@ -108,7 +138,7 @@ MACRO: if-literals-match ( quots -- )
|
|||
_ ^^alien-vector ds-push
|
||||
]
|
||||
[ inline-alien-getter? ] inline-alien
|
||||
] with emit-vector-op ;
|
||||
] with emit-alien-vector-op ;
|
||||
|
||||
: emit-set-alien-vector ( node -- )
|
||||
dup [
|
||||
|
@ -118,7 +148,7 @@ MACRO: if-literals-match ( quots -- )
|
|||
]
|
||||
[ byte-array inline-alien-setter? ]
|
||||
inline-alien
|
||||
] with emit-vector-op ;
|
||||
] with emit-alien-vector-op ;
|
||||
|
||||
: generate-not-vector ( src rep -- dst )
|
||||
dup %not-vector-reps member?
|
||||
|
|
|
@ -450,26 +450,26 @@ M: ##set-alien-vector rewrite rewrite-alien-addressing ;
|
|||
! Some lame constant folding for SIMD intrinsics. Eventually this
|
||||
! should be redone completely.
|
||||
|
||||
: rewrite-shuffle-vector ( insn expr -- insn' )
|
||||
: rewrite-shuffle-vector-imm ( insn expr -- insn' )
|
||||
2dup [ rep>> ] bi@ eq? [
|
||||
[ [ dst>> ] [ src>> vn>vreg ] bi* ]
|
||||
[ [ shuffle>> ] bi@ nths ]
|
||||
[ drop rep>> ]
|
||||
2tri \ ##shuffle-vector new-insn
|
||||
2tri \ ##shuffle-vector-imm new-insn
|
||||
] [ 2drop f ] if ;
|
||||
|
||||
: (fold-shuffle-vector) ( shuffle bytes -- bytes' )
|
||||
: (fold-shuffle-vector-imm) ( shuffle bytes -- bytes' )
|
||||
2dup length swap length /i group nths concat ;
|
||||
|
||||
: fold-shuffle-vector ( insn expr -- insn' )
|
||||
: fold-shuffle-vector-imm ( insn expr -- insn' )
|
||||
[ [ dst>> ] [ shuffle>> ] bi ] dip value>>
|
||||
(fold-shuffle-vector) \ ##load-constant new-insn ;
|
||||
(fold-shuffle-vector-imm) \ ##load-constant new-insn ;
|
||||
|
||||
M: ##shuffle-vector rewrite
|
||||
M: ##shuffle-vector-imm rewrite
|
||||
dup src>> vreg>expr {
|
||||
{ [ dup shuffle-vector-expr? ] [ rewrite-shuffle-vector ] }
|
||||
{ [ dup reference-expr? ] [ fold-shuffle-vector ] }
|
||||
{ [ dup constant-expr? ] [ fold-shuffle-vector ] }
|
||||
{ [ dup shuffle-vector-imm-expr? ] [ rewrite-shuffle-vector-imm ] }
|
||||
{ [ dup reference-expr? ] [ fold-shuffle-vector-imm ] }
|
||||
{ [ dup constant-expr? ] [ fold-shuffle-vector-imm ] }
|
||||
[ 2drop f ]
|
||||
} cond ;
|
||||
|
||||
|
|
|
@ -136,7 +136,7 @@ M: scalar>vector-expr simplify*
|
|||
[ drop f ]
|
||||
} cond ;
|
||||
|
||||
M: shuffle-vector-expr simplify*
|
||||
M: shuffle-vector-imm-expr simplify*
|
||||
[ src>> ] [ shuffle>> ] [ rep>> rep-components iota ] tri
|
||||
sequence= [ drop f ] unless ;
|
||||
|
||||
|
|
|
@ -1215,31 +1215,31 @@ cell 8 = [
|
|||
}
|
||||
] [
|
||||
{
|
||||
T{ ##shuffle-vector f 1 0 { 0 1 2 3 } float-4-rep }
|
||||
T{ ##shuffle-vector-imm f 1 0 { 0 1 2 3 } float-4-rep }
|
||||
} value-numbering-step
|
||||
] unit-test
|
||||
|
||||
[
|
||||
{
|
||||
T{ ##shuffle-vector f 1 0 { 1 2 3 0 } float-4-rep }
|
||||
T{ ##shuffle-vector f 2 0 { 0 2 3 1 } float-4-rep }
|
||||
T{ ##shuffle-vector-imm f 1 0 { 1 2 3 0 } float-4-rep }
|
||||
T{ ##shuffle-vector-imm f 2 0 { 0 2 3 1 } float-4-rep }
|
||||
}
|
||||
] [
|
||||
{
|
||||
T{ ##shuffle-vector f 1 0 { 1 2 3 0 } float-4-rep }
|
||||
T{ ##shuffle-vector f 2 1 { 3 1 2 0 } float-4-rep }
|
||||
T{ ##shuffle-vector-imm f 1 0 { 1 2 3 0 } float-4-rep }
|
||||
T{ ##shuffle-vector-imm f 2 1 { 3 1 2 0 } float-4-rep }
|
||||
} value-numbering-step
|
||||
] unit-test
|
||||
|
||||
[
|
||||
{
|
||||
T{ ##shuffle-vector f 1 0 { 1 2 3 0 } float-4-rep }
|
||||
T{ ##shuffle-vector f 2 1 { 1 0 } double-2-rep }
|
||||
T{ ##shuffle-vector-imm f 1 0 { 1 2 3 0 } float-4-rep }
|
||||
T{ ##shuffle-vector-imm f 2 1 { 1 0 } double-2-rep }
|
||||
}
|
||||
] [
|
||||
{
|
||||
T{ ##shuffle-vector f 1 0 { 1 2 3 0 } float-4-rep }
|
||||
T{ ##shuffle-vector f 2 1 { 1 0 } double-2-rep }
|
||||
T{ ##shuffle-vector-imm f 1 0 { 1 2 3 0 } float-4-rep }
|
||||
T{ ##shuffle-vector-imm f 2 1 { 1 0 } double-2-rep }
|
||||
} value-numbering-step
|
||||
] unit-test
|
||||
|
||||
|
@ -1253,7 +1253,7 @@ cell 8 = [
|
|||
{
|
||||
T{ ##load-constant f 0 $[ 55 tag-fixnum ] }
|
||||
T{ ##scalar>vector f 1 0 int-4-rep }
|
||||
T{ ##shuffle-vector f 2 1 { 0 0 0 0 } float-4-rep }
|
||||
T{ ##shuffle-vector-imm f 2 1 { 0 0 0 0 } float-4-rep }
|
||||
} value-numbering-step
|
||||
] unit-test
|
||||
|
||||
|
@ -1267,7 +1267,7 @@ cell 8 = [
|
|||
{
|
||||
T{ ##load-constant f 0 1.25 }
|
||||
T{ ##scalar>vector f 1 0 float-4-rep }
|
||||
T{ ##shuffle-vector f 2 1 { 0 0 0 0 } float-4-rep }
|
||||
T{ ##shuffle-vector-imm f 2 1 { 0 0 0 0 } float-4-rep }
|
||||
} value-numbering-step
|
||||
] unit-test
|
||||
|
||||
|
|
|
@ -154,6 +154,7 @@ CODEGEN: ##zero-vector %zero-vector
|
|||
CODEGEN: ##fill-vector %fill-vector
|
||||
CODEGEN: ##gather-vector-2 %gather-vector-2
|
||||
CODEGEN: ##gather-vector-4 %gather-vector-4
|
||||
CODEGEN: ##shuffle-vector-imm %shuffle-vector-imm
|
||||
CODEGEN: ##shuffle-vector %shuffle-vector
|
||||
CODEGEN: ##tail>head-vector %tail>head-vector
|
||||
CODEGEN: ##merge-vector-head %merge-vector-head
|
||||
|
|
|
@ -31,7 +31,8 @@ IN: compiler.tree.propagation.simd
|
|||
(simd-vrshift)
|
||||
(simd-hlshift)
|
||||
(simd-hrshift)
|
||||
(simd-vshuffle)
|
||||
(simd-vshuffle-bytes)
|
||||
(simd-vshuffle-elements)
|
||||
(simd-(vmerge-head))
|
||||
(simd-(vmerge-tail))
|
||||
(simd-(v>float))
|
||||
|
|
|
@ -242,6 +242,7 @@ HOOK: %fill-vector cpu ( dst rep -- )
|
|||
HOOK: %gather-vector-2 cpu ( dst src1 src2 rep -- )
|
||||
HOOK: %gather-vector-4 cpu ( dst src1 src2 src3 src4 rep -- )
|
||||
HOOK: %shuffle-vector cpu ( dst src shuffle rep -- )
|
||||
HOOK: %shuffle-vector-imm cpu ( dst src shuffle rep -- )
|
||||
HOOK: %tail>head-vector cpu ( dst src rep -- )
|
||||
HOOK: %merge-vector-head cpu ( dst src1 src2 rep -- )
|
||||
HOOK: %merge-vector-tail cpu ( dst src1 src2 rep -- )
|
||||
|
@ -288,7 +289,9 @@ HOOK: %zero-vector-reps cpu ( -- reps )
|
|||
HOOK: %fill-vector-reps cpu ( -- reps )
|
||||
HOOK: %gather-vector-2-reps cpu ( -- reps )
|
||||
HOOK: %gather-vector-4-reps cpu ( -- reps )
|
||||
HOOK: %alien-vector-reps cpu ( -- reps )
|
||||
HOOK: %shuffle-vector-reps cpu ( -- reps )
|
||||
HOOK: %shuffle-vector-imm-reps cpu ( -- reps )
|
||||
HOOK: %merge-vector-reps cpu ( -- reps )
|
||||
HOOK: %signed-pack-vector-reps cpu ( -- reps )
|
||||
HOOK: %unsigned-pack-vector-reps cpu ( -- reps )
|
||||
|
@ -328,7 +331,9 @@ M: object %zero-vector-reps { } ;
|
|||
M: object %fill-vector-reps { } ;
|
||||
M: object %gather-vector-2-reps { } ;
|
||||
M: object %gather-vector-4-reps { } ;
|
||||
M: object %alien-vector-reps { } ;
|
||||
M: object %shuffle-vector-reps { } ;
|
||||
M: object %shuffle-vector-imm-reps { } ;
|
||||
M: object %merge-vector-reps { } ;
|
||||
M: object %signed-pack-vector-reps { } ;
|
||||
M: object %unsigned-pack-vector-reps { } ;
|
||||
|
|
|
@ -432,8 +432,13 @@ M: x86 %epilogue ( n -- ) cell - incr-stack-reg ;
|
|||
temp 0 MOV \ t rc-absolute-cell rel-immediate
|
||||
dst temp word execute ; inline
|
||||
|
||||
: (%compare) ( src1 src2 cc -- )
|
||||
2over [ { cc= cc/= } member? ] [ register? ] [ 0 = ] tri* and and
|
||||
[ drop dup TEST ]
|
||||
[ CMP ] if ;
|
||||
|
||||
M:: x86 %compare ( dst src1 src2 cc temp -- )
|
||||
src1 src2 CMP
|
||||
src1 src2 cc (%compare)
|
||||
cc order-cc {
|
||||
{ cc< [ dst temp \ CMOVL %boolean ] }
|
||||
{ cc<= [ dst temp \ CMOVLE %boolean ] }
|
||||
|
@ -447,7 +452,7 @@ M: x86 %compare-imm ( dst src1 src2 cc temp -- )
|
|||
%compare ;
|
||||
|
||||
M:: x86 %compare-branch ( label src1 src2 cc -- )
|
||||
src1 src2 CMP
|
||||
src1 src2 cc (%compare)
|
||||
cc order-cc {
|
||||
{ cc< [ label JL ] }
|
||||
{ cc<= [ label JLE ] }
|
||||
|
@ -562,6 +567,12 @@ MACRO: available-reps ( alist -- )
|
|||
reverse [ { } ] suffix
|
||||
'[ _ cond ] ;
|
||||
|
||||
M: x86 %alien-vector-reps
|
||||
{
|
||||
{ sse? { float-4-rep } }
|
||||
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %zero-vector
|
||||
{
|
||||
{ double-2-rep [ dup XORPD ] }
|
||||
|
@ -673,11 +684,9 @@ M: x86 %gather-vector-2-reps
|
|||
[ dupd SHUFPD ]
|
||||
} case ;
|
||||
|
||||
: float-4-shuffle ( dst shuffle -- )
|
||||
: sse1-float-4-shuffle ( dst shuffle -- )
|
||||
{
|
||||
{ { 0 1 2 3 } [ drop ] }
|
||||
{ { 0 0 2 2 } [ dup MOVSLDUP ] }
|
||||
{ { 1 1 3 3 } [ dup MOVSHDUP ] }
|
||||
{ { 0 1 0 1 } [ dup MOVLHPS ] }
|
||||
{ { 2 3 2 3 } [ dup MOVHLPS ] }
|
||||
{ { 0 0 1 1 } [ dup UNPCKLPS ] }
|
||||
|
@ -685,6 +694,15 @@ M: x86 %gather-vector-2-reps
|
|||
[ dupd SHUFPS ]
|
||||
} case ;
|
||||
|
||||
: float-4-shuffle ( dst shuffle -- )
|
||||
sse3? [
|
||||
{
|
||||
{ { 0 0 2 2 } [ dup MOVSLDUP ] }
|
||||
{ { 1 1 3 3 } [ dup MOVSHDUP ] }
|
||||
[ sse1-float-4-shuffle ]
|
||||
} case
|
||||
] [ sse1-float-4-shuffle ] if ;
|
||||
|
||||
: int-4-shuffle ( dst shuffle -- )
|
||||
{
|
||||
{ { 0 1 2 3 } [ drop ] }
|
||||
|
@ -698,7 +716,7 @@ M: x86 %gather-vector-2-reps
|
|||
: longlong-2-shuffle ( dst shuffle -- )
|
||||
first2 [ 2 * dup 1 + ] bi@ 4array int-4-shuffle ;
|
||||
|
||||
M:: x86 %shuffle-vector ( dst src shuffle rep -- )
|
||||
M:: x86 %shuffle-vector-imm ( dst src shuffle rep -- )
|
||||
dst src rep %copy
|
||||
dst shuffle rep unsign-rep {
|
||||
{ double-2-rep [ double-2-shuffle ] }
|
||||
|
@ -707,12 +725,20 @@ M:: x86 %shuffle-vector ( dst src shuffle rep -- )
|
|||
{ longlong-2-rep [ longlong-2-shuffle ] }
|
||||
} case ;
|
||||
|
||||
M: x86 %shuffle-vector-reps
|
||||
M: x86 %shuffle-vector-imm-reps
|
||||
{
|
||||
{ sse? { float-4-rep } }
|
||||
{ sse2? { double-2-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %shuffle-vector ( dst src shuffle rep -- )
|
||||
two-operand PSHUFB ;
|
||||
|
||||
M: x86 %shuffle-vector-reps
|
||||
{
|
||||
{ ssse3? { float-4-rep double-2-rep longlong-2-rep ulonglong-2-rep int-4-rep uint-4-rep short-8-rep ushort-8-rep char-16-rep uchar-16-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %merge-vector-head
|
||||
[ two-operand ] keep
|
||||
unsign-rep {
|
||||
|
@ -790,8 +816,6 @@ M: x86 %unpack-vector-head-reps ( -- reps )
|
|||
{ sse4.1? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %unpack-vector-tail-reps ( -- reps ) { } ;
|
||||
|
||||
M: x86 %integer>float-vector ( dst src rep -- )
|
||||
{
|
||||
{ int-4-rep [ CVTDQ2PS ] }
|
||||
|
@ -1037,10 +1061,6 @@ M: x86 %mul-vector-reps
|
|||
{ sse4.1? { int-4-rep uint-4-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %saturated-mul-vector-reps
|
||||
! No multiplication with saturation on x86
|
||||
{ } ;
|
||||
|
||||
M: x86 %div-vector ( dst src1 src2 rep -- )
|
||||
[ two-operand ] keep
|
||||
{
|
||||
|
@ -1223,8 +1243,6 @@ M: x86 %xor-vector-reps
|
|||
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
|
||||
} available-reps ;
|
||||
|
||||
M: x86 %not-vector-reps { } ;
|
||||
|
||||
M: x86 %shl-vector ( dst src1 src2 rep -- )
|
||||
[ two-operand ] keep
|
||||
{
|
||||
|
@ -1271,6 +1289,30 @@ M:: x86 %scalar>integer ( dst src rep -- )
|
|||
{ uint-scalar-rep [
|
||||
dst 32-bit-version-of src MOVD
|
||||
] }
|
||||
{ short-scalar-rep [
|
||||
dst 32-bit-version-of src MOVD
|
||||
dst dst 16-bit-version-of MOVSX
|
||||
] }
|
||||
{ ushort-scalar-rep [
|
||||
dst 32-bit-version-of src MOVD
|
||||
dst dst 16-bit-version-of MOVZX
|
||||
] }
|
||||
{ char-scalar-rep [
|
||||
dst 32-bit-version-of src MOVD
|
||||
dst { } 8 [| tmp-dst |
|
||||
tmp-dst dst int-rep %copy
|
||||
tmp-dst tmp-dst 8-bit-version-of MOVSX
|
||||
dst tmp-dst int-rep %copy
|
||||
] with-small-register
|
||||
] }
|
||||
{ uchar-scalar-rep [
|
||||
dst 32-bit-version-of src MOVD
|
||||
dst { } 8 [| tmp-dst |
|
||||
tmp-dst dst int-rep %copy
|
||||
tmp-dst tmp-dst 8-bit-version-of MOVZX
|
||||
dst tmp-dst int-rep %copy
|
||||
] with-small-register
|
||||
] }
|
||||
} case ;
|
||||
|
||||
M: x86 %vector>scalar %copy ;
|
||||
|
|
|
@ -60,7 +60,7 @@ MACRO: simd-boa ( rep class -- simd-array )
|
|||
[ rep-components ] [ new ] [ '[ _ ] ] tri* swap replicate-as ; inline
|
||||
|
||||
: simd-with/nth-fast? ( rep -- ? )
|
||||
[ \ (simd-vshuffle) supported-simd-op? ]
|
||||
[ \ (simd-vshuffle-elements) supported-simd-op? ]
|
||||
[ rep-component-type can-be-unboxed? ]
|
||||
bi and ;
|
||||
|
||||
|
@ -184,6 +184,8 @@ WHERE
|
|||
TUPLE: A
|
||||
{ underlying byte-array read-only initial: $[ 16 <byte-array> ] } ;
|
||||
|
||||
INSTANCE: A simd-128
|
||||
|
||||
M: A clone underlying>> clone \ A boa ; inline
|
||||
|
||||
M: A length drop N ; inline
|
||||
|
@ -315,7 +317,7 @@ SLOT: underlying2
|
|||
class c:typedef ;
|
||||
|
||||
: (define-simd-256) ( simd -- )
|
||||
simd-ops get { vshuffle hlshift hrshift } unique assoc-diff >>ops
|
||||
simd-ops get { vshuffle-elements vshuffle-bytes hlshift hrshift } unique assoc-diff >>ops
|
||||
[ define-simd ]
|
||||
[ [ class>> ] [ rep>> ] bi define-simd-256-type ] bi ;
|
||||
|
||||
|
@ -362,6 +364,8 @@ TUPLE: A
|
|||
{ underlying1 byte-array initial: $[ 16 <byte-array> ] read-only }
|
||||
{ underlying2 byte-array initial: $[ 16 <byte-array> ] read-only } ;
|
||||
|
||||
INSTANCE: A simd-256
|
||||
|
||||
M: A clone
|
||||
[ underlying1>> clone ] [ underlying2>> clone ] bi
|
||||
\ A boa ; inline
|
||||
|
|
|
@ -67,7 +67,8 @@ SIMD-OP: vlshift
|
|||
SIMD-OP: vrshift
|
||||
SIMD-OP: hlshift
|
||||
SIMD-OP: hrshift
|
||||
SIMD-OP: vshuffle
|
||||
SIMD-OP: vshuffle-elements
|
||||
SIMD-OP: vshuffle-bytes
|
||||
SIMD-OP: (vmerge-head)
|
||||
SIMD-OP: (vmerge-tail)
|
||||
SIMD-OP: v<=
|
||||
|
@ -148,6 +149,9 @@ GENERIC# supported-simd-op? 1 ( rep intrinsic -- ? )
|
|||
union
|
||||
{ uchar-16-rep ushort-8-rep uint-4-rep ulonglong-2-rep } union ;
|
||||
|
||||
: (%shuffle-imm-reps) ( -- reps )
|
||||
%shuffle-vector-reps %shuffle-vector-imm-reps union ;
|
||||
|
||||
M: vector-rep supported-simd-op?
|
||||
{
|
||||
{ \ (simd-v+) [ %add-vector-reps ] }
|
||||
|
@ -179,7 +183,8 @@ M: vector-rep supported-simd-op?
|
|||
{ \ (simd-vrshift) [ %shr-vector-reps ] }
|
||||
{ \ (simd-hlshift) [ %horizontal-shl-vector-reps ] }
|
||||
{ \ (simd-hrshift) [ %horizontal-shr-vector-reps ] }
|
||||
{ \ (simd-vshuffle) [ %shuffle-vector-reps ] }
|
||||
{ \ (simd-vshuffle-elements) [ (%shuffle-imm-reps) ] }
|
||||
{ \ (simd-vshuffle-bytes) [ %shuffle-vector-reps ] }
|
||||
{ \ (simd-(vmerge-head)) [ %merge-vector-reps ] }
|
||||
{ \ (simd-(vmerge-tail)) [ %merge-vector-reps ] }
|
||||
{ \ (simd-(v>float)) [ %integer>float-vector-reps ] }
|
||||
|
|
|
@ -41,7 +41,21 @@ $nl
|
|||
POSTPONE: SIMD:
|
||||
POSTPONE: SIMDS:
|
||||
}
|
||||
"The following vector types are supported:"
|
||||
"The following scalar types are supported:"
|
||||
{ $code
|
||||
"char"
|
||||
"uchar"
|
||||
"short"
|
||||
"ushort"
|
||||
"int"
|
||||
"uint"
|
||||
"longlong"
|
||||
"ulonglong"
|
||||
"float"
|
||||
"double"
|
||||
}
|
||||
|
||||
"The following vector types are generated from the above scalar types:"
|
||||
{ $code
|
||||
"char-16"
|
||||
"uchar-16"
|
||||
|
@ -89,6 +103,7 @@ $nl
|
|||
{ $code
|
||||
"""USING: compiler.tree.debugger math.vectors
|
||||
math.vectors.simd ;
|
||||
SIMD: double
|
||||
SYMBOLS: x y ;
|
||||
|
||||
[
|
||||
|
@ -107,7 +122,7 @@ IN: simd-demo
|
|||
{ float-4 float-4 float-4 } declare
|
||||
[ v* ] [ [ 1.0 ] dip n-v v* ] bi-curry* bi v+ ;
|
||||
|
||||
\ interpolate optimizer-report.""" }
|
||||
\\ interpolate optimizer-report.""" }
|
||||
"Note that using " { $link declare } " is not recommended. Safer ways of getting type information for the input parameters to a word include defining methods on a generic word (the value being dispatched upon has a statically known type in the method body), as well as using " { $link "hints" } " and " { $link POSTPONE: inline } " declarations."
|
||||
$nl
|
||||
"Here is a better version of the " { $snippet "interpolate" } " words above that uses hints:"
|
||||
|
@ -122,7 +137,7 @@ IN: simd-demo
|
|||
|
||||
HINTS: interpolate float-4 float-4 float-4 ;
|
||||
|
||||
\ interpolate optimizer-report. """ }
|
||||
\\ interpolate optimizer-report. """ }
|
||||
"This time, the optimizer report lists calls to both SIMD primitives and high-level vector words, because hints cause two code paths to be generated. The " { $snippet "optimized." } " word can be used to make sure that the fast code path consists entirely of calls to primitives."
|
||||
$nl
|
||||
"If the " { $snippet "interpolate" } " word was to be used in several places with different types of vectors, it would be best to declare it " { $link POSTPONE: inline } "."
|
||||
|
@ -153,13 +168,13 @@ M: actor advance ( dt actor -- )
|
|||
[ >float ] dip
|
||||
[ update-velocity ] [ update-position ] 2bi ;
|
||||
|
||||
M\ actor advance optimized."""
|
||||
M\\ actor advance optimized."""
|
||||
}
|
||||
"The " { $vocab-link "compiler.cfg.debugger" } " vocabulary can give a lower-level picture of the generated code, that includes register assignments and other low-level details. To look at low-level optimizer output, call " { $snippet "test-mr mr." } " on a word or quotation:"
|
||||
{ $code
|
||||
"""USE: compiler.tree.debugger
|
||||
|
||||
M\ actor advance test-mr mr.""" }
|
||||
M\\ actor advance test-mr mr.""" }
|
||||
"An example of a high-performance algorithm that uses SIMD primitives can be found in the " { $vocab-link "benchmark.nbody-simd" } " vocabulary." ;
|
||||
|
||||
ARTICLE: "math.vectors.simd.intrinsics" "Low-level SIMD primitives"
|
||||
|
@ -206,7 +221,7 @@ ARTICLE: "math.vectors.simd" "Hardware vector arithmetic (SIMD)"
|
|||
HELP: SIMD:
|
||||
{ $syntax "SIMD: type" }
|
||||
{ $values { "type" "a scalar C type" } }
|
||||
{ $description "Defines 128-bit and 256-bit SIMD arrays for holding elements of " { $snippet "type" } " into the vocabulary search path. The possible type/length combinations are listed in " { $link "math.vectors.simd.types" } " and the generated words are documented in " { $link "math.vectors.simd.words" } "." } ;
|
||||
{ $description "Defines 128-bit and 256-bit SIMD arrays for holding elements of " { $snippet "type" } " into the vocabulary search path. The allowed scalar types, and the auto-generated type/length vector combinations that result, are listed in " { $link "math.vectors.simd.types" } ". Generated words are documented in " { $link "math.vectors.simd.words" } "." } ;
|
||||
|
||||
HELP: SIMDS:
|
||||
{ $syntax "SIMDS: type type type ... ;" }
|
||||
|
|
|
@ -174,7 +174,7 @@ CONSTANT: simd-classes
|
|||
: remove-special-words ( alist -- alist' )
|
||||
! These have their own tests later
|
||||
{
|
||||
hlshift hrshift vshuffle vbroadcast
|
||||
hlshift hrshift vshuffle-bytes vshuffle-elements vbroadcast
|
||||
vany? vall? vnone?
|
||||
(v>float) (v>integer)
|
||||
(vpack-signed) (vpack-unsigned)
|
||||
|
@ -360,6 +360,23 @@ simd-classes [
|
|||
] unit-test
|
||||
] each
|
||||
|
||||
"== Checking variable shuffles" print
|
||||
|
||||
: random-shift-vector ( class -- vec )
|
||||
new [ drop 16 random ] map ;
|
||||
|
||||
:: test-shift-vector ( class -- ? )
|
||||
class random-int-vector :> src
|
||||
char-16 random-shift-vector :> perm
|
||||
{ class char-16 } :> decl
|
||||
|
||||
src perm vshuffle
|
||||
src perm [ decl declare vshuffle ] compile-call
|
||||
= ; inline
|
||||
|
||||
{ char-16 uchar-16 short-8 ushort-8 int-4 uint-4 longlong-2 ulonglong-2 }
|
||||
[ 10 swap '[ [ t ] [ _ test-shift-vector ] unit-test ] times ] each
|
||||
|
||||
"== Checking vector tests" print
|
||||
|
||||
:: test-vector-tests-bool ( vector declaration -- none? any? all? )
|
||||
|
@ -512,38 +529,38 @@ SYMBOL: !!inconsistent!!
|
|||
|
||||
STRUCT: simd-struct
|
||||
{ x float-4 }
|
||||
{ y double-2 }
|
||||
{ y longlong-2 }
|
||||
{ z double-4 }
|
||||
{ w float-8 } ;
|
||||
{ w int-8 } ;
|
||||
|
||||
[ t ] [ [ simd-struct <struct> ] compile-call >c-ptr [ 0 = ] all? ] unit-test
|
||||
|
||||
[
|
||||
float-4{ 1 2 3 4 }
|
||||
double-2{ 2 1 }
|
||||
longlong-2{ 2 1 }
|
||||
double-4{ 4 3 2 1 }
|
||||
float-8{ 1 2 3 4 5 6 7 8 }
|
||||
int-8{ 1 2 3 4 5 6 7 8 }
|
||||
] [
|
||||
simd-struct <struct>
|
||||
float-4{ 1 2 3 4 } >>x
|
||||
double-2{ 2 1 } >>y
|
||||
longlong-2{ 2 1 } >>y
|
||||
double-4{ 4 3 2 1 } >>z
|
||||
float-8{ 1 2 3 4 5 6 7 8 } >>w
|
||||
int-8{ 1 2 3 4 5 6 7 8 } >>w
|
||||
{ [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave
|
||||
] unit-test
|
||||
|
||||
[
|
||||
float-4{ 1 2 3 4 }
|
||||
double-2{ 2 1 }
|
||||
longlong-2{ 2 1 }
|
||||
double-4{ 4 3 2 1 }
|
||||
float-8{ 1 2 3 4 5 6 7 8 }
|
||||
int-8{ 1 2 3 4 5 6 7 8 }
|
||||
] [
|
||||
[
|
||||
simd-struct <struct>
|
||||
float-4{ 1 2 3 4 } >>x
|
||||
double-2{ 2 1 } >>y
|
||||
longlong-2{ 2 1 } >>y
|
||||
double-4{ 4 3 2 1 } >>z
|
||||
float-8{ 1 2 3 4 5 6 7 8 } >>w
|
||||
int-8{ 1 2 3 4 5 6 7 8 } >>w
|
||||
{ [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave
|
||||
] compile-call
|
||||
] unit-test
|
||||
|
|
|
@ -98,7 +98,8 @@ H{
|
|||
{ vrshift { +vector+ +scalar+ -> +vector+ } }
|
||||
{ hlshift { +vector+ +literal+ -> +vector+ } }
|
||||
{ hrshift { +vector+ +literal+ -> +vector+ } }
|
||||
{ vshuffle { +vector+ +literal+ -> +vector+ } }
|
||||
{ vshuffle-elements { +vector+ +literal+ -> +vector+ } }
|
||||
{ vshuffle-bytes { +vector+ +vector+ -> +vector+ } }
|
||||
{ vbroadcast { +vector+ +literal+ -> +vector+ } }
|
||||
{ (vmerge-head) { +vector+ +vector+ -> +vector+ } }
|
||||
{ (vmerge-tail) { +vector+ +vector+ -> +vector+ } }
|
||||
|
@ -162,7 +163,7 @@ ERROR: bad-vector-word word ;
|
|||
} cond
|
||||
! Don't specialize horizontal shifts, shuffles, and conversions at all, they're only for SIMD
|
||||
{
|
||||
hlshift hrshift vshuffle vbroadcast
|
||||
hlshift hrshift vshuffle-elements vshuffle-bytes vbroadcast
|
||||
(v>integer) (v>float)
|
||||
(vpack-signed) (vpack-unsigned)
|
||||
(vunpack-head) (vunpack-tail)
|
||||
|
|
|
@ -6,6 +6,9 @@ locals ;
|
|||
QUALIFIED-WITH: alien.c-types c
|
||||
IN: math.vectors
|
||||
|
||||
MIXIN: simd-128
|
||||
MIXIN: simd-256
|
||||
|
||||
GENERIC: element-type ( obj -- c-type )
|
||||
M: object element-type drop f ; inline
|
||||
|
||||
|
@ -83,7 +86,20 @@ PRIVATE>
|
|||
: vbitnot ( u -- w ) dup '[ _ [ bitnot ] fp-bitwise-unary ] map ;
|
||||
|
||||
:: vbroadcast ( u n -- v ) u length n u nth <repetition> u like ;
|
||||
: vshuffle ( u perm -- v ) swap [ '[ _ nth ] ] keep map-as ;
|
||||
|
||||
: vshuffle-elements ( u perm -- v )
|
||||
swap [ '[ _ nth ] ] keep map-as ;
|
||||
|
||||
: vshuffle-bytes ( u perm -- v )
|
||||
underlying>> [
|
||||
swap [ '[ _ nth ] ] keep map-as
|
||||
] curry change-underlying ;
|
||||
|
||||
GENERIC: vshuffle ( u perm -- v )
|
||||
M: array vshuffle ( u perm -- v )
|
||||
vshuffle-elements ; inline
|
||||
M: simd-128 vshuffle ( u perm -- v )
|
||||
vshuffle-bytes ; inline
|
||||
|
||||
: vlshift ( u n -- w ) '[ _ shift ] map ;
|
||||
: vrshift ( u n -- w ) neg '[ _ shift ] map ;
|
||||
|
@ -107,9 +123,9 @@ PRIVATE>
|
|||
: vxor ( u v -- w ) over '[ [ _ element>bool ] bi@ xor ] 2map ;
|
||||
: vnot ( u -- w ) dup '[ _ element>bool not ] map ;
|
||||
|
||||
: vall? ( v -- ? ) [ ] all? ;
|
||||
: vany? ( v -- ? ) [ ] any? ;
|
||||
: vnone? ( v -- ? ) [ not ] all? ;
|
||||
: vall? ( v -- ? ) dup '[ _ element>bool ] all? ;
|
||||
: vany? ( v -- ? ) dup '[ _ element>bool ] any? ;
|
||||
: vnone? ( v -- ? ) dup '[ _ element>bool not ] all? ;
|
||||
|
||||
: v< ( u v -- w ) [ < ] 2map ;
|
||||
: v<= ( u v -- w ) [ <= ] 2map ;
|
||||
|
|
|
@ -2,7 +2,8 @@
|
|||
! See http://factorcode.org/license.txt for BSD license.
|
||||
USING: accessors alien alien.c-types alien.data alien.parser
|
||||
assocs byte-arrays classes compiler.units functors kernel lexer
|
||||
libc math math.vectors math.vectors.specialization namespaces
|
||||
libc math math.vectors math.vectors.private
|
||||
math.vectors.specialization namespaces
|
||||
parser prettyprint.custom sequences sequences.private strings
|
||||
summary vocabs vocabs.loader vocabs.parser vocabs.generated
|
||||
words fry combinators present ;
|
||||
|
@ -68,6 +69,8 @@ TUPLE: A
|
|||
[ drop \ T bad-byte-array-length ] unless
|
||||
<direct-A> ; inline
|
||||
|
||||
M: A new-underlying drop byte-array>A ;
|
||||
|
||||
M: A clone [ underlying>> clone ] [ length>> ] bi <direct-A> ; inline
|
||||
|
||||
M: A length length>> ; inline
|
||||
|
|
|
@ -129,17 +129,18 @@ ARTICLE: "tuple-inheritance-example" "Tuple subclassing example"
|
|||
}
|
||||
"Rectangles and parallelograms use the same algorithm for computing the area, whereas they use different algorithms for computing perimiter. Also, rectangles and parallelograms both have " { $snippet "width" } " and " { $snippet "height" } " slots. We can exploit this with subclassing:"
|
||||
{ $code
|
||||
"USING: accessors kernel math math.constants math.functions ;"
|
||||
"GENERIC: area ( shape -- n )"
|
||||
"GENERIC: perimiter ( shape -- n )"
|
||||
""
|
||||
"TUPLE: shape ;"
|
||||
""
|
||||
"TUPLE: circle < shape radius ;"
|
||||
"M: area circle radius>> sq pi * ;"
|
||||
"M: perimiter circle radius>> 2 * pi * ;"
|
||||
"M: circle area radius>> sq pi * ;"
|
||||
"M: circle perimiter radius>> 2 * pi * ;"
|
||||
""
|
||||
"TUPLE: quad < shape width height"
|
||||
"M: area quad [ width>> ] [ height>> ] bi * ;"
|
||||
"TUPLE: quad < shape width height ;"
|
||||
"M: quad area [ width>> ] [ height>> ] bi * ;"
|
||||
""
|
||||
"TUPLE: rectangle < quad ;"
|
||||
"M: rectangle perimiter [ width>> 2 * ] [ height>> 2 * ] bi + ;"
|
||||
|
|
|
@ -52,10 +52,10 @@ IN: math.matrices.simd.tests
|
|||
[
|
||||
S{ matrix4 f
|
||||
float-4-array{
|
||||
float-4{ 1.0 0.0 0.0 3.0 }
|
||||
float-4{ 0.0 1.0 0.0 4.0 }
|
||||
float-4{ 0.0 0.0 1.0 2.0 }
|
||||
float-4{ 0.0 0.0 0.0 1.0 }
|
||||
float-4{ 1.0 0.0 0.0 0.0 }
|
||||
float-4{ 0.0 1.0 0.0 0.0 }
|
||||
float-4{ 0.0 0.0 1.0 0.0 }
|
||||
float-4{ 3.0 4.0 2.0 1.0 }
|
||||
}
|
||||
}
|
||||
] [ float-4{ 3.0 4.0 2.0 0.0 } translation-matrix4 ] unit-test
|
||||
|
@ -77,9 +77,9 @@ IN: math.matrices.simd.tests
|
|||
float-4{ 0.0 1.0 0.0 1.0 } pi 1/2. * rotation-matrix4
|
||||
S{ matrix4 f
|
||||
float-4-array{
|
||||
float-4{ 0.0 0.0 1.0 0.0 }
|
||||
float-4{ 0.0 0.0 -1.0 0.0 }
|
||||
float-4{ 0.0 1.0 0.0 0.0 }
|
||||
float-4{ -1.0 0.0 0.0 0.0 }
|
||||
float-4{ 1.0 0.0 0.0 0.0 }
|
||||
float-4{ 0.0 0.0 0.0 1.0 }
|
||||
}
|
||||
}
|
||||
|
@ -89,10 +89,10 @@ IN: math.matrices.simd.tests
|
|||
[
|
||||
S{ matrix4 f
|
||||
float-4-array{
|
||||
float-4{ 2.0 0.0 0.0 10.0 }
|
||||
float-4{ 0.0 3.0 0.0 18.0 }
|
||||
float-4{ 0.0 0.0 4.0 28.0 }
|
||||
float-4{ 0.0 0.0 0.0 1.0 }
|
||||
float-4{ 2.0 0.0 0.0 0.0 }
|
||||
float-4{ 0.0 3.0 0.0 0.0 }
|
||||
float-4{ 0.0 0.0 4.0 0.0 }
|
||||
float-4{ 10.0 18.0 28.0 1.0 }
|
||||
}
|
||||
}
|
||||
] [
|
||||
|
@ -106,10 +106,10 @@ IN: math.matrices.simd.tests
|
|||
}
|
||||
S{ matrix4 f
|
||||
float-4-array{
|
||||
float-4{ 1.0 0.0 0.0 5.0 }
|
||||
float-4{ 0.0 1.0 0.0 6.0 }
|
||||
float-4{ 0.0 0.0 1.0 7.0 }
|
||||
float-4{ 0.0 0.0 0.0 1.0 }
|
||||
float-4{ 1.0 0.0 0.0 0.0 }
|
||||
float-4{ 0.0 1.0 0.0 0.0 }
|
||||
float-4{ 0.0 0.0 1.0 0.0 }
|
||||
float-4{ 5.0 6.0 7.0 1.0 }
|
||||
}
|
||||
}
|
||||
m4.
|
||||
|
@ -118,10 +118,10 @@ IN: math.matrices.simd.tests
|
|||
[
|
||||
S{ matrix4 f
|
||||
float-4-array{
|
||||
float-4{ 3.0 0.0 0.0 5.0 }
|
||||
float-4{ 0.0 4.0 0.0 6.0 }
|
||||
float-4{ 0.0 0.0 5.0 7.0 }
|
||||
float-4{ 0.0 0.0 0.0 2.0 }
|
||||
float-4{ 3.0 0.0 0.0 0.0 }
|
||||
float-4{ 0.0 4.0 0.0 0.0 }
|
||||
float-4{ 0.0 0.0 5.0 0.0 }
|
||||
float-4{ 5.0 6.0 7.0 2.0 }
|
||||
}
|
||||
}
|
||||
] [
|
||||
|
@ -135,10 +135,10 @@ IN: math.matrices.simd.tests
|
|||
}
|
||||
S{ matrix4 f
|
||||
float-4-array{
|
||||
float-4{ 1.0 0.0 0.0 5.0 }
|
||||
float-4{ 0.0 1.0 0.0 6.0 }
|
||||
float-4{ 0.0 0.0 1.0 7.0 }
|
||||
float-4{ 0.0 0.0 0.0 1.0 }
|
||||
float-4{ 1.0 0.0 0.0 0.0 }
|
||||
float-4{ 0.0 1.0 0.0 0.0 }
|
||||
float-4{ 0.0 0.0 1.0 0.0 }
|
||||
float-4{ 5.0 6.0 7.0 1.0 }
|
||||
}
|
||||
}
|
||||
m4+
|
||||
|
@ -147,10 +147,10 @@ IN: math.matrices.simd.tests
|
|||
[
|
||||
S{ matrix4 f
|
||||
float-4-array{
|
||||
float-4{ 1.0 0.0 0.0 -5.0 }
|
||||
float-4{ 0.0 2.0 0.0 -6.0 }
|
||||
float-4{ 0.0 0.0 3.0 -7.0 }
|
||||
float-4{ 0.0 0.0 0.0 0.0 }
|
||||
float-4{ 1.0 0.0 0.0 0.0 }
|
||||
float-4{ 0.0 2.0 0.0 0.0 }
|
||||
float-4{ 0.0 0.0 3.0 0.0 }
|
||||
float-4{ -5.0 -6.0 -7.0 0.0 }
|
||||
}
|
||||
}
|
||||
] [
|
||||
|
@ -164,10 +164,10 @@ IN: math.matrices.simd.tests
|
|||
}
|
||||
S{ matrix4 f
|
||||
float-4-array{
|
||||
float-4{ 1.0 0.0 0.0 5.0 }
|
||||
float-4{ 0.0 1.0 0.0 6.0 }
|
||||
float-4{ 0.0 0.0 1.0 7.0 }
|
||||
float-4{ 0.0 0.0 0.0 1.0 }
|
||||
float-4{ 1.0 0.0 0.0 0.0 }
|
||||
float-4{ 0.0 1.0 0.0 0.0 }
|
||||
float-4{ 0.0 0.0 1.0 0.0 }
|
||||
float-4{ 5.0 6.0 7.0 1.0 }
|
||||
}
|
||||
}
|
||||
m4-
|
||||
|
@ -219,10 +219,10 @@ IN: math.matrices.simd.tests
|
|||
[
|
||||
S{ matrix4 f
|
||||
float-4-array{
|
||||
float-4{ 1/2. 0.0 0.0 0.0 }
|
||||
float-4{ 0.0 1/2. 0.0 0.0 }
|
||||
float-4{ 0.0 0.0 -6/4. -10/4. }
|
||||
float-4{ 0.0 0.0 -1.0 0.0 }
|
||||
float-4{ 1/2. 0.0 0.0 0.0 }
|
||||
float-4{ 0.0 1/2. 0.0 0.0 }
|
||||
float-4{ 0.0 0.0 -6/4. -1.0 }
|
||||
float-4{ 0.0 0.0 -10/4. 0.0 }
|
||||
}
|
||||
}
|
||||
] [
|
||||
|
|
|
@ -9,34 +9,34 @@ SPECIALIZED-ARRAY: float-4
|
|||
IN: math.matrices.simd
|
||||
|
||||
STRUCT: matrix4
|
||||
{ rows float-4[4] } ;
|
||||
{ columns float-4[4] } ;
|
||||
|
||||
INSTANCE: matrix4 immutable-sequence
|
||||
|
||||
M: matrix4 length drop 4 ; inline
|
||||
M: matrix4 nth-unsafe rows>> nth-unsafe ; inline
|
||||
M: matrix4 nth-unsafe columns>> nth-unsafe ; inline
|
||||
M: matrix4 new-sequence 2drop matrix4 (struct) ; inline
|
||||
|
||||
<PRIVATE
|
||||
|
||||
: rows ( a -- a1 a2 a3 a4 )
|
||||
rows>> 4 firstn ; inline
|
||||
: columns ( a -- a1 a2 a3 a4 )
|
||||
columns>> 4 firstn ; inline
|
||||
|
||||
:: set-rows ( c1 c2 c3 c4 c -- c )
|
||||
c rows>> :> rows
|
||||
c1 rows set-first
|
||||
c2 rows set-second
|
||||
c3 rows set-third
|
||||
c4 rows set-fourth
|
||||
:: set-columns ( c1 c2 c3 c4 c -- c )
|
||||
c columns>> :> columns
|
||||
c1 columns set-first
|
||||
c2 columns set-second
|
||||
c3 columns set-third
|
||||
c4 columns set-fourth
|
||||
c ; inline
|
||||
|
||||
: make-matrix4 ( quot: ( -- c1 c2 c3 c4 ) -- c )
|
||||
matrix4 (struct) swap dip set-rows ; inline
|
||||
matrix4 (struct) swap dip set-columns ; inline
|
||||
|
||||
:: 2map-rows ( a b quot -- c )
|
||||
:: 2map-columns ( a b quot -- c )
|
||||
[
|
||||
a rows :> a4 :> a3 :> a2 :> a1
|
||||
b rows :> b4 :> b3 :> b2 :> b1
|
||||
a columns :> a4 :> a3 :> a2 :> a1
|
||||
b columns :> b4 :> b3 :> b2 :> b1
|
||||
|
||||
a1 b1 quot call
|
||||
a2 b2 quot call
|
||||
|
@ -44,57 +44,57 @@ M: matrix4 new-sequence 2drop matrix4 (struct) ; inline
|
|||
a4 b4 quot call
|
||||
] make-matrix4 ; inline
|
||||
|
||||
: map-rows ( a quot -- c )
|
||||
'[ rows _ 4 napply ] make-matrix4 ; inline
|
||||
: map-columns ( a quot -- c )
|
||||
'[ columns _ 4 napply ] make-matrix4 ; inline
|
||||
|
||||
PRIVATE>
|
||||
|
||||
TYPED: m4+ ( a: matrix4 b: matrix4 -- c: matrix4 ) [ v+ ] 2map-rows ;
|
||||
TYPED: m4- ( a: matrix4 b: matrix4 -- c: matrix4 ) [ v- ] 2map-rows ;
|
||||
TYPED: m4* ( a: matrix4 b: matrix4 -- c: matrix4 ) [ v* ] 2map-rows ;
|
||||
TYPED: m4/ ( a: matrix4 b: matrix4 -- c: matrix4 ) [ v/ ] 2map-rows ;
|
||||
TYPED: m4+ ( a: matrix4 b: matrix4 -- c: matrix4 ) [ v+ ] 2map-columns ;
|
||||
TYPED: m4- ( a: matrix4 b: matrix4 -- c: matrix4 ) [ v- ] 2map-columns ;
|
||||
TYPED: m4* ( a: matrix4 b: matrix4 -- c: matrix4 ) [ v* ] 2map-columns ;
|
||||
TYPED: m4/ ( a: matrix4 b: matrix4 -- c: matrix4 ) [ v/ ] 2map-columns ;
|
||||
|
||||
TYPED: m4*n ( a: matrix4 b: float -- c: matrix4 ) [ v*n ] curry map-rows ;
|
||||
TYPED: m4/n ( a: matrix4 b: float -- c: matrix4 ) [ v/n ] curry map-rows ;
|
||||
TYPED: n*m4 ( a: float b: matrix4 -- c: matrix4 ) [ n*v ] with map-rows ;
|
||||
TYPED: n/m4 ( a: float b: matrix4 -- c: matrix4 ) [ n/v ] with map-rows ;
|
||||
TYPED: m4*n ( a: matrix4 b: float -- c: matrix4 ) [ v*n ] curry map-columns ;
|
||||
TYPED: m4/n ( a: matrix4 b: float -- c: matrix4 ) [ v/n ] curry map-columns ;
|
||||
TYPED: n*m4 ( a: float b: matrix4 -- c: matrix4 ) [ n*v ] with map-columns ;
|
||||
TYPED: n/m4 ( a: float b: matrix4 -- c: matrix4 ) [ n/v ] with map-columns ;
|
||||
|
||||
TYPED:: m4. ( a: matrix4 b: matrix4 -- c: matrix4 )
|
||||
[
|
||||
a rows :> a4 :> a3 :> a2 :> a1
|
||||
b rows :> b4 :> b3 :> b2 :> b1
|
||||
a columns :> a4 :> a3 :> a2 :> a1
|
||||
b columns :> b4 :> b3 :> b2 :> b1
|
||||
|
||||
a1 first b1 n*v :> c1a
|
||||
a2 first b1 n*v :> c2a
|
||||
a3 first b1 n*v :> c3a
|
||||
a4 first b1 n*v :> c4a
|
||||
b1 first a1 n*v :> c1a
|
||||
b2 first a1 n*v :> c2a
|
||||
b3 first a1 n*v :> c3a
|
||||
b4 first a1 n*v :> c4a
|
||||
|
||||
a1 second b2 n*v c1a v+ :> c1b
|
||||
a2 second b2 n*v c2a v+ :> c2b
|
||||
a3 second b2 n*v c3a v+ :> c3b
|
||||
a4 second b2 n*v c4a v+ :> c4b
|
||||
b1 second a2 n*v c1a v+ :> c1b
|
||||
b2 second a2 n*v c2a v+ :> c2b
|
||||
b3 second a2 n*v c3a v+ :> c3b
|
||||
b4 second a2 n*v c4a v+ :> c4b
|
||||
|
||||
a1 third b3 n*v c1b v+ :> c1c
|
||||
a2 third b3 n*v c2b v+ :> c2c
|
||||
a3 third b3 n*v c3b v+ :> c3c
|
||||
a4 third b3 n*v c4b v+ :> c4c
|
||||
b1 third a3 n*v c1b v+ :> c1c
|
||||
b2 third a3 n*v c2b v+ :> c2c
|
||||
b3 third a3 n*v c3b v+ :> c3c
|
||||
b4 third a3 n*v c4b v+ :> c4c
|
||||
|
||||
a1 fourth b4 n*v c1c v+
|
||||
a2 fourth b4 n*v c2c v+
|
||||
a3 fourth b4 n*v c3c v+
|
||||
a4 fourth b4 n*v c4c v+
|
||||
b1 fourth a4 n*v c1c v+
|
||||
b2 fourth a4 n*v c2c v+
|
||||
b3 fourth a4 n*v c3c v+
|
||||
b4 fourth a4 n*v c4c v+
|
||||
] make-matrix4 ;
|
||||
|
||||
TYPED:: v.m4 ( a: float-4 b: matrix4 -- c: float-4 )
|
||||
b rows :> b4 :> b3 :> b2 :> b1
|
||||
TYPED:: m4.v ( m: matrix4 v: float-4 -- v': float-4 )
|
||||
m columns :> m4 :> m3 :> m2 :> m1
|
||||
|
||||
a first b1 n*v
|
||||
a second b2 n*v v+
|
||||
a third b3 n*v v+
|
||||
a fourth b4 n*v v+ ;
|
||||
v first m1 n*v
|
||||
v second m2 n*v v+
|
||||
v third m3 n*v v+
|
||||
v fourth m4 n*v v+ ;
|
||||
|
||||
TYPED:: m4.v ( a: matrix4 b: float-4 -- c: float-4 )
|
||||
a rows [ b v. ] 4 napply float-4-boa ;
|
||||
TYPED:: v.m4 ( v: float-4 m: matrix4 -- c: float-4 )
|
||||
m columns [ v v. ] 4 napply float-4-boa ;
|
||||
|
||||
CONSTANT: identity-matrix4
|
||||
S{ matrix4 f
|
||||
|
@ -131,37 +131,37 @@ TYPED: diagonal-matrix4 ( diagonal: float-4 -- matrix: matrix4 )
|
|||
[ (vmerge) ] bi-curry@ bi* ; inline
|
||||
|
||||
TYPED: transpose-matrix4 ( matrix: matrix4 -- matrix: matrix4 )
|
||||
[ rows vmerge-transpose vmerge-transpose ] make-matrix4 ;
|
||||
[ columns vmerge-transpose vmerge-transpose ] make-matrix4 ;
|
||||
|
||||
: linear>homogeneous ( v -- v' )
|
||||
[ float-4{ t t t f } ] dip float-4{ 0.0 0.0 0.0 1.0 } v? ; inline
|
||||
|
||||
: scale-matrix4 ( factors -- matrix )
|
||||
[ float-4{ t t t f } ] dip float-4{ 0.0 0.0 0.0 1.0 } v?
|
||||
diagonal-matrix4 ; inline
|
||||
linear>homogeneous diagonal-matrix4 ; inline
|
||||
|
||||
: ortho-matrix4 ( factors -- matrix )
|
||||
float-4{ 1.0 1.0 1.0 1.0 } swap v/ scale-matrix4 ; inline
|
||||
|
||||
TYPED:: translation-matrix4 ( offset: float-4 -- matrix: matrix4 )
|
||||
TYPED: translation-matrix4 ( offset: float-4 -- matrix: matrix4 )
|
||||
[
|
||||
float-4{ 1.0 1.0 1.0 1.0 } :> diagonal
|
||||
|
||||
offset 0 float-4-with (vmerge)
|
||||
[ 0 float-4-with swap (vmerge) ] bi@ drop :> z :> y :> x
|
||||
|
||||
diagonal y vmerge-diagonal*
|
||||
[ x vmerge-diagonal* ]
|
||||
[ z vmerge-diagonal* ] bi*
|
||||
linear>homogeneous
|
||||
[
|
||||
float-4{ 1.0 0.0 0.0 0.0 }
|
||||
float-4{ 0.0 1.0 0.0 0.0 }
|
||||
float-4{ 0.0 0.0 1.0 0.0 }
|
||||
] dip
|
||||
] make-matrix4 ;
|
||||
|
||||
TYPED:: rotation-matrix4 ( axis: float-4 theta: float -- matrix: matrix4 )
|
||||
! x*x + c*(1.0 - x*x) x*y*(1.0 - c) - s*z x*z*(1.0 - c) + s*y 0
|
||||
! x*y*(1.0 - c) + s*z y*y + c*(1.0 - y*y) y*z*(1.0 - c) - s*x 0
|
||||
! x*z*(1.0 - c) - s*y y*z*(1.0 - c) + s*x z*z + c*(1.0 - z*z) 0
|
||||
! x*x + c*(1.0 - x*x) x*y*(1.0 - c) + s*z x*z*(1.0 - c) - s*y 0
|
||||
! x*y*(1.0 - c) - s*z y*y + c*(1.0 - y*y) y*z*(1.0 - c) + s*x 0
|
||||
! x*z*(1.0 - c) + s*y y*z*(1.0 - c) - s*x z*z + c*(1.0 - z*z) 0
|
||||
! 0 0 0 1
|
||||
matrix4 (struct) :> triangle-m
|
||||
theta cos :> c
|
||||
theta sin :> s
|
||||
|
||||
float-4{ 1.0 -1.0 1.0 0.0 } :> triangle-sign
|
||||
float-4{ -1.0 1.0 -1.0 0.0 } :> triangle-sign
|
||||
|
||||
c float-4-with :> cc
|
||||
s float-4-with :> ss
|
||||
|
@ -184,7 +184,7 @@ TYPED:: rotation-matrix4 ( axis: float-4 theta: float -- matrix: matrix4 )
|
|||
triangle-lo { 1 0 3 3 } vshuffle
|
||||
float-4 new
|
||||
|
||||
triangle-m set-rows drop
|
||||
triangle-m set-columns drop
|
||||
|
||||
diagonal-m triangle-m m4+ ;
|
||||
|
||||
|
@ -194,8 +194,10 @@ TYPED:: frustum-matrix4 ( xy: float-4 near: float far: float -- matrix: matrix4
|
|||
float-4{ t t f f } xy near far - float-4-with v? ! denom
|
||||
v/ :> fov
|
||||
|
||||
fov 0.0 float-4-with (vmerge-head) vmerge-diagonal
|
||||
fov float-4{ f f t t } vand
|
||||
float-4{ 0.0 0.0 -1.0 0.0 }
|
||||
float-4{ 0.0 -1.0 0.0 0.0 } :> negone
|
||||
|
||||
fov vmerge-diagonal
|
||||
[ vmerge-diagonal ]
|
||||
[ negone (vmerge) ] bi*
|
||||
] make-matrix4 ;
|
||||
|
||||
|
|
Loading…
Reference in New Issue