Merge branch 'master' of git://factorcode.org/git/factor into new_gc

db4
Slava Pestov 2009-10-13 06:57:37 -05:00
commit e81e076935
22 changed files with 338 additions and 187 deletions

View File

@ -1,5 +1,5 @@
USING: calendar.format calendar kernel math tools.test USING: calendar.format calendar kernel math tools.test
io.streams.string accessors io math.order ; io.streams.string accessors io math.order sequences ;
IN: calendar.format.tests IN: calendar.format.tests
[ 0 ] [ [ 0 ] [
@ -81,3 +81,5 @@ IN: calendar.format.tests
] [ "Thursday, 02-Oct-2008 23:59:59 GMT" cookie-string>timestamp ] unit-test ] [ "Thursday, 02-Oct-2008 23:59:59 GMT" cookie-string>timestamp ] unit-test
[ ]
[ { 2008 2009 } [ year. ] each ] unit-test

View File

@ -66,7 +66,7 @@ M: array month. ( pair -- )
[ month-name write bl number>string print ] [ month-name write bl number>string print ]
[ 1 zeller-congruence ] [ 1 zeller-congruence ]
[ (days-in-month) day-abbreviations2 " " join print ] 2tri [ (days-in-month) day-abbreviations2 " " join print ] 2tri
over " " <repetition> concat write over " " <repetition> "" concat-as write
[ [
[ 1 + day. ] keep [ 1 + day. ] keep
1 + + 7 mod zero? [ nl ] [ bl ] if 1 + + 7 mod zero? [ nl ] [ bl ] if

View File

@ -277,6 +277,11 @@ literal: rep ;
PURE-INSN: ##shuffle-vector PURE-INSN: ##shuffle-vector
def: dst def: dst
use: src shuffle
literal: rep ;
PURE-INSN: ##shuffle-vector-imm
def: dst
use: src use: src
literal: shuffle rep ; literal: shuffle rep ;

View File

@ -194,7 +194,8 @@ IN: compiler.cfg.intrinsics
{ math.vectors.simd.intrinsics:(simd-with) [ [ ^^with-vector ] emit-unary-vector-op ] } { math.vectors.simd.intrinsics:(simd-with) [ [ ^^with-vector ] emit-unary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-gather-2) [ emit-gather-vector-2 ] } { math.vectors.simd.intrinsics:(simd-gather-2) [ emit-gather-vector-2 ] }
{ math.vectors.simd.intrinsics:(simd-gather-4) [ emit-gather-vector-4 ] } { math.vectors.simd.intrinsics:(simd-gather-4) [ emit-gather-vector-4 ] }
{ math.vectors.simd.intrinsics:(simd-vshuffle) [ emit-shuffle-vector ] } { math.vectors.simd.intrinsics:(simd-vshuffle-elements) [ emit-shuffle-vector ] }
{ math.vectors.simd.intrinsics:(simd-vshuffle-bytes) [ emit-shuffle-vector-var ] }
{ math.vectors.simd.intrinsics:(simd-(vmerge-head)) [ [ ^^merge-vector-head ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-(vmerge-head)) [ [ ^^merge-vector-head ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-(vmerge-tail)) [ [ ^^merge-vector-tail ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-(vmerge-tail)) [ [ ^^merge-vector-tail ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-(v>float)) [ [ ^^integer>float-vector ] emit-unary-vector-op ] } { math.vectors.simd.intrinsics:(simd-(v>float)) [ [ ^^integer>float-vector ] emit-unary-vector-op ] }

View File

@ -1,15 +1,16 @@
! Copyright (C) 2009 Slava Pestov. ! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: accessors byte-arrays fry cpu.architecture kernel math USING: accessors alien byte-arrays fry classes.algebra
sequences math.vectors.simd.intrinsics macros generalizations cpu.architecture kernel math sequences math.vectors
combinators combinators.short-circuit arrays locals math.vectors.simd.intrinsics macros generalizations combinators
combinators.short-circuit arrays locals
compiler.tree.propagation.info compiler.cfg.builder.blocks compiler.tree.propagation.info compiler.cfg.builder.blocks
compiler.cfg.comparisons compiler.cfg.comparisons
compiler.cfg.stacks compiler.cfg.stacks.local compiler.cfg.hats compiler.cfg.stacks compiler.cfg.stacks.local compiler.cfg.hats
compiler.cfg.instructions compiler.cfg.registers compiler.cfg.instructions compiler.cfg.registers
compiler.cfg.intrinsics.alien compiler.cfg.intrinsics.alien
specialized-arrays ; specialized-arrays ;
FROM: alien.c-types => float double ; FROM: alien.c-types => heap-size char uchar float double ;
SPECIALIZED-ARRAYS: float double ; SPECIALIZED-ARRAYS: float double ;
IN: compiler.cfg.intrinsics.simd IN: compiler.cfg.intrinsics.simd
@ -21,7 +22,7 @@ MACRO: check-elements ( quots -- )
MACRO: if-literals-match ( quots -- ) MACRO: if-literals-match ( quots -- )
[ length ] [ ] [ length ] tri [ length ] [ ] [ length ] tri
! n quots n n ! n quots n
'[ '[
! node quot ! node quot
[ [
@ -77,15 +78,41 @@ MACRO: if-literals-match ( quots -- )
: shuffle? ( obj -- ? ) { [ array? ] [ [ integer? ] all? ] } 1&& ; : shuffle? ( obj -- ? ) { [ array? ] [ [ integer? ] all? ] } 1&& ;
: emit-shuffle-vector ( node -- ) : >variable-shuffle ( shuffle rep -- shuffle' )
! Pad the permutation with zeroes if its too short, since we rep-component-type heap-size
[ dup <repetition> >byte-array ]
[ iota >byte-array ] bi
'[ _ n*v _ v+ ] map concat ;
: generate-shuffle-vector-imm ( src shuffle rep -- dst )
dup %shuffle-vector-imm-reps member?
[ ^^shuffle-vector-imm ]
[
[ >variable-shuffle ^^load-constant ] keep
^^shuffle-vector
] if ;
: emit-shuffle-vector-imm ( node -- )
! Pad the permutation with zeroes if it's too short, since we
! can't throw an error at this point. ! can't throw an error at this point.
[ [ rep-components 0 pad-tail ] keep ^^shuffle-vector ] [unary/param] [ [ rep-components 0 pad-tail ] keep generate-shuffle-vector-imm ] [unary/param]
{ [ shuffle? ] [ representation? ] } if-literals-match ; { [ shuffle? ] [ representation? ] } if-literals-match ;
: emit-shuffle-vector-var ( node -- )
[ ^^shuffle-vector ] [binary]
{ [ %shuffle-vector-reps member? ] } if-literals-match ;
: emit-shuffle-vector ( node -- )
dup node-input-infos {
[ length 3 = ]
[ first class>> byte-array class<= ]
[ second class>> byte-array class<= ]
[ third literal>> representation? ]
} 1&& [ emit-shuffle-vector-var ] [ emit-shuffle-vector-imm ] if ;
: ^^broadcast-vector ( src n rep -- dst ) : ^^broadcast-vector ( src n rep -- dst )
[ rep-components swap <array> ] keep [ rep-components swap <array> ] keep
^^shuffle-vector ; generate-shuffle-vector-imm ;
: emit-broadcast-vector ( node -- ) : emit-broadcast-vector ( node -- )
[ ^^broadcast-vector ] [unary/param] [ ^^broadcast-vector ] [unary/param]
@ -101,6 +128,9 @@ MACRO: if-literals-match ( quots -- )
[ ^^select-vector ] [unary/param] [ ^^select-vector ] [unary/param]
{ [ integer? ] [ representation? ] } if-literals-match ; inline { [ integer? ] [ representation? ] } if-literals-match ; inline
: emit-alien-vector-op ( node quot: ( rep -- ) -- )
{ [ %alien-vector-reps member? ] } if-literals-match ; inline
: emit-alien-vector ( node -- ) : emit-alien-vector ( node -- )
dup [ dup [
'[ '[
@ -108,7 +138,7 @@ MACRO: if-literals-match ( quots -- )
_ ^^alien-vector ds-push _ ^^alien-vector ds-push
] ]
[ inline-alien-getter? ] inline-alien [ inline-alien-getter? ] inline-alien
] with emit-vector-op ; ] with emit-alien-vector-op ;
: emit-set-alien-vector ( node -- ) : emit-set-alien-vector ( node -- )
dup [ dup [
@ -118,7 +148,7 @@ MACRO: if-literals-match ( quots -- )
] ]
[ byte-array inline-alien-setter? ] [ byte-array inline-alien-setter? ]
inline-alien inline-alien
] with emit-vector-op ; ] with emit-alien-vector-op ;
: generate-not-vector ( src rep -- dst ) : generate-not-vector ( src rep -- dst )
dup %not-vector-reps member? dup %not-vector-reps member?

View File

@ -450,26 +450,26 @@ M: ##set-alien-vector rewrite rewrite-alien-addressing ;
! Some lame constant folding for SIMD intrinsics. Eventually this ! Some lame constant folding for SIMD intrinsics. Eventually this
! should be redone completely. ! should be redone completely.
: rewrite-shuffle-vector ( insn expr -- insn' ) : rewrite-shuffle-vector-imm ( insn expr -- insn' )
2dup [ rep>> ] bi@ eq? [ 2dup [ rep>> ] bi@ eq? [
[ [ dst>> ] [ src>> vn>vreg ] bi* ] [ [ dst>> ] [ src>> vn>vreg ] bi* ]
[ [ shuffle>> ] bi@ nths ] [ [ shuffle>> ] bi@ nths ]
[ drop rep>> ] [ drop rep>> ]
2tri \ ##shuffle-vector new-insn 2tri \ ##shuffle-vector-imm new-insn
] [ 2drop f ] if ; ] [ 2drop f ] if ;
: (fold-shuffle-vector) ( shuffle bytes -- bytes' ) : (fold-shuffle-vector-imm) ( shuffle bytes -- bytes' )
2dup length swap length /i group nths concat ; 2dup length swap length /i group nths concat ;
: fold-shuffle-vector ( insn expr -- insn' ) : fold-shuffle-vector-imm ( insn expr -- insn' )
[ [ dst>> ] [ shuffle>> ] bi ] dip value>> [ [ dst>> ] [ shuffle>> ] bi ] dip value>>
(fold-shuffle-vector) \ ##load-constant new-insn ; (fold-shuffle-vector-imm) \ ##load-constant new-insn ;
M: ##shuffle-vector rewrite M: ##shuffle-vector-imm rewrite
dup src>> vreg>expr { dup src>> vreg>expr {
{ [ dup shuffle-vector-expr? ] [ rewrite-shuffle-vector ] } { [ dup shuffle-vector-imm-expr? ] [ rewrite-shuffle-vector-imm ] }
{ [ dup reference-expr? ] [ fold-shuffle-vector ] } { [ dup reference-expr? ] [ fold-shuffle-vector-imm ] }
{ [ dup constant-expr? ] [ fold-shuffle-vector ] } { [ dup constant-expr? ] [ fold-shuffle-vector-imm ] }
[ 2drop f ] [ 2drop f ]
} cond ; } cond ;

View File

@ -136,7 +136,7 @@ M: scalar>vector-expr simplify*
[ drop f ] [ drop f ]
} cond ; } cond ;
M: shuffle-vector-expr simplify* M: shuffle-vector-imm-expr simplify*
[ src>> ] [ shuffle>> ] [ rep>> rep-components iota ] tri [ src>> ] [ shuffle>> ] [ rep>> rep-components iota ] tri
sequence= [ drop f ] unless ; sequence= [ drop f ] unless ;

View File

@ -1215,31 +1215,31 @@ cell 8 = [
} }
] [ ] [
{ {
T{ ##shuffle-vector f 1 0 { 0 1 2 3 } float-4-rep } T{ ##shuffle-vector-imm f 1 0 { 0 1 2 3 } float-4-rep }
} value-numbering-step } value-numbering-step
] unit-test ] unit-test
[ [
{ {
T{ ##shuffle-vector f 1 0 { 1 2 3 0 } float-4-rep } T{ ##shuffle-vector-imm f 1 0 { 1 2 3 0 } float-4-rep }
T{ ##shuffle-vector f 2 0 { 0 2 3 1 } float-4-rep } T{ ##shuffle-vector-imm f 2 0 { 0 2 3 1 } float-4-rep }
} }
] [ ] [
{ {
T{ ##shuffle-vector f 1 0 { 1 2 3 0 } float-4-rep } T{ ##shuffle-vector-imm f 1 0 { 1 2 3 0 } float-4-rep }
T{ ##shuffle-vector f 2 1 { 3 1 2 0 } float-4-rep } T{ ##shuffle-vector-imm f 2 1 { 3 1 2 0 } float-4-rep }
} value-numbering-step } value-numbering-step
] unit-test ] unit-test
[ [
{ {
T{ ##shuffle-vector f 1 0 { 1 2 3 0 } float-4-rep } T{ ##shuffle-vector-imm f 1 0 { 1 2 3 0 } float-4-rep }
T{ ##shuffle-vector f 2 1 { 1 0 } double-2-rep } T{ ##shuffle-vector-imm f 2 1 { 1 0 } double-2-rep }
} }
] [ ] [
{ {
T{ ##shuffle-vector f 1 0 { 1 2 3 0 } float-4-rep } T{ ##shuffle-vector-imm f 1 0 { 1 2 3 0 } float-4-rep }
T{ ##shuffle-vector f 2 1 { 1 0 } double-2-rep } T{ ##shuffle-vector-imm f 2 1 { 1 0 } double-2-rep }
} value-numbering-step } value-numbering-step
] unit-test ] unit-test
@ -1253,7 +1253,7 @@ cell 8 = [
{ {
T{ ##load-constant f 0 $[ 55 tag-fixnum ] } T{ ##load-constant f 0 $[ 55 tag-fixnum ] }
T{ ##scalar>vector f 1 0 int-4-rep } T{ ##scalar>vector f 1 0 int-4-rep }
T{ ##shuffle-vector f 2 1 { 0 0 0 0 } float-4-rep } T{ ##shuffle-vector-imm f 2 1 { 0 0 0 0 } float-4-rep }
} value-numbering-step } value-numbering-step
] unit-test ] unit-test
@ -1267,7 +1267,7 @@ cell 8 = [
{ {
T{ ##load-constant f 0 1.25 } T{ ##load-constant f 0 1.25 }
T{ ##scalar>vector f 1 0 float-4-rep } T{ ##scalar>vector f 1 0 float-4-rep }
T{ ##shuffle-vector f 2 1 { 0 0 0 0 } float-4-rep } T{ ##shuffle-vector-imm f 2 1 { 0 0 0 0 } float-4-rep }
} value-numbering-step } value-numbering-step
] unit-test ] unit-test

View File

@ -154,6 +154,7 @@ CODEGEN: ##zero-vector %zero-vector
CODEGEN: ##fill-vector %fill-vector CODEGEN: ##fill-vector %fill-vector
CODEGEN: ##gather-vector-2 %gather-vector-2 CODEGEN: ##gather-vector-2 %gather-vector-2
CODEGEN: ##gather-vector-4 %gather-vector-4 CODEGEN: ##gather-vector-4 %gather-vector-4
CODEGEN: ##shuffle-vector-imm %shuffle-vector-imm
CODEGEN: ##shuffle-vector %shuffle-vector CODEGEN: ##shuffle-vector %shuffle-vector
CODEGEN: ##tail>head-vector %tail>head-vector CODEGEN: ##tail>head-vector %tail>head-vector
CODEGEN: ##merge-vector-head %merge-vector-head CODEGEN: ##merge-vector-head %merge-vector-head

View File

@ -31,7 +31,8 @@ IN: compiler.tree.propagation.simd
(simd-vrshift) (simd-vrshift)
(simd-hlshift) (simd-hlshift)
(simd-hrshift) (simd-hrshift)
(simd-vshuffle) (simd-vshuffle-bytes)
(simd-vshuffle-elements)
(simd-(vmerge-head)) (simd-(vmerge-head))
(simd-(vmerge-tail)) (simd-(vmerge-tail))
(simd-(v>float)) (simd-(v>float))

View File

@ -242,6 +242,7 @@ HOOK: %fill-vector cpu ( dst rep -- )
HOOK: %gather-vector-2 cpu ( dst src1 src2 rep -- ) HOOK: %gather-vector-2 cpu ( dst src1 src2 rep -- )
HOOK: %gather-vector-4 cpu ( dst src1 src2 src3 src4 rep -- ) HOOK: %gather-vector-4 cpu ( dst src1 src2 src3 src4 rep -- )
HOOK: %shuffle-vector cpu ( dst src shuffle rep -- ) HOOK: %shuffle-vector cpu ( dst src shuffle rep -- )
HOOK: %shuffle-vector-imm cpu ( dst src shuffle rep -- )
HOOK: %tail>head-vector cpu ( dst src rep -- ) HOOK: %tail>head-vector cpu ( dst src rep -- )
HOOK: %merge-vector-head cpu ( dst src1 src2 rep -- ) HOOK: %merge-vector-head cpu ( dst src1 src2 rep -- )
HOOK: %merge-vector-tail cpu ( dst src1 src2 rep -- ) HOOK: %merge-vector-tail cpu ( dst src1 src2 rep -- )
@ -288,7 +289,9 @@ HOOK: %zero-vector-reps cpu ( -- reps )
HOOK: %fill-vector-reps cpu ( -- reps ) HOOK: %fill-vector-reps cpu ( -- reps )
HOOK: %gather-vector-2-reps cpu ( -- reps ) HOOK: %gather-vector-2-reps cpu ( -- reps )
HOOK: %gather-vector-4-reps cpu ( -- reps ) HOOK: %gather-vector-4-reps cpu ( -- reps )
HOOK: %alien-vector-reps cpu ( -- reps )
HOOK: %shuffle-vector-reps cpu ( -- reps ) HOOK: %shuffle-vector-reps cpu ( -- reps )
HOOK: %shuffle-vector-imm-reps cpu ( -- reps )
HOOK: %merge-vector-reps cpu ( -- reps ) HOOK: %merge-vector-reps cpu ( -- reps )
HOOK: %signed-pack-vector-reps cpu ( -- reps ) HOOK: %signed-pack-vector-reps cpu ( -- reps )
HOOK: %unsigned-pack-vector-reps cpu ( -- reps ) HOOK: %unsigned-pack-vector-reps cpu ( -- reps )
@ -328,7 +331,9 @@ M: object %zero-vector-reps { } ;
M: object %fill-vector-reps { } ; M: object %fill-vector-reps { } ;
M: object %gather-vector-2-reps { } ; M: object %gather-vector-2-reps { } ;
M: object %gather-vector-4-reps { } ; M: object %gather-vector-4-reps { } ;
M: object %alien-vector-reps { } ;
M: object %shuffle-vector-reps { } ; M: object %shuffle-vector-reps { } ;
M: object %shuffle-vector-imm-reps { } ;
M: object %merge-vector-reps { } ; M: object %merge-vector-reps { } ;
M: object %signed-pack-vector-reps { } ; M: object %signed-pack-vector-reps { } ;
M: object %unsigned-pack-vector-reps { } ; M: object %unsigned-pack-vector-reps { } ;

View File

@ -432,8 +432,13 @@ M: x86 %epilogue ( n -- ) cell - incr-stack-reg ;
temp 0 MOV \ t rc-absolute-cell rel-immediate temp 0 MOV \ t rc-absolute-cell rel-immediate
dst temp word execute ; inline dst temp word execute ; inline
: (%compare) ( src1 src2 cc -- )
2over [ { cc= cc/= } member? ] [ register? ] [ 0 = ] tri* and and
[ drop dup TEST ]
[ CMP ] if ;
M:: x86 %compare ( dst src1 src2 cc temp -- ) M:: x86 %compare ( dst src1 src2 cc temp -- )
src1 src2 CMP src1 src2 cc (%compare)
cc order-cc { cc order-cc {
{ cc< [ dst temp \ CMOVL %boolean ] } { cc< [ dst temp \ CMOVL %boolean ] }
{ cc<= [ dst temp \ CMOVLE %boolean ] } { cc<= [ dst temp \ CMOVLE %boolean ] }
@ -447,7 +452,7 @@ M: x86 %compare-imm ( dst src1 src2 cc temp -- )
%compare ; %compare ;
M:: x86 %compare-branch ( label src1 src2 cc -- ) M:: x86 %compare-branch ( label src1 src2 cc -- )
src1 src2 CMP src1 src2 cc (%compare)
cc order-cc { cc order-cc {
{ cc< [ label JL ] } { cc< [ label JL ] }
{ cc<= [ label JLE ] } { cc<= [ label JLE ] }
@ -562,6 +567,12 @@ MACRO: available-reps ( alist -- )
reverse [ { } ] suffix reverse [ { } ] suffix
'[ _ cond ] ; '[ _ cond ] ;
M: x86 %alien-vector-reps
{
{ sse? { float-4-rep } }
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
} available-reps ;
M: x86 %zero-vector M: x86 %zero-vector
{ {
{ double-2-rep [ dup XORPD ] } { double-2-rep [ dup XORPD ] }
@ -673,11 +684,9 @@ M: x86 %gather-vector-2-reps
[ dupd SHUFPD ] [ dupd SHUFPD ]
} case ; } case ;
: float-4-shuffle ( dst shuffle -- ) : sse1-float-4-shuffle ( dst shuffle -- )
{ {
{ { 0 1 2 3 } [ drop ] } { { 0 1 2 3 } [ drop ] }
{ { 0 0 2 2 } [ dup MOVSLDUP ] }
{ { 1 1 3 3 } [ dup MOVSHDUP ] }
{ { 0 1 0 1 } [ dup MOVLHPS ] } { { 0 1 0 1 } [ dup MOVLHPS ] }
{ { 2 3 2 3 } [ dup MOVHLPS ] } { { 2 3 2 3 } [ dup MOVHLPS ] }
{ { 0 0 1 1 } [ dup UNPCKLPS ] } { { 0 0 1 1 } [ dup UNPCKLPS ] }
@ -685,6 +694,15 @@ M: x86 %gather-vector-2-reps
[ dupd SHUFPS ] [ dupd SHUFPS ]
} case ; } case ;
: float-4-shuffle ( dst shuffle -- )
sse3? [
{
{ { 0 0 2 2 } [ dup MOVSLDUP ] }
{ { 1 1 3 3 } [ dup MOVSHDUP ] }
[ sse1-float-4-shuffle ]
} case
] [ sse1-float-4-shuffle ] if ;
: int-4-shuffle ( dst shuffle -- ) : int-4-shuffle ( dst shuffle -- )
{ {
{ { 0 1 2 3 } [ drop ] } { { 0 1 2 3 } [ drop ] }
@ -698,7 +716,7 @@ M: x86 %gather-vector-2-reps
: longlong-2-shuffle ( dst shuffle -- ) : longlong-2-shuffle ( dst shuffle -- )
first2 [ 2 * dup 1 + ] bi@ 4array int-4-shuffle ; first2 [ 2 * dup 1 + ] bi@ 4array int-4-shuffle ;
M:: x86 %shuffle-vector ( dst src shuffle rep -- ) M:: x86 %shuffle-vector-imm ( dst src shuffle rep -- )
dst src rep %copy dst src rep %copy
dst shuffle rep unsign-rep { dst shuffle rep unsign-rep {
{ double-2-rep [ double-2-shuffle ] } { double-2-rep [ double-2-shuffle ] }
@ -707,12 +725,20 @@ M:: x86 %shuffle-vector ( dst src shuffle rep -- )
{ longlong-2-rep [ longlong-2-shuffle ] } { longlong-2-rep [ longlong-2-shuffle ] }
} case ; } case ;
M: x86 %shuffle-vector-reps M: x86 %shuffle-vector-imm-reps
{ {
{ sse? { float-4-rep } } { sse? { float-4-rep } }
{ sse2? { double-2-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } } { sse2? { double-2-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
} available-reps ; } available-reps ;
M: x86 %shuffle-vector ( dst src shuffle rep -- )
two-operand PSHUFB ;
M: x86 %shuffle-vector-reps
{
{ ssse3? { float-4-rep double-2-rep longlong-2-rep ulonglong-2-rep int-4-rep uint-4-rep short-8-rep ushort-8-rep char-16-rep uchar-16-rep } }
} available-reps ;
M: x86 %merge-vector-head M: x86 %merge-vector-head
[ two-operand ] keep [ two-operand ] keep
unsign-rep { unsign-rep {
@ -790,8 +816,6 @@ M: x86 %unpack-vector-head-reps ( -- reps )
{ sse4.1? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } } { sse4.1? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
} available-reps ; } available-reps ;
M: x86 %unpack-vector-tail-reps ( -- reps ) { } ;
M: x86 %integer>float-vector ( dst src rep -- ) M: x86 %integer>float-vector ( dst src rep -- )
{ {
{ int-4-rep [ CVTDQ2PS ] } { int-4-rep [ CVTDQ2PS ] }
@ -1037,10 +1061,6 @@ M: x86 %mul-vector-reps
{ sse4.1? { int-4-rep uint-4-rep } } { sse4.1? { int-4-rep uint-4-rep } }
} available-reps ; } available-reps ;
M: x86 %saturated-mul-vector-reps
! No multiplication with saturation on x86
{ } ;
M: x86 %div-vector ( dst src1 src2 rep -- ) M: x86 %div-vector ( dst src1 src2 rep -- )
[ two-operand ] keep [ two-operand ] keep
{ {
@ -1223,8 +1243,6 @@ M: x86 %xor-vector-reps
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } } { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
} available-reps ; } available-reps ;
M: x86 %not-vector-reps { } ;
M: x86 %shl-vector ( dst src1 src2 rep -- ) M: x86 %shl-vector ( dst src1 src2 rep -- )
[ two-operand ] keep [ two-operand ] keep
{ {
@ -1271,6 +1289,30 @@ M:: x86 %scalar>integer ( dst src rep -- )
{ uint-scalar-rep [ { uint-scalar-rep [
dst 32-bit-version-of src MOVD dst 32-bit-version-of src MOVD
] } ] }
{ short-scalar-rep [
dst 32-bit-version-of src MOVD
dst dst 16-bit-version-of MOVSX
] }
{ ushort-scalar-rep [
dst 32-bit-version-of src MOVD
dst dst 16-bit-version-of MOVZX
] }
{ char-scalar-rep [
dst 32-bit-version-of src MOVD
dst { } 8 [| tmp-dst |
tmp-dst dst int-rep %copy
tmp-dst tmp-dst 8-bit-version-of MOVSX
dst tmp-dst int-rep %copy
] with-small-register
] }
{ uchar-scalar-rep [
dst 32-bit-version-of src MOVD
dst { } 8 [| tmp-dst |
tmp-dst dst int-rep %copy
tmp-dst tmp-dst 8-bit-version-of MOVZX
dst tmp-dst int-rep %copy
] with-small-register
] }
} case ; } case ;
M: x86 %vector>scalar %copy ; M: x86 %vector>scalar %copy ;

View File

@ -60,7 +60,7 @@ MACRO: simd-boa ( rep class -- simd-array )
[ rep-components ] [ new ] [ '[ _ ] ] tri* swap replicate-as ; inline [ rep-components ] [ new ] [ '[ _ ] ] tri* swap replicate-as ; inline
: simd-with/nth-fast? ( rep -- ? ) : simd-with/nth-fast? ( rep -- ? )
[ \ (simd-vshuffle) supported-simd-op? ] [ \ (simd-vshuffle-elements) supported-simd-op? ]
[ rep-component-type can-be-unboxed? ] [ rep-component-type can-be-unboxed? ]
bi and ; bi and ;
@ -184,6 +184,8 @@ WHERE
TUPLE: A TUPLE: A
{ underlying byte-array read-only initial: $[ 16 <byte-array> ] } ; { underlying byte-array read-only initial: $[ 16 <byte-array> ] } ;
INSTANCE: A simd-128
M: A clone underlying>> clone \ A boa ; inline M: A clone underlying>> clone \ A boa ; inline
M: A length drop N ; inline M: A length drop N ; inline
@ -315,7 +317,7 @@ SLOT: underlying2
class c:typedef ; class c:typedef ;
: (define-simd-256) ( simd -- ) : (define-simd-256) ( simd -- )
simd-ops get { vshuffle hlshift hrshift } unique assoc-diff >>ops simd-ops get { vshuffle-elements vshuffle-bytes hlshift hrshift } unique assoc-diff >>ops
[ define-simd ] [ define-simd ]
[ [ class>> ] [ rep>> ] bi define-simd-256-type ] bi ; [ [ class>> ] [ rep>> ] bi define-simd-256-type ] bi ;
@ -362,6 +364,8 @@ TUPLE: A
{ underlying1 byte-array initial: $[ 16 <byte-array> ] read-only } { underlying1 byte-array initial: $[ 16 <byte-array> ] read-only }
{ underlying2 byte-array initial: $[ 16 <byte-array> ] read-only } ; { underlying2 byte-array initial: $[ 16 <byte-array> ] read-only } ;
INSTANCE: A simd-256
M: A clone M: A clone
[ underlying1>> clone ] [ underlying2>> clone ] bi [ underlying1>> clone ] [ underlying2>> clone ] bi
\ A boa ; inline \ A boa ; inline

View File

@ -67,7 +67,8 @@ SIMD-OP: vlshift
SIMD-OP: vrshift SIMD-OP: vrshift
SIMD-OP: hlshift SIMD-OP: hlshift
SIMD-OP: hrshift SIMD-OP: hrshift
SIMD-OP: vshuffle SIMD-OP: vshuffle-elements
SIMD-OP: vshuffle-bytes
SIMD-OP: (vmerge-head) SIMD-OP: (vmerge-head)
SIMD-OP: (vmerge-tail) SIMD-OP: (vmerge-tail)
SIMD-OP: v<= SIMD-OP: v<=
@ -148,6 +149,9 @@ GENERIC# supported-simd-op? 1 ( rep intrinsic -- ? )
union union
{ uchar-16-rep ushort-8-rep uint-4-rep ulonglong-2-rep } union ; { uchar-16-rep ushort-8-rep uint-4-rep ulonglong-2-rep } union ;
: (%shuffle-imm-reps) ( -- reps )
%shuffle-vector-reps %shuffle-vector-imm-reps union ;
M: vector-rep supported-simd-op? M: vector-rep supported-simd-op?
{ {
{ \ (simd-v+) [ %add-vector-reps ] } { \ (simd-v+) [ %add-vector-reps ] }
@ -179,7 +183,8 @@ M: vector-rep supported-simd-op?
{ \ (simd-vrshift) [ %shr-vector-reps ] } { \ (simd-vrshift) [ %shr-vector-reps ] }
{ \ (simd-hlshift) [ %horizontal-shl-vector-reps ] } { \ (simd-hlshift) [ %horizontal-shl-vector-reps ] }
{ \ (simd-hrshift) [ %horizontal-shr-vector-reps ] } { \ (simd-hrshift) [ %horizontal-shr-vector-reps ] }
{ \ (simd-vshuffle) [ %shuffle-vector-reps ] } { \ (simd-vshuffle-elements) [ (%shuffle-imm-reps) ] }
{ \ (simd-vshuffle-bytes) [ %shuffle-vector-reps ] }
{ \ (simd-(vmerge-head)) [ %merge-vector-reps ] } { \ (simd-(vmerge-head)) [ %merge-vector-reps ] }
{ \ (simd-(vmerge-tail)) [ %merge-vector-reps ] } { \ (simd-(vmerge-tail)) [ %merge-vector-reps ] }
{ \ (simd-(v>float)) [ %integer>float-vector-reps ] } { \ (simd-(v>float)) [ %integer>float-vector-reps ] }

View File

@ -41,7 +41,21 @@ $nl
POSTPONE: SIMD: POSTPONE: SIMD:
POSTPONE: SIMDS: POSTPONE: SIMDS:
} }
"The following vector types are supported:" "The following scalar types are supported:"
{ $code
"char"
"uchar"
"short"
"ushort"
"int"
"uint"
"longlong"
"ulonglong"
"float"
"double"
}
"The following vector types are generated from the above scalar types:"
{ $code { $code
"char-16" "char-16"
"uchar-16" "uchar-16"
@ -89,6 +103,7 @@ $nl
{ $code { $code
"""USING: compiler.tree.debugger math.vectors """USING: compiler.tree.debugger math.vectors
math.vectors.simd ; math.vectors.simd ;
SIMD: double
SYMBOLS: x y ; SYMBOLS: x y ;
[ [
@ -107,7 +122,7 @@ IN: simd-demo
{ float-4 float-4 float-4 } declare { float-4 float-4 float-4 } declare
[ v* ] [ [ 1.0 ] dip n-v v* ] bi-curry* bi v+ ; [ v* ] [ [ 1.0 ] dip n-v v* ] bi-curry* bi v+ ;
\ interpolate optimizer-report.""" } \\ interpolate optimizer-report.""" }
"Note that using " { $link declare } " is not recommended. Safer ways of getting type information for the input parameters to a word include defining methods on a generic word (the value being dispatched upon has a statically known type in the method body), as well as using " { $link "hints" } " and " { $link POSTPONE: inline } " declarations." "Note that using " { $link declare } " is not recommended. Safer ways of getting type information for the input parameters to a word include defining methods on a generic word (the value being dispatched upon has a statically known type in the method body), as well as using " { $link "hints" } " and " { $link POSTPONE: inline } " declarations."
$nl $nl
"Here is a better version of the " { $snippet "interpolate" } " words above that uses hints:" "Here is a better version of the " { $snippet "interpolate" } " words above that uses hints:"
@ -122,7 +137,7 @@ IN: simd-demo
HINTS: interpolate float-4 float-4 float-4 ; HINTS: interpolate float-4 float-4 float-4 ;
\ interpolate optimizer-report. """ } \\ interpolate optimizer-report. """ }
"This time, the optimizer report lists calls to both SIMD primitives and high-level vector words, because hints cause two code paths to be generated. The " { $snippet "optimized." } " word can be used to make sure that the fast code path consists entirely of calls to primitives." "This time, the optimizer report lists calls to both SIMD primitives and high-level vector words, because hints cause two code paths to be generated. The " { $snippet "optimized." } " word can be used to make sure that the fast code path consists entirely of calls to primitives."
$nl $nl
"If the " { $snippet "interpolate" } " word was to be used in several places with different types of vectors, it would be best to declare it " { $link POSTPONE: inline } "." "If the " { $snippet "interpolate" } " word was to be used in several places with different types of vectors, it would be best to declare it " { $link POSTPONE: inline } "."
@ -153,13 +168,13 @@ M: actor advance ( dt actor -- )
[ >float ] dip [ >float ] dip
[ update-velocity ] [ update-position ] 2bi ; [ update-velocity ] [ update-position ] 2bi ;
M\ actor advance optimized.""" M\\ actor advance optimized."""
} }
"The " { $vocab-link "compiler.cfg.debugger" } " vocabulary can give a lower-level picture of the generated code, that includes register assignments and other low-level details. To look at low-level optimizer output, call " { $snippet "test-mr mr." } " on a word or quotation:" "The " { $vocab-link "compiler.cfg.debugger" } " vocabulary can give a lower-level picture of the generated code, that includes register assignments and other low-level details. To look at low-level optimizer output, call " { $snippet "test-mr mr." } " on a word or quotation:"
{ $code { $code
"""USE: compiler.tree.debugger """USE: compiler.tree.debugger
M\ actor advance test-mr mr.""" } M\\ actor advance test-mr mr.""" }
"An example of a high-performance algorithm that uses SIMD primitives can be found in the " { $vocab-link "benchmark.nbody-simd" } " vocabulary." ; "An example of a high-performance algorithm that uses SIMD primitives can be found in the " { $vocab-link "benchmark.nbody-simd" } " vocabulary." ;
ARTICLE: "math.vectors.simd.intrinsics" "Low-level SIMD primitives" ARTICLE: "math.vectors.simd.intrinsics" "Low-level SIMD primitives"
@ -206,7 +221,7 @@ ARTICLE: "math.vectors.simd" "Hardware vector arithmetic (SIMD)"
HELP: SIMD: HELP: SIMD:
{ $syntax "SIMD: type" } { $syntax "SIMD: type" }
{ $values { "type" "a scalar C type" } } { $values { "type" "a scalar C type" } }
{ $description "Defines 128-bit and 256-bit SIMD arrays for holding elements of " { $snippet "type" } " into the vocabulary search path. The possible type/length combinations are listed in " { $link "math.vectors.simd.types" } " and the generated words are documented in " { $link "math.vectors.simd.words" } "." } ; { $description "Defines 128-bit and 256-bit SIMD arrays for holding elements of " { $snippet "type" } " into the vocabulary search path. The allowed scalar types, and the auto-generated type/length vector combinations that result, are listed in " { $link "math.vectors.simd.types" } ". Generated words are documented in " { $link "math.vectors.simd.words" } "." } ;
HELP: SIMDS: HELP: SIMDS:
{ $syntax "SIMDS: type type type ... ;" } { $syntax "SIMDS: type type type ... ;" }

View File

@ -174,7 +174,7 @@ CONSTANT: simd-classes
: remove-special-words ( alist -- alist' ) : remove-special-words ( alist -- alist' )
! These have their own tests later ! These have their own tests later
{ {
hlshift hrshift vshuffle vbroadcast hlshift hrshift vshuffle-bytes vshuffle-elements vbroadcast
vany? vall? vnone? vany? vall? vnone?
(v>float) (v>integer) (v>float) (v>integer)
(vpack-signed) (vpack-unsigned) (vpack-signed) (vpack-unsigned)
@ -360,6 +360,23 @@ simd-classes [
] unit-test ] unit-test
] each ] each
"== Checking variable shuffles" print
: random-shift-vector ( class -- vec )
new [ drop 16 random ] map ;
:: test-shift-vector ( class -- ? )
class random-int-vector :> src
char-16 random-shift-vector :> perm
{ class char-16 } :> decl
src perm vshuffle
src perm [ decl declare vshuffle ] compile-call
= ; inline
{ char-16 uchar-16 short-8 ushort-8 int-4 uint-4 longlong-2 ulonglong-2 }
[ 10 swap '[ [ t ] [ _ test-shift-vector ] unit-test ] times ] each
"== Checking vector tests" print "== Checking vector tests" print
:: test-vector-tests-bool ( vector declaration -- none? any? all? ) :: test-vector-tests-bool ( vector declaration -- none? any? all? )
@ -512,38 +529,38 @@ SYMBOL: !!inconsistent!!
STRUCT: simd-struct STRUCT: simd-struct
{ x float-4 } { x float-4 }
{ y double-2 } { y longlong-2 }
{ z double-4 } { z double-4 }
{ w float-8 } ; { w int-8 } ;
[ t ] [ [ simd-struct <struct> ] compile-call >c-ptr [ 0 = ] all? ] unit-test [ t ] [ [ simd-struct <struct> ] compile-call >c-ptr [ 0 = ] all? ] unit-test
[ [
float-4{ 1 2 3 4 } float-4{ 1 2 3 4 }
double-2{ 2 1 } longlong-2{ 2 1 }
double-4{ 4 3 2 1 } double-4{ 4 3 2 1 }
float-8{ 1 2 3 4 5 6 7 8 } int-8{ 1 2 3 4 5 6 7 8 }
] [ ] [
simd-struct <struct> simd-struct <struct>
float-4{ 1 2 3 4 } >>x float-4{ 1 2 3 4 } >>x
double-2{ 2 1 } >>y longlong-2{ 2 1 } >>y
double-4{ 4 3 2 1 } >>z double-4{ 4 3 2 1 } >>z
float-8{ 1 2 3 4 5 6 7 8 } >>w int-8{ 1 2 3 4 5 6 7 8 } >>w
{ [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave { [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave
] unit-test ] unit-test
[ [
float-4{ 1 2 3 4 } float-4{ 1 2 3 4 }
double-2{ 2 1 } longlong-2{ 2 1 }
double-4{ 4 3 2 1 } double-4{ 4 3 2 1 }
float-8{ 1 2 3 4 5 6 7 8 } int-8{ 1 2 3 4 5 6 7 8 }
] [ ] [
[ [
simd-struct <struct> simd-struct <struct>
float-4{ 1 2 3 4 } >>x float-4{ 1 2 3 4 } >>x
double-2{ 2 1 } >>y longlong-2{ 2 1 } >>y
double-4{ 4 3 2 1 } >>z double-4{ 4 3 2 1 } >>z
float-8{ 1 2 3 4 5 6 7 8 } >>w int-8{ 1 2 3 4 5 6 7 8 } >>w
{ [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave { [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave
] compile-call ] compile-call
] unit-test ] unit-test

View File

@ -98,7 +98,8 @@ H{
{ vrshift { +vector+ +scalar+ -> +vector+ } } { vrshift { +vector+ +scalar+ -> +vector+ } }
{ hlshift { +vector+ +literal+ -> +vector+ } } { hlshift { +vector+ +literal+ -> +vector+ } }
{ hrshift { +vector+ +literal+ -> +vector+ } } { hrshift { +vector+ +literal+ -> +vector+ } }
{ vshuffle { +vector+ +literal+ -> +vector+ } } { vshuffle-elements { +vector+ +literal+ -> +vector+ } }
{ vshuffle-bytes { +vector+ +vector+ -> +vector+ } }
{ vbroadcast { +vector+ +literal+ -> +vector+ } } { vbroadcast { +vector+ +literal+ -> +vector+ } }
{ (vmerge-head) { +vector+ +vector+ -> +vector+ } } { (vmerge-head) { +vector+ +vector+ -> +vector+ } }
{ (vmerge-tail) { +vector+ +vector+ -> +vector+ } } { (vmerge-tail) { +vector+ +vector+ -> +vector+ } }
@ -162,7 +163,7 @@ ERROR: bad-vector-word word ;
} cond } cond
! Don't specialize horizontal shifts, shuffles, and conversions at all, they're only for SIMD ! Don't specialize horizontal shifts, shuffles, and conversions at all, they're only for SIMD
{ {
hlshift hrshift vshuffle vbroadcast hlshift hrshift vshuffle-elements vshuffle-bytes vbroadcast
(v>integer) (v>float) (v>integer) (v>float)
(vpack-signed) (vpack-unsigned) (vpack-signed) (vpack-unsigned)
(vunpack-head) (vunpack-tail) (vunpack-head) (vunpack-tail)

View File

@ -6,6 +6,9 @@ locals ;
QUALIFIED-WITH: alien.c-types c QUALIFIED-WITH: alien.c-types c
IN: math.vectors IN: math.vectors
MIXIN: simd-128
MIXIN: simd-256
GENERIC: element-type ( obj -- c-type ) GENERIC: element-type ( obj -- c-type )
M: object element-type drop f ; inline M: object element-type drop f ; inline
@ -83,7 +86,20 @@ PRIVATE>
: vbitnot ( u -- w ) dup '[ _ [ bitnot ] fp-bitwise-unary ] map ; : vbitnot ( u -- w ) dup '[ _ [ bitnot ] fp-bitwise-unary ] map ;
:: vbroadcast ( u n -- v ) u length n u nth <repetition> u like ; :: vbroadcast ( u n -- v ) u length n u nth <repetition> u like ;
: vshuffle ( u perm -- v ) swap [ '[ _ nth ] ] keep map-as ;
: vshuffle-elements ( u perm -- v )
swap [ '[ _ nth ] ] keep map-as ;
: vshuffle-bytes ( u perm -- v )
underlying>> [
swap [ '[ _ nth ] ] keep map-as
] curry change-underlying ;
GENERIC: vshuffle ( u perm -- v )
M: array vshuffle ( u perm -- v )
vshuffle-elements ; inline
M: simd-128 vshuffle ( u perm -- v )
vshuffle-bytes ; inline
: vlshift ( u n -- w ) '[ _ shift ] map ; : vlshift ( u n -- w ) '[ _ shift ] map ;
: vrshift ( u n -- w ) neg '[ _ shift ] map ; : vrshift ( u n -- w ) neg '[ _ shift ] map ;
@ -107,9 +123,9 @@ PRIVATE>
: vxor ( u v -- w ) over '[ [ _ element>bool ] bi@ xor ] 2map ; : vxor ( u v -- w ) over '[ [ _ element>bool ] bi@ xor ] 2map ;
: vnot ( u -- w ) dup '[ _ element>bool not ] map ; : vnot ( u -- w ) dup '[ _ element>bool not ] map ;
: vall? ( v -- ? ) [ ] all? ; : vall? ( v -- ? ) dup '[ _ element>bool ] all? ;
: vany? ( v -- ? ) [ ] any? ; : vany? ( v -- ? ) dup '[ _ element>bool ] any? ;
: vnone? ( v -- ? ) [ not ] all? ; : vnone? ( v -- ? ) dup '[ _ element>bool not ] all? ;
: v< ( u v -- w ) [ < ] 2map ; : v< ( u v -- w ) [ < ] 2map ;
: v<= ( u v -- w ) [ <= ] 2map ; : v<= ( u v -- w ) [ <= ] 2map ;

View File

@ -2,7 +2,8 @@
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
USING: accessors alien alien.c-types alien.data alien.parser USING: accessors alien alien.c-types alien.data alien.parser
assocs byte-arrays classes compiler.units functors kernel lexer assocs byte-arrays classes compiler.units functors kernel lexer
libc math math.vectors math.vectors.specialization namespaces libc math math.vectors math.vectors.private
math.vectors.specialization namespaces
parser prettyprint.custom sequences sequences.private strings parser prettyprint.custom sequences sequences.private strings
summary vocabs vocabs.loader vocabs.parser vocabs.generated summary vocabs vocabs.loader vocabs.parser vocabs.generated
words fry combinators present ; words fry combinators present ;
@ -68,6 +69,8 @@ TUPLE: A
[ drop \ T bad-byte-array-length ] unless [ drop \ T bad-byte-array-length ] unless
<direct-A> ; inline <direct-A> ; inline
M: A new-underlying drop byte-array>A ;
M: A clone [ underlying>> clone ] [ length>> ] bi <direct-A> ; inline M: A clone [ underlying>> clone ] [ length>> ] bi <direct-A> ; inline
M: A length length>> ; inline M: A length length>> ; inline

View File

@ -129,17 +129,18 @@ ARTICLE: "tuple-inheritance-example" "Tuple subclassing example"
} }
"Rectangles and parallelograms use the same algorithm for computing the area, whereas they use different algorithms for computing perimiter. Also, rectangles and parallelograms both have " { $snippet "width" } " and " { $snippet "height" } " slots. We can exploit this with subclassing:" "Rectangles and parallelograms use the same algorithm for computing the area, whereas they use different algorithms for computing perimiter. Also, rectangles and parallelograms both have " { $snippet "width" } " and " { $snippet "height" } " slots. We can exploit this with subclassing:"
{ $code { $code
"USING: accessors kernel math math.constants math.functions ;"
"GENERIC: area ( shape -- n )" "GENERIC: area ( shape -- n )"
"GENERIC: perimiter ( shape -- n )" "GENERIC: perimiter ( shape -- n )"
"" ""
"TUPLE: shape ;" "TUPLE: shape ;"
"" ""
"TUPLE: circle < shape radius ;" "TUPLE: circle < shape radius ;"
"M: area circle radius>> sq pi * ;" "M: circle area radius>> sq pi * ;"
"M: perimiter circle radius>> 2 * pi * ;" "M: circle perimiter radius>> 2 * pi * ;"
"" ""
"TUPLE: quad < shape width height" "TUPLE: quad < shape width height ;"
"M: area quad [ width>> ] [ height>> ] bi * ;" "M: quad area [ width>> ] [ height>> ] bi * ;"
"" ""
"TUPLE: rectangle < quad ;" "TUPLE: rectangle < quad ;"
"M: rectangle perimiter [ width>> 2 * ] [ height>> 2 * ] bi + ;" "M: rectangle perimiter [ width>> 2 * ] [ height>> 2 * ] bi + ;"

View File

@ -52,10 +52,10 @@ IN: math.matrices.simd.tests
[ [
S{ matrix4 f S{ matrix4 f
float-4-array{ float-4-array{
float-4{ 1.0 0.0 0.0 3.0 } float-4{ 1.0 0.0 0.0 0.0 }
float-4{ 0.0 1.0 0.0 4.0 } float-4{ 0.0 1.0 0.0 0.0 }
float-4{ 0.0 0.0 1.0 2.0 } float-4{ 0.0 0.0 1.0 0.0 }
float-4{ 0.0 0.0 0.0 1.0 } float-4{ 3.0 4.0 2.0 1.0 }
} }
} }
] [ float-4{ 3.0 4.0 2.0 0.0 } translation-matrix4 ] unit-test ] [ float-4{ 3.0 4.0 2.0 0.0 } translation-matrix4 ] unit-test
@ -77,9 +77,9 @@ IN: math.matrices.simd.tests
float-4{ 0.0 1.0 0.0 1.0 } pi 1/2. * rotation-matrix4 float-4{ 0.0 1.0 0.0 1.0 } pi 1/2. * rotation-matrix4
S{ matrix4 f S{ matrix4 f
float-4-array{ float-4-array{
float-4{ 0.0 0.0 1.0 0.0 } float-4{ 0.0 0.0 -1.0 0.0 }
float-4{ 0.0 1.0 0.0 0.0 } float-4{ 0.0 1.0 0.0 0.0 }
float-4{ -1.0 0.0 0.0 0.0 } float-4{ 1.0 0.0 0.0 0.0 }
float-4{ 0.0 0.0 0.0 1.0 } float-4{ 0.0 0.0 0.0 1.0 }
} }
} }
@ -89,10 +89,10 @@ IN: math.matrices.simd.tests
[ [
S{ matrix4 f S{ matrix4 f
float-4-array{ float-4-array{
float-4{ 2.0 0.0 0.0 10.0 } float-4{ 2.0 0.0 0.0 0.0 }
float-4{ 0.0 3.0 0.0 18.0 } float-4{ 0.0 3.0 0.0 0.0 }
float-4{ 0.0 0.0 4.0 28.0 } float-4{ 0.0 0.0 4.0 0.0 }
float-4{ 0.0 0.0 0.0 1.0 } float-4{ 10.0 18.0 28.0 1.0 }
} }
} }
] [ ] [
@ -106,10 +106,10 @@ IN: math.matrices.simd.tests
} }
S{ matrix4 f S{ matrix4 f
float-4-array{ float-4-array{
float-4{ 1.0 0.0 0.0 5.0 } float-4{ 1.0 0.0 0.0 0.0 }
float-4{ 0.0 1.0 0.0 6.0 } float-4{ 0.0 1.0 0.0 0.0 }
float-4{ 0.0 0.0 1.0 7.0 } float-4{ 0.0 0.0 1.0 0.0 }
float-4{ 0.0 0.0 0.0 1.0 } float-4{ 5.0 6.0 7.0 1.0 }
} }
} }
m4. m4.
@ -118,10 +118,10 @@ IN: math.matrices.simd.tests
[ [
S{ matrix4 f S{ matrix4 f
float-4-array{ float-4-array{
float-4{ 3.0 0.0 0.0 5.0 } float-4{ 3.0 0.0 0.0 0.0 }
float-4{ 0.0 4.0 0.0 6.0 } float-4{ 0.0 4.0 0.0 0.0 }
float-4{ 0.0 0.0 5.0 7.0 } float-4{ 0.0 0.0 5.0 0.0 }
float-4{ 0.0 0.0 0.0 2.0 } float-4{ 5.0 6.0 7.0 2.0 }
} }
} }
] [ ] [
@ -135,10 +135,10 @@ IN: math.matrices.simd.tests
} }
S{ matrix4 f S{ matrix4 f
float-4-array{ float-4-array{
float-4{ 1.0 0.0 0.0 5.0 } float-4{ 1.0 0.0 0.0 0.0 }
float-4{ 0.0 1.0 0.0 6.0 } float-4{ 0.0 1.0 0.0 0.0 }
float-4{ 0.0 0.0 1.0 7.0 } float-4{ 0.0 0.0 1.0 0.0 }
float-4{ 0.0 0.0 0.0 1.0 } float-4{ 5.0 6.0 7.0 1.0 }
} }
} }
m4+ m4+
@ -147,10 +147,10 @@ IN: math.matrices.simd.tests
[ [
S{ matrix4 f S{ matrix4 f
float-4-array{ float-4-array{
float-4{ 1.0 0.0 0.0 -5.0 } float-4{ 1.0 0.0 0.0 0.0 }
float-4{ 0.0 2.0 0.0 -6.0 } float-4{ 0.0 2.0 0.0 0.0 }
float-4{ 0.0 0.0 3.0 -7.0 } float-4{ 0.0 0.0 3.0 0.0 }
float-4{ 0.0 0.0 0.0 0.0 } float-4{ -5.0 -6.0 -7.0 0.0 }
} }
} }
] [ ] [
@ -164,10 +164,10 @@ IN: math.matrices.simd.tests
} }
S{ matrix4 f S{ matrix4 f
float-4-array{ float-4-array{
float-4{ 1.0 0.0 0.0 5.0 } float-4{ 1.0 0.0 0.0 0.0 }
float-4{ 0.0 1.0 0.0 6.0 } float-4{ 0.0 1.0 0.0 0.0 }
float-4{ 0.0 0.0 1.0 7.0 } float-4{ 0.0 0.0 1.0 0.0 }
float-4{ 0.0 0.0 0.0 1.0 } float-4{ 5.0 6.0 7.0 1.0 }
} }
} }
m4- m4-
@ -219,10 +219,10 @@ IN: math.matrices.simd.tests
[ [
S{ matrix4 f S{ matrix4 f
float-4-array{ float-4-array{
float-4{ 1/2. 0.0 0.0 0.0 } float-4{ 1/2. 0.0 0.0 0.0 }
float-4{ 0.0 1/2. 0.0 0.0 } float-4{ 0.0 1/2. 0.0 0.0 }
float-4{ 0.0 0.0 -6/4. -10/4. } float-4{ 0.0 0.0 -6/4. -1.0 }
float-4{ 0.0 0.0 -1.0 0.0 } float-4{ 0.0 0.0 -10/4. 0.0 }
} }
} }
] [ ] [

View File

@ -9,34 +9,34 @@ SPECIALIZED-ARRAY: float-4
IN: math.matrices.simd IN: math.matrices.simd
STRUCT: matrix4 STRUCT: matrix4
{ rows float-4[4] } ; { columns float-4[4] } ;
INSTANCE: matrix4 immutable-sequence INSTANCE: matrix4 immutable-sequence
M: matrix4 length drop 4 ; inline M: matrix4 length drop 4 ; inline
M: matrix4 nth-unsafe rows>> nth-unsafe ; inline M: matrix4 nth-unsafe columns>> nth-unsafe ; inline
M: matrix4 new-sequence 2drop matrix4 (struct) ; inline M: matrix4 new-sequence 2drop matrix4 (struct) ; inline
<PRIVATE <PRIVATE
: rows ( a -- a1 a2 a3 a4 ) : columns ( a -- a1 a2 a3 a4 )
rows>> 4 firstn ; inline columns>> 4 firstn ; inline
:: set-rows ( c1 c2 c3 c4 c -- c ) :: set-columns ( c1 c2 c3 c4 c -- c )
c rows>> :> rows c columns>> :> columns
c1 rows set-first c1 columns set-first
c2 rows set-second c2 columns set-second
c3 rows set-third c3 columns set-third
c4 rows set-fourth c4 columns set-fourth
c ; inline c ; inline
: make-matrix4 ( quot: ( -- c1 c2 c3 c4 ) -- c ) : make-matrix4 ( quot: ( -- c1 c2 c3 c4 ) -- c )
matrix4 (struct) swap dip set-rows ; inline matrix4 (struct) swap dip set-columns ; inline
:: 2map-rows ( a b quot -- c ) :: 2map-columns ( a b quot -- c )
[ [
a rows :> a4 :> a3 :> a2 :> a1 a columns :> a4 :> a3 :> a2 :> a1
b rows :> b4 :> b3 :> b2 :> b1 b columns :> b4 :> b3 :> b2 :> b1
a1 b1 quot call a1 b1 quot call
a2 b2 quot call a2 b2 quot call
@ -44,57 +44,57 @@ M: matrix4 new-sequence 2drop matrix4 (struct) ; inline
a4 b4 quot call a4 b4 quot call
] make-matrix4 ; inline ] make-matrix4 ; inline
: map-rows ( a quot -- c ) : map-columns ( a quot -- c )
'[ rows _ 4 napply ] make-matrix4 ; inline '[ columns _ 4 napply ] make-matrix4 ; inline
PRIVATE> PRIVATE>
TYPED: m4+ ( a: matrix4 b: matrix4 -- c: matrix4 ) [ v+ ] 2map-rows ; TYPED: m4+ ( a: matrix4 b: matrix4 -- c: matrix4 ) [ v+ ] 2map-columns ;
TYPED: m4- ( a: matrix4 b: matrix4 -- c: matrix4 ) [ v- ] 2map-rows ; TYPED: m4- ( a: matrix4 b: matrix4 -- c: matrix4 ) [ v- ] 2map-columns ;
TYPED: m4* ( a: matrix4 b: matrix4 -- c: matrix4 ) [ v* ] 2map-rows ; TYPED: m4* ( a: matrix4 b: matrix4 -- c: matrix4 ) [ v* ] 2map-columns ;
TYPED: m4/ ( a: matrix4 b: matrix4 -- c: matrix4 ) [ v/ ] 2map-rows ; TYPED: m4/ ( a: matrix4 b: matrix4 -- c: matrix4 ) [ v/ ] 2map-columns ;
TYPED: m4*n ( a: matrix4 b: float -- c: matrix4 ) [ v*n ] curry map-rows ; TYPED: m4*n ( a: matrix4 b: float -- c: matrix4 ) [ v*n ] curry map-columns ;
TYPED: m4/n ( a: matrix4 b: float -- c: matrix4 ) [ v/n ] curry map-rows ; TYPED: m4/n ( a: matrix4 b: float -- c: matrix4 ) [ v/n ] curry map-columns ;
TYPED: n*m4 ( a: float b: matrix4 -- c: matrix4 ) [ n*v ] with map-rows ; TYPED: n*m4 ( a: float b: matrix4 -- c: matrix4 ) [ n*v ] with map-columns ;
TYPED: n/m4 ( a: float b: matrix4 -- c: matrix4 ) [ n/v ] with map-rows ; TYPED: n/m4 ( a: float b: matrix4 -- c: matrix4 ) [ n/v ] with map-columns ;
TYPED:: m4. ( a: matrix4 b: matrix4 -- c: matrix4 ) TYPED:: m4. ( a: matrix4 b: matrix4 -- c: matrix4 )
[ [
a rows :> a4 :> a3 :> a2 :> a1 a columns :> a4 :> a3 :> a2 :> a1
b rows :> b4 :> b3 :> b2 :> b1 b columns :> b4 :> b3 :> b2 :> b1
a1 first b1 n*v :> c1a b1 first a1 n*v :> c1a
a2 first b1 n*v :> c2a b2 first a1 n*v :> c2a
a3 first b1 n*v :> c3a b3 first a1 n*v :> c3a
a4 first b1 n*v :> c4a b4 first a1 n*v :> c4a
a1 second b2 n*v c1a v+ :> c1b b1 second a2 n*v c1a v+ :> c1b
a2 second b2 n*v c2a v+ :> c2b b2 second a2 n*v c2a v+ :> c2b
a3 second b2 n*v c3a v+ :> c3b b3 second a2 n*v c3a v+ :> c3b
a4 second b2 n*v c4a v+ :> c4b b4 second a2 n*v c4a v+ :> c4b
a1 third b3 n*v c1b v+ :> c1c b1 third a3 n*v c1b v+ :> c1c
a2 third b3 n*v c2b v+ :> c2c b2 third a3 n*v c2b v+ :> c2c
a3 third b3 n*v c3b v+ :> c3c b3 third a3 n*v c3b v+ :> c3c
a4 third b3 n*v c4b v+ :> c4c b4 third a3 n*v c4b v+ :> c4c
a1 fourth b4 n*v c1c v+ b1 fourth a4 n*v c1c v+
a2 fourth b4 n*v c2c v+ b2 fourth a4 n*v c2c v+
a3 fourth b4 n*v c3c v+ b3 fourth a4 n*v c3c v+
a4 fourth b4 n*v c4c v+ b4 fourth a4 n*v c4c v+
] make-matrix4 ; ] make-matrix4 ;
TYPED:: v.m4 ( a: float-4 b: matrix4 -- c: float-4 ) TYPED:: m4.v ( m: matrix4 v: float-4 -- v': float-4 )
b rows :> b4 :> b3 :> b2 :> b1 m columns :> m4 :> m3 :> m2 :> m1
a first b1 n*v v first m1 n*v
a second b2 n*v v+ v second m2 n*v v+
a third b3 n*v v+ v third m3 n*v v+
a fourth b4 n*v v+ ; v fourth m4 n*v v+ ;
TYPED:: m4.v ( a: matrix4 b: float-4 -- c: float-4 ) TYPED:: v.m4 ( v: float-4 m: matrix4 -- c: float-4 )
a rows [ b v. ] 4 napply float-4-boa ; m columns [ v v. ] 4 napply float-4-boa ;
CONSTANT: identity-matrix4 CONSTANT: identity-matrix4
S{ matrix4 f S{ matrix4 f
@ -131,37 +131,37 @@ TYPED: diagonal-matrix4 ( diagonal: float-4 -- matrix: matrix4 )
[ (vmerge) ] bi-curry@ bi* ; inline [ (vmerge) ] bi-curry@ bi* ; inline
TYPED: transpose-matrix4 ( matrix: matrix4 -- matrix: matrix4 ) TYPED: transpose-matrix4 ( matrix: matrix4 -- matrix: matrix4 )
[ rows vmerge-transpose vmerge-transpose ] make-matrix4 ; [ columns vmerge-transpose vmerge-transpose ] make-matrix4 ;
: linear>homogeneous ( v -- v' )
[ float-4{ t t t f } ] dip float-4{ 0.0 0.0 0.0 1.0 } v? ; inline
: scale-matrix4 ( factors -- matrix ) : scale-matrix4 ( factors -- matrix )
[ float-4{ t t t f } ] dip float-4{ 0.0 0.0 0.0 1.0 } v? linear>homogeneous diagonal-matrix4 ; inline
diagonal-matrix4 ; inline
: ortho-matrix4 ( factors -- matrix ) : ortho-matrix4 ( factors -- matrix )
float-4{ 1.0 1.0 1.0 1.0 } swap v/ scale-matrix4 ; inline float-4{ 1.0 1.0 1.0 1.0 } swap v/ scale-matrix4 ; inline
TYPED:: translation-matrix4 ( offset: float-4 -- matrix: matrix4 ) TYPED: translation-matrix4 ( offset: float-4 -- matrix: matrix4 )
[ [
float-4{ 1.0 1.0 1.0 1.0 } :> diagonal linear>homogeneous
[
offset 0 float-4-with (vmerge) float-4{ 1.0 0.0 0.0 0.0 }
[ 0 float-4-with swap (vmerge) ] bi@ drop :> z :> y :> x float-4{ 0.0 1.0 0.0 0.0 }
float-4{ 0.0 0.0 1.0 0.0 }
diagonal y vmerge-diagonal* ] dip
[ x vmerge-diagonal* ]
[ z vmerge-diagonal* ] bi*
] make-matrix4 ; ] make-matrix4 ;
TYPED:: rotation-matrix4 ( axis: float-4 theta: float -- matrix: matrix4 ) TYPED:: rotation-matrix4 ( axis: float-4 theta: float -- matrix: matrix4 )
! x*x + c*(1.0 - x*x) x*y*(1.0 - c) - s*z x*z*(1.0 - c) + s*y 0 ! x*x + c*(1.0 - x*x) x*y*(1.0 - c) + s*z x*z*(1.0 - c) - s*y 0
! x*y*(1.0 - c) + s*z y*y + c*(1.0 - y*y) y*z*(1.0 - c) - s*x 0 ! x*y*(1.0 - c) - s*z y*y + c*(1.0 - y*y) y*z*(1.0 - c) + s*x 0
! x*z*(1.0 - c) - s*y y*z*(1.0 - c) + s*x z*z + c*(1.0 - z*z) 0 ! x*z*(1.0 - c) + s*y y*z*(1.0 - c) - s*x z*z + c*(1.0 - z*z) 0
! 0 0 0 1 ! 0 0 0 1
matrix4 (struct) :> triangle-m matrix4 (struct) :> triangle-m
theta cos :> c theta cos :> c
theta sin :> s theta sin :> s
float-4{ 1.0 -1.0 1.0 0.0 } :> triangle-sign float-4{ -1.0 1.0 -1.0 0.0 } :> triangle-sign
c float-4-with :> cc c float-4-with :> cc
s float-4-with :> ss s float-4-with :> ss
@ -184,7 +184,7 @@ TYPED:: rotation-matrix4 ( axis: float-4 theta: float -- matrix: matrix4 )
triangle-lo { 1 0 3 3 } vshuffle triangle-lo { 1 0 3 3 } vshuffle
float-4 new float-4 new
triangle-m set-rows drop triangle-m set-columns drop
diagonal-m triangle-m m4+ ; diagonal-m triangle-m m4+ ;
@ -194,8 +194,10 @@ TYPED:: frustum-matrix4 ( xy: float-4 near: float far: float -- matrix: matrix4
float-4{ t t f f } xy near far - float-4-with v? ! denom float-4{ t t f f } xy near far - float-4-with v? ! denom
v/ :> fov v/ :> fov
fov 0.0 float-4-with (vmerge-head) vmerge-diagonal float-4{ 0.0 -1.0 0.0 0.0 } :> negone
fov float-4{ f f t t } vand
float-4{ 0.0 0.0 -1.0 0.0 } fov vmerge-diagonal
[ vmerge-diagonal ]
[ negone (vmerge) ] bi*
] make-matrix4 ; ] make-matrix4 ;