Merge branch 'master' of git://factorcode.org/git/factor into new_gc

db4
Slava Pestov 2009-10-13 06:57:37 -05:00
commit e81e076935
22 changed files with 338 additions and 187 deletions

View File

@ -1,5 +1,5 @@
USING: calendar.format calendar kernel math tools.test
io.streams.string accessors io math.order ;
io.streams.string accessors io math.order sequences ;
IN: calendar.format.tests
[ 0 ] [
@ -81,3 +81,5 @@ IN: calendar.format.tests
] [ "Thursday, 02-Oct-2008 23:59:59 GMT" cookie-string>timestamp ] unit-test
[ ]
[ { 2008 2009 } [ year. ] each ] unit-test

View File

@ -66,7 +66,7 @@ M: array month. ( pair -- )
[ month-name write bl number>string print ]
[ 1 zeller-congruence ]
[ (days-in-month) day-abbreviations2 " " join print ] 2tri
over " " <repetition> concat write
over " " <repetition> "" concat-as write
[
[ 1 + day. ] keep
1 + + 7 mod zero? [ nl ] [ bl ] if

View File

@ -277,6 +277,11 @@ literal: rep ;
PURE-INSN: ##shuffle-vector
def: dst
use: src shuffle
literal: rep ;
PURE-INSN: ##shuffle-vector-imm
def: dst
use: src
literal: shuffle rep ;

View File

@ -194,7 +194,8 @@ IN: compiler.cfg.intrinsics
{ math.vectors.simd.intrinsics:(simd-with) [ [ ^^with-vector ] emit-unary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-gather-2) [ emit-gather-vector-2 ] }
{ math.vectors.simd.intrinsics:(simd-gather-4) [ emit-gather-vector-4 ] }
{ math.vectors.simd.intrinsics:(simd-vshuffle) [ emit-shuffle-vector ] }
{ math.vectors.simd.intrinsics:(simd-vshuffle-elements) [ emit-shuffle-vector ] }
{ math.vectors.simd.intrinsics:(simd-vshuffle-bytes) [ emit-shuffle-vector-var ] }
{ math.vectors.simd.intrinsics:(simd-(vmerge-head)) [ [ ^^merge-vector-head ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-(vmerge-tail)) [ [ ^^merge-vector-tail ] emit-binary-vector-op ] }
{ math.vectors.simd.intrinsics:(simd-(v>float)) [ [ ^^integer>float-vector ] emit-unary-vector-op ] }

View File

@ -1,15 +1,16 @@
! Copyright (C) 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors byte-arrays fry cpu.architecture kernel math
sequences math.vectors.simd.intrinsics macros generalizations
combinators combinators.short-circuit arrays locals
USING: accessors alien byte-arrays fry classes.algebra
cpu.architecture kernel math sequences math.vectors
math.vectors.simd.intrinsics macros generalizations combinators
combinators.short-circuit arrays locals
compiler.tree.propagation.info compiler.cfg.builder.blocks
compiler.cfg.comparisons
compiler.cfg.stacks compiler.cfg.stacks.local compiler.cfg.hats
compiler.cfg.instructions compiler.cfg.registers
compiler.cfg.intrinsics.alien
specialized-arrays ;
FROM: alien.c-types => float double ;
FROM: alien.c-types => heap-size char uchar float double ;
SPECIALIZED-ARRAYS: float double ;
IN: compiler.cfg.intrinsics.simd
@ -21,7 +22,7 @@ MACRO: check-elements ( quots -- )
MACRO: if-literals-match ( quots -- )
[ length ] [ ] [ length ] tri
! n quots n n
! n quots n
'[
! node quot
[
@ -77,15 +78,41 @@ MACRO: if-literals-match ( quots -- )
: shuffle? ( obj -- ? ) { [ array? ] [ [ integer? ] all? ] } 1&& ;
: emit-shuffle-vector ( node -- )
! Pad the permutation with zeroes if its too short, since we
: >variable-shuffle ( shuffle rep -- shuffle' )
rep-component-type heap-size
[ dup <repetition> >byte-array ]
[ iota >byte-array ] bi
'[ _ n*v _ v+ ] map concat ;
: generate-shuffle-vector-imm ( src shuffle rep -- dst )
dup %shuffle-vector-imm-reps member?
[ ^^shuffle-vector-imm ]
[
[ >variable-shuffle ^^load-constant ] keep
^^shuffle-vector
] if ;
: emit-shuffle-vector-imm ( node -- )
! Pad the permutation with zeroes if it's too short, since we
! can't throw an error at this point.
[ [ rep-components 0 pad-tail ] keep ^^shuffle-vector ] [unary/param]
[ [ rep-components 0 pad-tail ] keep generate-shuffle-vector-imm ] [unary/param]
{ [ shuffle? ] [ representation? ] } if-literals-match ;
: emit-shuffle-vector-var ( node -- )
[ ^^shuffle-vector ] [binary]
{ [ %shuffle-vector-reps member? ] } if-literals-match ;
: emit-shuffle-vector ( node -- )
dup node-input-infos {
[ length 3 = ]
[ first class>> byte-array class<= ]
[ second class>> byte-array class<= ]
[ third literal>> representation? ]
} 1&& [ emit-shuffle-vector-var ] [ emit-shuffle-vector-imm ] if ;
: ^^broadcast-vector ( src n rep -- dst )
[ rep-components swap <array> ] keep
^^shuffle-vector ;
generate-shuffle-vector-imm ;
: emit-broadcast-vector ( node -- )
[ ^^broadcast-vector ] [unary/param]
@ -101,6 +128,9 @@ MACRO: if-literals-match ( quots -- )
[ ^^select-vector ] [unary/param]
{ [ integer? ] [ representation? ] } if-literals-match ; inline
: emit-alien-vector-op ( node quot: ( rep -- ) -- )
{ [ %alien-vector-reps member? ] } if-literals-match ; inline
: emit-alien-vector ( node -- )
dup [
'[
@ -108,7 +138,7 @@ MACRO: if-literals-match ( quots -- )
_ ^^alien-vector ds-push
]
[ inline-alien-getter? ] inline-alien
] with emit-vector-op ;
] with emit-alien-vector-op ;
: emit-set-alien-vector ( node -- )
dup [
@ -118,7 +148,7 @@ MACRO: if-literals-match ( quots -- )
]
[ byte-array inline-alien-setter? ]
inline-alien
] with emit-vector-op ;
] with emit-alien-vector-op ;
: generate-not-vector ( src rep -- dst )
dup %not-vector-reps member?

View File

@ -450,26 +450,26 @@ M: ##set-alien-vector rewrite rewrite-alien-addressing ;
! Some lame constant folding for SIMD intrinsics. Eventually this
! should be redone completely.
: rewrite-shuffle-vector ( insn expr -- insn' )
: rewrite-shuffle-vector-imm ( insn expr -- insn' )
2dup [ rep>> ] bi@ eq? [
[ [ dst>> ] [ src>> vn>vreg ] bi* ]
[ [ shuffle>> ] bi@ nths ]
[ drop rep>> ]
2tri \ ##shuffle-vector new-insn
2tri \ ##shuffle-vector-imm new-insn
] [ 2drop f ] if ;
: (fold-shuffle-vector) ( shuffle bytes -- bytes' )
: (fold-shuffle-vector-imm) ( shuffle bytes -- bytes' )
2dup length swap length /i group nths concat ;
: fold-shuffle-vector ( insn expr -- insn' )
: fold-shuffle-vector-imm ( insn expr -- insn' )
[ [ dst>> ] [ shuffle>> ] bi ] dip value>>
(fold-shuffle-vector) \ ##load-constant new-insn ;
(fold-shuffle-vector-imm) \ ##load-constant new-insn ;
M: ##shuffle-vector rewrite
M: ##shuffle-vector-imm rewrite
dup src>> vreg>expr {
{ [ dup shuffle-vector-expr? ] [ rewrite-shuffle-vector ] }
{ [ dup reference-expr? ] [ fold-shuffle-vector ] }
{ [ dup constant-expr? ] [ fold-shuffle-vector ] }
{ [ dup shuffle-vector-imm-expr? ] [ rewrite-shuffle-vector-imm ] }
{ [ dup reference-expr? ] [ fold-shuffle-vector-imm ] }
{ [ dup constant-expr? ] [ fold-shuffle-vector-imm ] }
[ 2drop f ]
} cond ;

View File

@ -136,7 +136,7 @@ M: scalar>vector-expr simplify*
[ drop f ]
} cond ;
M: shuffle-vector-expr simplify*
M: shuffle-vector-imm-expr simplify*
[ src>> ] [ shuffle>> ] [ rep>> rep-components iota ] tri
sequence= [ drop f ] unless ;

View File

@ -1215,31 +1215,31 @@ cell 8 = [
}
] [
{
T{ ##shuffle-vector f 1 0 { 0 1 2 3 } float-4-rep }
T{ ##shuffle-vector-imm f 1 0 { 0 1 2 3 } float-4-rep }
} value-numbering-step
] unit-test
[
{
T{ ##shuffle-vector f 1 0 { 1 2 3 0 } float-4-rep }
T{ ##shuffle-vector f 2 0 { 0 2 3 1 } float-4-rep }
T{ ##shuffle-vector-imm f 1 0 { 1 2 3 0 } float-4-rep }
T{ ##shuffle-vector-imm f 2 0 { 0 2 3 1 } float-4-rep }
}
] [
{
T{ ##shuffle-vector f 1 0 { 1 2 3 0 } float-4-rep }
T{ ##shuffle-vector f 2 1 { 3 1 2 0 } float-4-rep }
T{ ##shuffle-vector-imm f 1 0 { 1 2 3 0 } float-4-rep }
T{ ##shuffle-vector-imm f 2 1 { 3 1 2 0 } float-4-rep }
} value-numbering-step
] unit-test
[
{
T{ ##shuffle-vector f 1 0 { 1 2 3 0 } float-4-rep }
T{ ##shuffle-vector f 2 1 { 1 0 } double-2-rep }
T{ ##shuffle-vector-imm f 1 0 { 1 2 3 0 } float-4-rep }
T{ ##shuffle-vector-imm f 2 1 { 1 0 } double-2-rep }
}
] [
{
T{ ##shuffle-vector f 1 0 { 1 2 3 0 } float-4-rep }
T{ ##shuffle-vector f 2 1 { 1 0 } double-2-rep }
T{ ##shuffle-vector-imm f 1 0 { 1 2 3 0 } float-4-rep }
T{ ##shuffle-vector-imm f 2 1 { 1 0 } double-2-rep }
} value-numbering-step
] unit-test
@ -1253,7 +1253,7 @@ cell 8 = [
{
T{ ##load-constant f 0 $[ 55 tag-fixnum ] }
T{ ##scalar>vector f 1 0 int-4-rep }
T{ ##shuffle-vector f 2 1 { 0 0 0 0 } float-4-rep }
T{ ##shuffle-vector-imm f 2 1 { 0 0 0 0 } float-4-rep }
} value-numbering-step
] unit-test
@ -1267,7 +1267,7 @@ cell 8 = [
{
T{ ##load-constant f 0 1.25 }
T{ ##scalar>vector f 1 0 float-4-rep }
T{ ##shuffle-vector f 2 1 { 0 0 0 0 } float-4-rep }
T{ ##shuffle-vector-imm f 2 1 { 0 0 0 0 } float-4-rep }
} value-numbering-step
] unit-test

View File

@ -154,6 +154,7 @@ CODEGEN: ##zero-vector %zero-vector
CODEGEN: ##fill-vector %fill-vector
CODEGEN: ##gather-vector-2 %gather-vector-2
CODEGEN: ##gather-vector-4 %gather-vector-4
CODEGEN: ##shuffle-vector-imm %shuffle-vector-imm
CODEGEN: ##shuffle-vector %shuffle-vector
CODEGEN: ##tail>head-vector %tail>head-vector
CODEGEN: ##merge-vector-head %merge-vector-head

View File

@ -31,7 +31,8 @@ IN: compiler.tree.propagation.simd
(simd-vrshift)
(simd-hlshift)
(simd-hrshift)
(simd-vshuffle)
(simd-vshuffle-bytes)
(simd-vshuffle-elements)
(simd-(vmerge-head))
(simd-(vmerge-tail))
(simd-(v>float))

View File

@ -242,6 +242,7 @@ HOOK: %fill-vector cpu ( dst rep -- )
HOOK: %gather-vector-2 cpu ( dst src1 src2 rep -- )
HOOK: %gather-vector-4 cpu ( dst src1 src2 src3 src4 rep -- )
HOOK: %shuffle-vector cpu ( dst src shuffle rep -- )
HOOK: %shuffle-vector-imm cpu ( dst src shuffle rep -- )
HOOK: %tail>head-vector cpu ( dst src rep -- )
HOOK: %merge-vector-head cpu ( dst src1 src2 rep -- )
HOOK: %merge-vector-tail cpu ( dst src1 src2 rep -- )
@ -288,7 +289,9 @@ HOOK: %zero-vector-reps cpu ( -- reps )
HOOK: %fill-vector-reps cpu ( -- reps )
HOOK: %gather-vector-2-reps cpu ( -- reps )
HOOK: %gather-vector-4-reps cpu ( -- reps )
HOOK: %alien-vector-reps cpu ( -- reps )
HOOK: %shuffle-vector-reps cpu ( -- reps )
HOOK: %shuffle-vector-imm-reps cpu ( -- reps )
HOOK: %merge-vector-reps cpu ( -- reps )
HOOK: %signed-pack-vector-reps cpu ( -- reps )
HOOK: %unsigned-pack-vector-reps cpu ( -- reps )
@ -328,7 +331,9 @@ M: object %zero-vector-reps { } ;
M: object %fill-vector-reps { } ;
M: object %gather-vector-2-reps { } ;
M: object %gather-vector-4-reps { } ;
M: object %alien-vector-reps { } ;
M: object %shuffle-vector-reps { } ;
M: object %shuffle-vector-imm-reps { } ;
M: object %merge-vector-reps { } ;
M: object %signed-pack-vector-reps { } ;
M: object %unsigned-pack-vector-reps { } ;

View File

@ -432,8 +432,13 @@ M: x86 %epilogue ( n -- ) cell - incr-stack-reg ;
temp 0 MOV \ t rc-absolute-cell rel-immediate
dst temp word execute ; inline
: (%compare) ( src1 src2 cc -- )
2over [ { cc= cc/= } member? ] [ register? ] [ 0 = ] tri* and and
[ drop dup TEST ]
[ CMP ] if ;
M:: x86 %compare ( dst src1 src2 cc temp -- )
src1 src2 CMP
src1 src2 cc (%compare)
cc order-cc {
{ cc< [ dst temp \ CMOVL %boolean ] }
{ cc<= [ dst temp \ CMOVLE %boolean ] }
@ -447,7 +452,7 @@ M: x86 %compare-imm ( dst src1 src2 cc temp -- )
%compare ;
M:: x86 %compare-branch ( label src1 src2 cc -- )
src1 src2 CMP
src1 src2 cc (%compare)
cc order-cc {
{ cc< [ label JL ] }
{ cc<= [ label JLE ] }
@ -562,6 +567,12 @@ MACRO: available-reps ( alist -- )
reverse [ { } ] suffix
'[ _ cond ] ;
M: x86 %alien-vector-reps
{
{ sse? { float-4-rep } }
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
} available-reps ;
M: x86 %zero-vector
{
{ double-2-rep [ dup XORPD ] }
@ -673,11 +684,9 @@ M: x86 %gather-vector-2-reps
[ dupd SHUFPD ]
} case ;
: float-4-shuffle ( dst shuffle -- )
: sse1-float-4-shuffle ( dst shuffle -- )
{
{ { 0 1 2 3 } [ drop ] }
{ { 0 0 2 2 } [ dup MOVSLDUP ] }
{ { 1 1 3 3 } [ dup MOVSHDUP ] }
{ { 0 1 0 1 } [ dup MOVLHPS ] }
{ { 2 3 2 3 } [ dup MOVHLPS ] }
{ { 0 0 1 1 } [ dup UNPCKLPS ] }
@ -685,6 +694,15 @@ M: x86 %gather-vector-2-reps
[ dupd SHUFPS ]
} case ;
: float-4-shuffle ( dst shuffle -- )
sse3? [
{
{ { 0 0 2 2 } [ dup MOVSLDUP ] }
{ { 1 1 3 3 } [ dup MOVSHDUP ] }
[ sse1-float-4-shuffle ]
} case
] [ sse1-float-4-shuffle ] if ;
: int-4-shuffle ( dst shuffle -- )
{
{ { 0 1 2 3 } [ drop ] }
@ -698,7 +716,7 @@ M: x86 %gather-vector-2-reps
: longlong-2-shuffle ( dst shuffle -- )
first2 [ 2 * dup 1 + ] bi@ 4array int-4-shuffle ;
M:: x86 %shuffle-vector ( dst src shuffle rep -- )
M:: x86 %shuffle-vector-imm ( dst src shuffle rep -- )
dst src rep %copy
dst shuffle rep unsign-rep {
{ double-2-rep [ double-2-shuffle ] }
@ -707,12 +725,20 @@ M:: x86 %shuffle-vector ( dst src shuffle rep -- )
{ longlong-2-rep [ longlong-2-shuffle ] }
} case ;
M: x86 %shuffle-vector-reps
M: x86 %shuffle-vector-imm-reps
{
{ sse? { float-4-rep } }
{ sse2? { double-2-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
} available-reps ;
M: x86 %shuffle-vector ( dst src shuffle rep -- )
two-operand PSHUFB ;
M: x86 %shuffle-vector-reps
{
{ ssse3? { float-4-rep double-2-rep longlong-2-rep ulonglong-2-rep int-4-rep uint-4-rep short-8-rep ushort-8-rep char-16-rep uchar-16-rep } }
} available-reps ;
M: x86 %merge-vector-head
[ two-operand ] keep
unsign-rep {
@ -790,8 +816,6 @@ M: x86 %unpack-vector-head-reps ( -- reps )
{ sse4.1? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
} available-reps ;
M: x86 %unpack-vector-tail-reps ( -- reps ) { } ;
M: x86 %integer>float-vector ( dst src rep -- )
{
{ int-4-rep [ CVTDQ2PS ] }
@ -1037,10 +1061,6 @@ M: x86 %mul-vector-reps
{ sse4.1? { int-4-rep uint-4-rep } }
} available-reps ;
M: x86 %saturated-mul-vector-reps
! No multiplication with saturation on x86
{ } ;
M: x86 %div-vector ( dst src1 src2 rep -- )
[ two-operand ] keep
{
@ -1223,8 +1243,6 @@ M: x86 %xor-vector-reps
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
} available-reps ;
M: x86 %not-vector-reps { } ;
M: x86 %shl-vector ( dst src1 src2 rep -- )
[ two-operand ] keep
{
@ -1271,6 +1289,30 @@ M:: x86 %scalar>integer ( dst src rep -- )
{ uint-scalar-rep [
dst 32-bit-version-of src MOVD
] }
{ short-scalar-rep [
dst 32-bit-version-of src MOVD
dst dst 16-bit-version-of MOVSX
] }
{ ushort-scalar-rep [
dst 32-bit-version-of src MOVD
dst dst 16-bit-version-of MOVZX
] }
{ char-scalar-rep [
dst 32-bit-version-of src MOVD
dst { } 8 [| tmp-dst |
tmp-dst dst int-rep %copy
tmp-dst tmp-dst 8-bit-version-of MOVSX
dst tmp-dst int-rep %copy
] with-small-register
] }
{ uchar-scalar-rep [
dst 32-bit-version-of src MOVD
dst { } 8 [| tmp-dst |
tmp-dst dst int-rep %copy
tmp-dst tmp-dst 8-bit-version-of MOVZX
dst tmp-dst int-rep %copy
] with-small-register
] }
} case ;
M: x86 %vector>scalar %copy ;

View File

@ -60,7 +60,7 @@ MACRO: simd-boa ( rep class -- simd-array )
[ rep-components ] [ new ] [ '[ _ ] ] tri* swap replicate-as ; inline
: simd-with/nth-fast? ( rep -- ? )
[ \ (simd-vshuffle) supported-simd-op? ]
[ \ (simd-vshuffle-elements) supported-simd-op? ]
[ rep-component-type can-be-unboxed? ]
bi and ;
@ -184,6 +184,8 @@ WHERE
TUPLE: A
{ underlying byte-array read-only initial: $[ 16 <byte-array> ] } ;
INSTANCE: A simd-128
M: A clone underlying>> clone \ A boa ; inline
M: A length drop N ; inline
@ -315,7 +317,7 @@ SLOT: underlying2
class c:typedef ;
: (define-simd-256) ( simd -- )
simd-ops get { vshuffle hlshift hrshift } unique assoc-diff >>ops
simd-ops get { vshuffle-elements vshuffle-bytes hlshift hrshift } unique assoc-diff >>ops
[ define-simd ]
[ [ class>> ] [ rep>> ] bi define-simd-256-type ] bi ;
@ -362,6 +364,8 @@ TUPLE: A
{ underlying1 byte-array initial: $[ 16 <byte-array> ] read-only }
{ underlying2 byte-array initial: $[ 16 <byte-array> ] read-only } ;
INSTANCE: A simd-256
M: A clone
[ underlying1>> clone ] [ underlying2>> clone ] bi
\ A boa ; inline

View File

@ -67,7 +67,8 @@ SIMD-OP: vlshift
SIMD-OP: vrshift
SIMD-OP: hlshift
SIMD-OP: hrshift
SIMD-OP: vshuffle
SIMD-OP: vshuffle-elements
SIMD-OP: vshuffle-bytes
SIMD-OP: (vmerge-head)
SIMD-OP: (vmerge-tail)
SIMD-OP: v<=
@ -148,6 +149,9 @@ GENERIC# supported-simd-op? 1 ( rep intrinsic -- ? )
union
{ uchar-16-rep ushort-8-rep uint-4-rep ulonglong-2-rep } union ;
: (%shuffle-imm-reps) ( -- reps )
%shuffle-vector-reps %shuffle-vector-imm-reps union ;
M: vector-rep supported-simd-op?
{
{ \ (simd-v+) [ %add-vector-reps ] }
@ -179,7 +183,8 @@ M: vector-rep supported-simd-op?
{ \ (simd-vrshift) [ %shr-vector-reps ] }
{ \ (simd-hlshift) [ %horizontal-shl-vector-reps ] }
{ \ (simd-hrshift) [ %horizontal-shr-vector-reps ] }
{ \ (simd-vshuffle) [ %shuffle-vector-reps ] }
{ \ (simd-vshuffle-elements) [ (%shuffle-imm-reps) ] }
{ \ (simd-vshuffle-bytes) [ %shuffle-vector-reps ] }
{ \ (simd-(vmerge-head)) [ %merge-vector-reps ] }
{ \ (simd-(vmerge-tail)) [ %merge-vector-reps ] }
{ \ (simd-(v>float)) [ %integer>float-vector-reps ] }

View File

@ -41,7 +41,21 @@ $nl
POSTPONE: SIMD:
POSTPONE: SIMDS:
}
"The following vector types are supported:"
"The following scalar types are supported:"
{ $code
"char"
"uchar"
"short"
"ushort"
"int"
"uint"
"longlong"
"ulonglong"
"float"
"double"
}
"The following vector types are generated from the above scalar types:"
{ $code
"char-16"
"uchar-16"
@ -89,6 +103,7 @@ $nl
{ $code
"""USING: compiler.tree.debugger math.vectors
math.vectors.simd ;
SIMD: double
SYMBOLS: x y ;
[
@ -107,7 +122,7 @@ IN: simd-demo
{ float-4 float-4 float-4 } declare
[ v* ] [ [ 1.0 ] dip n-v v* ] bi-curry* bi v+ ;
\ interpolate optimizer-report.""" }
\\ interpolate optimizer-report.""" }
"Note that using " { $link declare } " is not recommended. Safer ways of getting type information for the input parameters to a word include defining methods on a generic word (the value being dispatched upon has a statically known type in the method body), as well as using " { $link "hints" } " and " { $link POSTPONE: inline } " declarations."
$nl
"Here is a better version of the " { $snippet "interpolate" } " words above that uses hints:"
@ -122,7 +137,7 @@ IN: simd-demo
HINTS: interpolate float-4 float-4 float-4 ;
\ interpolate optimizer-report. """ }
\\ interpolate optimizer-report. """ }
"This time, the optimizer report lists calls to both SIMD primitives and high-level vector words, because hints cause two code paths to be generated. The " { $snippet "optimized." } " word can be used to make sure that the fast code path consists entirely of calls to primitives."
$nl
"If the " { $snippet "interpolate" } " word was to be used in several places with different types of vectors, it would be best to declare it " { $link POSTPONE: inline } "."
@ -153,13 +168,13 @@ M: actor advance ( dt actor -- )
[ >float ] dip
[ update-velocity ] [ update-position ] 2bi ;
M\ actor advance optimized."""
M\\ actor advance optimized."""
}
"The " { $vocab-link "compiler.cfg.debugger" } " vocabulary can give a lower-level picture of the generated code, that includes register assignments and other low-level details. To look at low-level optimizer output, call " { $snippet "test-mr mr." } " on a word or quotation:"
{ $code
"""USE: compiler.tree.debugger
M\ actor advance test-mr mr.""" }
M\\ actor advance test-mr mr.""" }
"An example of a high-performance algorithm that uses SIMD primitives can be found in the " { $vocab-link "benchmark.nbody-simd" } " vocabulary." ;
ARTICLE: "math.vectors.simd.intrinsics" "Low-level SIMD primitives"
@ -206,7 +221,7 @@ ARTICLE: "math.vectors.simd" "Hardware vector arithmetic (SIMD)"
HELP: SIMD:
{ $syntax "SIMD: type" }
{ $values { "type" "a scalar C type" } }
{ $description "Defines 128-bit and 256-bit SIMD arrays for holding elements of " { $snippet "type" } " into the vocabulary search path. The possible type/length combinations are listed in " { $link "math.vectors.simd.types" } " and the generated words are documented in " { $link "math.vectors.simd.words" } "." } ;
{ $description "Defines 128-bit and 256-bit SIMD arrays for holding elements of " { $snippet "type" } " into the vocabulary search path. The allowed scalar types, and the auto-generated type/length vector combinations that result, are listed in " { $link "math.vectors.simd.types" } ". Generated words are documented in " { $link "math.vectors.simd.words" } "." } ;
HELP: SIMDS:
{ $syntax "SIMDS: type type type ... ;" }

View File

@ -174,7 +174,7 @@ CONSTANT: simd-classes
: remove-special-words ( alist -- alist' )
! These have their own tests later
{
hlshift hrshift vshuffle vbroadcast
hlshift hrshift vshuffle-bytes vshuffle-elements vbroadcast
vany? vall? vnone?
(v>float) (v>integer)
(vpack-signed) (vpack-unsigned)
@ -360,6 +360,23 @@ simd-classes [
] unit-test
] each
"== Checking variable shuffles" print
: random-shift-vector ( class -- vec )
new [ drop 16 random ] map ;
:: test-shift-vector ( class -- ? )
class random-int-vector :> src
char-16 random-shift-vector :> perm
{ class char-16 } :> decl
src perm vshuffle
src perm [ decl declare vshuffle ] compile-call
= ; inline
{ char-16 uchar-16 short-8 ushort-8 int-4 uint-4 longlong-2 ulonglong-2 }
[ 10 swap '[ [ t ] [ _ test-shift-vector ] unit-test ] times ] each
"== Checking vector tests" print
:: test-vector-tests-bool ( vector declaration -- none? any? all? )
@ -512,38 +529,38 @@ SYMBOL: !!inconsistent!!
STRUCT: simd-struct
{ x float-4 }
{ y double-2 }
{ y longlong-2 }
{ z double-4 }
{ w float-8 } ;
{ w int-8 } ;
[ t ] [ [ simd-struct <struct> ] compile-call >c-ptr [ 0 = ] all? ] unit-test
[
float-4{ 1 2 3 4 }
double-2{ 2 1 }
longlong-2{ 2 1 }
double-4{ 4 3 2 1 }
float-8{ 1 2 3 4 5 6 7 8 }
int-8{ 1 2 3 4 5 6 7 8 }
] [
simd-struct <struct>
float-4{ 1 2 3 4 } >>x
double-2{ 2 1 } >>y
longlong-2{ 2 1 } >>y
double-4{ 4 3 2 1 } >>z
float-8{ 1 2 3 4 5 6 7 8 } >>w
int-8{ 1 2 3 4 5 6 7 8 } >>w
{ [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave
] unit-test
[
float-4{ 1 2 3 4 }
double-2{ 2 1 }
longlong-2{ 2 1 }
double-4{ 4 3 2 1 }
float-8{ 1 2 3 4 5 6 7 8 }
int-8{ 1 2 3 4 5 6 7 8 }
] [
[
simd-struct <struct>
float-4{ 1 2 3 4 } >>x
double-2{ 2 1 } >>y
longlong-2{ 2 1 } >>y
double-4{ 4 3 2 1 } >>z
float-8{ 1 2 3 4 5 6 7 8 } >>w
int-8{ 1 2 3 4 5 6 7 8 } >>w
{ [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave
] compile-call
] unit-test

View File

@ -98,7 +98,8 @@ H{
{ vrshift { +vector+ +scalar+ -> +vector+ } }
{ hlshift { +vector+ +literal+ -> +vector+ } }
{ hrshift { +vector+ +literal+ -> +vector+ } }
{ vshuffle { +vector+ +literal+ -> +vector+ } }
{ vshuffle-elements { +vector+ +literal+ -> +vector+ } }
{ vshuffle-bytes { +vector+ +vector+ -> +vector+ } }
{ vbroadcast { +vector+ +literal+ -> +vector+ } }
{ (vmerge-head) { +vector+ +vector+ -> +vector+ } }
{ (vmerge-tail) { +vector+ +vector+ -> +vector+ } }
@ -162,7 +163,7 @@ ERROR: bad-vector-word word ;
} cond
! Don't specialize horizontal shifts, shuffles, and conversions at all, they're only for SIMD
{
hlshift hrshift vshuffle vbroadcast
hlshift hrshift vshuffle-elements vshuffle-bytes vbroadcast
(v>integer) (v>float)
(vpack-signed) (vpack-unsigned)
(vunpack-head) (vunpack-tail)

View File

@ -6,6 +6,9 @@ locals ;
QUALIFIED-WITH: alien.c-types c
IN: math.vectors
MIXIN: simd-128
MIXIN: simd-256
GENERIC: element-type ( obj -- c-type )
M: object element-type drop f ; inline
@ -83,7 +86,20 @@ PRIVATE>
: vbitnot ( u -- w ) dup '[ _ [ bitnot ] fp-bitwise-unary ] map ;
:: vbroadcast ( u n -- v ) u length n u nth <repetition> u like ;
: vshuffle ( u perm -- v ) swap [ '[ _ nth ] ] keep map-as ;
: vshuffle-elements ( u perm -- v )
swap [ '[ _ nth ] ] keep map-as ;
: vshuffle-bytes ( u perm -- v )
underlying>> [
swap [ '[ _ nth ] ] keep map-as
] curry change-underlying ;
GENERIC: vshuffle ( u perm -- v )
M: array vshuffle ( u perm -- v )
vshuffle-elements ; inline
M: simd-128 vshuffle ( u perm -- v )
vshuffle-bytes ; inline
: vlshift ( u n -- w ) '[ _ shift ] map ;
: vrshift ( u n -- w ) neg '[ _ shift ] map ;
@ -107,9 +123,9 @@ PRIVATE>
: vxor ( u v -- w ) over '[ [ _ element>bool ] bi@ xor ] 2map ;
: vnot ( u -- w ) dup '[ _ element>bool not ] map ;
: vall? ( v -- ? ) [ ] all? ;
: vany? ( v -- ? ) [ ] any? ;
: vnone? ( v -- ? ) [ not ] all? ;
: vall? ( v -- ? ) dup '[ _ element>bool ] all? ;
: vany? ( v -- ? ) dup '[ _ element>bool ] any? ;
: vnone? ( v -- ? ) dup '[ _ element>bool not ] all? ;
: v< ( u v -- w ) [ < ] 2map ;
: v<= ( u v -- w ) [ <= ] 2map ;

View File

@ -2,7 +2,8 @@
! See http://factorcode.org/license.txt for BSD license.
USING: accessors alien alien.c-types alien.data alien.parser
assocs byte-arrays classes compiler.units functors kernel lexer
libc math math.vectors math.vectors.specialization namespaces
libc math math.vectors math.vectors.private
math.vectors.specialization namespaces
parser prettyprint.custom sequences sequences.private strings
summary vocabs vocabs.loader vocabs.parser vocabs.generated
words fry combinators present ;
@ -68,6 +69,8 @@ TUPLE: A
[ drop \ T bad-byte-array-length ] unless
<direct-A> ; inline
M: A new-underlying drop byte-array>A ;
M: A clone [ underlying>> clone ] [ length>> ] bi <direct-A> ; inline
M: A length length>> ; inline

View File

@ -129,17 +129,18 @@ ARTICLE: "tuple-inheritance-example" "Tuple subclassing example"
}
"Rectangles and parallelograms use the same algorithm for computing the area, whereas they use different algorithms for computing perimiter. Also, rectangles and parallelograms both have " { $snippet "width" } " and " { $snippet "height" } " slots. We can exploit this with subclassing:"
{ $code
"USING: accessors kernel math math.constants math.functions ;"
"GENERIC: area ( shape -- n )"
"GENERIC: perimiter ( shape -- n )"
""
"TUPLE: shape ;"
""
"TUPLE: circle < shape radius ;"
"M: area circle radius>> sq pi * ;"
"M: perimiter circle radius>> 2 * pi * ;"
"M: circle area radius>> sq pi * ;"
"M: circle perimiter radius>> 2 * pi * ;"
""
"TUPLE: quad < shape width height"
"M: area quad [ width>> ] [ height>> ] bi * ;"
"TUPLE: quad < shape width height ;"
"M: quad area [ width>> ] [ height>> ] bi * ;"
""
"TUPLE: rectangle < quad ;"
"M: rectangle perimiter [ width>> 2 * ] [ height>> 2 * ] bi + ;"

View File

@ -52,10 +52,10 @@ IN: math.matrices.simd.tests
[
S{ matrix4 f
float-4-array{
float-4{ 1.0 0.0 0.0 3.0 }
float-4{ 0.0 1.0 0.0 4.0 }
float-4{ 0.0 0.0 1.0 2.0 }
float-4{ 0.0 0.0 0.0 1.0 }
float-4{ 1.0 0.0 0.0 0.0 }
float-4{ 0.0 1.0 0.0 0.0 }
float-4{ 0.0 0.0 1.0 0.0 }
float-4{ 3.0 4.0 2.0 1.0 }
}
}
] [ float-4{ 3.0 4.0 2.0 0.0 } translation-matrix4 ] unit-test
@ -77,9 +77,9 @@ IN: math.matrices.simd.tests
float-4{ 0.0 1.0 0.0 1.0 } pi 1/2. * rotation-matrix4
S{ matrix4 f
float-4-array{
float-4{ 0.0 0.0 1.0 0.0 }
float-4{ 0.0 0.0 -1.0 0.0 }
float-4{ 0.0 1.0 0.0 0.0 }
float-4{ -1.0 0.0 0.0 0.0 }
float-4{ 1.0 0.0 0.0 0.0 }
float-4{ 0.0 0.0 0.0 1.0 }
}
}
@ -89,10 +89,10 @@ IN: math.matrices.simd.tests
[
S{ matrix4 f
float-4-array{
float-4{ 2.0 0.0 0.0 10.0 }
float-4{ 0.0 3.0 0.0 18.0 }
float-4{ 0.0 0.0 4.0 28.0 }
float-4{ 0.0 0.0 0.0 1.0 }
float-4{ 2.0 0.0 0.0 0.0 }
float-4{ 0.0 3.0 0.0 0.0 }
float-4{ 0.0 0.0 4.0 0.0 }
float-4{ 10.0 18.0 28.0 1.0 }
}
}
] [
@ -106,10 +106,10 @@ IN: math.matrices.simd.tests
}
S{ matrix4 f
float-4-array{
float-4{ 1.0 0.0 0.0 5.0 }
float-4{ 0.0 1.0 0.0 6.0 }
float-4{ 0.0 0.0 1.0 7.0 }
float-4{ 0.0 0.0 0.0 1.0 }
float-4{ 1.0 0.0 0.0 0.0 }
float-4{ 0.0 1.0 0.0 0.0 }
float-4{ 0.0 0.0 1.0 0.0 }
float-4{ 5.0 6.0 7.0 1.0 }
}
}
m4.
@ -118,10 +118,10 @@ IN: math.matrices.simd.tests
[
S{ matrix4 f
float-4-array{
float-4{ 3.0 0.0 0.0 5.0 }
float-4{ 0.0 4.0 0.0 6.0 }
float-4{ 0.0 0.0 5.0 7.0 }
float-4{ 0.0 0.0 0.0 2.0 }
float-4{ 3.0 0.0 0.0 0.0 }
float-4{ 0.0 4.0 0.0 0.0 }
float-4{ 0.0 0.0 5.0 0.0 }
float-4{ 5.0 6.0 7.0 2.0 }
}
}
] [
@ -135,10 +135,10 @@ IN: math.matrices.simd.tests
}
S{ matrix4 f
float-4-array{
float-4{ 1.0 0.0 0.0 5.0 }
float-4{ 0.0 1.0 0.0 6.0 }
float-4{ 0.0 0.0 1.0 7.0 }
float-4{ 0.0 0.0 0.0 1.0 }
float-4{ 1.0 0.0 0.0 0.0 }
float-4{ 0.0 1.0 0.0 0.0 }
float-4{ 0.0 0.0 1.0 0.0 }
float-4{ 5.0 6.0 7.0 1.0 }
}
}
m4+
@ -147,10 +147,10 @@ IN: math.matrices.simd.tests
[
S{ matrix4 f
float-4-array{
float-4{ 1.0 0.0 0.0 -5.0 }
float-4{ 0.0 2.0 0.0 -6.0 }
float-4{ 0.0 0.0 3.0 -7.0 }
float-4{ 0.0 0.0 0.0 0.0 }
float-4{ 1.0 0.0 0.0 0.0 }
float-4{ 0.0 2.0 0.0 0.0 }
float-4{ 0.0 0.0 3.0 0.0 }
float-4{ -5.0 -6.0 -7.0 0.0 }
}
}
] [
@ -164,10 +164,10 @@ IN: math.matrices.simd.tests
}
S{ matrix4 f
float-4-array{
float-4{ 1.0 0.0 0.0 5.0 }
float-4{ 0.0 1.0 0.0 6.0 }
float-4{ 0.0 0.0 1.0 7.0 }
float-4{ 0.0 0.0 0.0 1.0 }
float-4{ 1.0 0.0 0.0 0.0 }
float-4{ 0.0 1.0 0.0 0.0 }
float-4{ 0.0 0.0 1.0 0.0 }
float-4{ 5.0 6.0 7.0 1.0 }
}
}
m4-
@ -219,10 +219,10 @@ IN: math.matrices.simd.tests
[
S{ matrix4 f
float-4-array{
float-4{ 1/2. 0.0 0.0 0.0 }
float-4{ 0.0 1/2. 0.0 0.0 }
float-4{ 0.0 0.0 -6/4. -10/4. }
float-4{ 0.0 0.0 -1.0 0.0 }
float-4{ 1/2. 0.0 0.0 0.0 }
float-4{ 0.0 1/2. 0.0 0.0 }
float-4{ 0.0 0.0 -6/4. -1.0 }
float-4{ 0.0 0.0 -10/4. 0.0 }
}
}
] [

View File

@ -9,34 +9,34 @@ SPECIALIZED-ARRAY: float-4
IN: math.matrices.simd
STRUCT: matrix4
{ rows float-4[4] } ;
{ columns float-4[4] } ;
INSTANCE: matrix4 immutable-sequence
M: matrix4 length drop 4 ; inline
M: matrix4 nth-unsafe rows>> nth-unsafe ; inline
M: matrix4 nth-unsafe columns>> nth-unsafe ; inline
M: matrix4 new-sequence 2drop matrix4 (struct) ; inline
<PRIVATE
: rows ( a -- a1 a2 a3 a4 )
rows>> 4 firstn ; inline
: columns ( a -- a1 a2 a3 a4 )
columns>> 4 firstn ; inline
:: set-rows ( c1 c2 c3 c4 c -- c )
c rows>> :> rows
c1 rows set-first
c2 rows set-second
c3 rows set-third
c4 rows set-fourth
:: set-columns ( c1 c2 c3 c4 c -- c )
c columns>> :> columns
c1 columns set-first
c2 columns set-second
c3 columns set-third
c4 columns set-fourth
c ; inline
: make-matrix4 ( quot: ( -- c1 c2 c3 c4 ) -- c )
matrix4 (struct) swap dip set-rows ; inline
matrix4 (struct) swap dip set-columns ; inline
:: 2map-rows ( a b quot -- c )
:: 2map-columns ( a b quot -- c )
[
a rows :> a4 :> a3 :> a2 :> a1
b rows :> b4 :> b3 :> b2 :> b1
a columns :> a4 :> a3 :> a2 :> a1
b columns :> b4 :> b3 :> b2 :> b1
a1 b1 quot call
a2 b2 quot call
@ -44,57 +44,57 @@ M: matrix4 new-sequence 2drop matrix4 (struct) ; inline
a4 b4 quot call
] make-matrix4 ; inline
: map-rows ( a quot -- c )
'[ rows _ 4 napply ] make-matrix4 ; inline
: map-columns ( a quot -- c )
'[ columns _ 4 napply ] make-matrix4 ; inline
PRIVATE>
TYPED: m4+ ( a: matrix4 b: matrix4 -- c: matrix4 ) [ v+ ] 2map-rows ;
TYPED: m4- ( a: matrix4 b: matrix4 -- c: matrix4 ) [ v- ] 2map-rows ;
TYPED: m4* ( a: matrix4 b: matrix4 -- c: matrix4 ) [ v* ] 2map-rows ;
TYPED: m4/ ( a: matrix4 b: matrix4 -- c: matrix4 ) [ v/ ] 2map-rows ;
TYPED: m4+ ( a: matrix4 b: matrix4 -- c: matrix4 ) [ v+ ] 2map-columns ;
TYPED: m4- ( a: matrix4 b: matrix4 -- c: matrix4 ) [ v- ] 2map-columns ;
TYPED: m4* ( a: matrix4 b: matrix4 -- c: matrix4 ) [ v* ] 2map-columns ;
TYPED: m4/ ( a: matrix4 b: matrix4 -- c: matrix4 ) [ v/ ] 2map-columns ;
TYPED: m4*n ( a: matrix4 b: float -- c: matrix4 ) [ v*n ] curry map-rows ;
TYPED: m4/n ( a: matrix4 b: float -- c: matrix4 ) [ v/n ] curry map-rows ;
TYPED: n*m4 ( a: float b: matrix4 -- c: matrix4 ) [ n*v ] with map-rows ;
TYPED: n/m4 ( a: float b: matrix4 -- c: matrix4 ) [ n/v ] with map-rows ;
TYPED: m4*n ( a: matrix4 b: float -- c: matrix4 ) [ v*n ] curry map-columns ;
TYPED: m4/n ( a: matrix4 b: float -- c: matrix4 ) [ v/n ] curry map-columns ;
TYPED: n*m4 ( a: float b: matrix4 -- c: matrix4 ) [ n*v ] with map-columns ;
TYPED: n/m4 ( a: float b: matrix4 -- c: matrix4 ) [ n/v ] with map-columns ;
TYPED:: m4. ( a: matrix4 b: matrix4 -- c: matrix4 )
[
a rows :> a4 :> a3 :> a2 :> a1
b rows :> b4 :> b3 :> b2 :> b1
a columns :> a4 :> a3 :> a2 :> a1
b columns :> b4 :> b3 :> b2 :> b1
a1 first b1 n*v :> c1a
a2 first b1 n*v :> c2a
a3 first b1 n*v :> c3a
a4 first b1 n*v :> c4a
b1 first a1 n*v :> c1a
b2 first a1 n*v :> c2a
b3 first a1 n*v :> c3a
b4 first a1 n*v :> c4a
a1 second b2 n*v c1a v+ :> c1b
a2 second b2 n*v c2a v+ :> c2b
a3 second b2 n*v c3a v+ :> c3b
a4 second b2 n*v c4a v+ :> c4b
b1 second a2 n*v c1a v+ :> c1b
b2 second a2 n*v c2a v+ :> c2b
b3 second a2 n*v c3a v+ :> c3b
b4 second a2 n*v c4a v+ :> c4b
a1 third b3 n*v c1b v+ :> c1c
a2 third b3 n*v c2b v+ :> c2c
a3 third b3 n*v c3b v+ :> c3c
a4 third b3 n*v c4b v+ :> c4c
b1 third a3 n*v c1b v+ :> c1c
b2 third a3 n*v c2b v+ :> c2c
b3 third a3 n*v c3b v+ :> c3c
b4 third a3 n*v c4b v+ :> c4c
a1 fourth b4 n*v c1c v+
a2 fourth b4 n*v c2c v+
a3 fourth b4 n*v c3c v+
a4 fourth b4 n*v c4c v+
b1 fourth a4 n*v c1c v+
b2 fourth a4 n*v c2c v+
b3 fourth a4 n*v c3c v+
b4 fourth a4 n*v c4c v+
] make-matrix4 ;
TYPED:: v.m4 ( a: float-4 b: matrix4 -- c: float-4 )
b rows :> b4 :> b3 :> b2 :> b1
TYPED:: m4.v ( m: matrix4 v: float-4 -- v': float-4 )
m columns :> m4 :> m3 :> m2 :> m1
a first b1 n*v
a second b2 n*v v+
a third b3 n*v v+
a fourth b4 n*v v+ ;
v first m1 n*v
v second m2 n*v v+
v third m3 n*v v+
v fourth m4 n*v v+ ;
TYPED:: m4.v ( a: matrix4 b: float-4 -- c: float-4 )
a rows [ b v. ] 4 napply float-4-boa ;
TYPED:: v.m4 ( v: float-4 m: matrix4 -- c: float-4 )
m columns [ v v. ] 4 napply float-4-boa ;
CONSTANT: identity-matrix4
S{ matrix4 f
@ -131,37 +131,37 @@ TYPED: diagonal-matrix4 ( diagonal: float-4 -- matrix: matrix4 )
[ (vmerge) ] bi-curry@ bi* ; inline
TYPED: transpose-matrix4 ( matrix: matrix4 -- matrix: matrix4 )
[ rows vmerge-transpose vmerge-transpose ] make-matrix4 ;
[ columns vmerge-transpose vmerge-transpose ] make-matrix4 ;
: linear>homogeneous ( v -- v' )
[ float-4{ t t t f } ] dip float-4{ 0.0 0.0 0.0 1.0 } v? ; inline
: scale-matrix4 ( factors -- matrix )
[ float-4{ t t t f } ] dip float-4{ 0.0 0.0 0.0 1.0 } v?
diagonal-matrix4 ; inline
linear>homogeneous diagonal-matrix4 ; inline
: ortho-matrix4 ( factors -- matrix )
float-4{ 1.0 1.0 1.0 1.0 } swap v/ scale-matrix4 ; inline
TYPED:: translation-matrix4 ( offset: float-4 -- matrix: matrix4 )
TYPED: translation-matrix4 ( offset: float-4 -- matrix: matrix4 )
[
float-4{ 1.0 1.0 1.0 1.0 } :> diagonal
offset 0 float-4-with (vmerge)
[ 0 float-4-with swap (vmerge) ] bi@ drop :> z :> y :> x
diagonal y vmerge-diagonal*
[ x vmerge-diagonal* ]
[ z vmerge-diagonal* ] bi*
linear>homogeneous
[
float-4{ 1.0 0.0 0.0 0.0 }
float-4{ 0.0 1.0 0.0 0.0 }
float-4{ 0.0 0.0 1.0 0.0 }
] dip
] make-matrix4 ;
TYPED:: rotation-matrix4 ( axis: float-4 theta: float -- matrix: matrix4 )
! x*x + c*(1.0 - x*x) x*y*(1.0 - c) - s*z x*z*(1.0 - c) + s*y 0
! x*y*(1.0 - c) + s*z y*y + c*(1.0 - y*y) y*z*(1.0 - c) - s*x 0
! x*z*(1.0 - c) - s*y y*z*(1.0 - c) + s*x z*z + c*(1.0 - z*z) 0
! x*x + c*(1.0 - x*x) x*y*(1.0 - c) + s*z x*z*(1.0 - c) - s*y 0
! x*y*(1.0 - c) - s*z y*y + c*(1.0 - y*y) y*z*(1.0 - c) + s*x 0
! x*z*(1.0 - c) + s*y y*z*(1.0 - c) - s*x z*z + c*(1.0 - z*z) 0
! 0 0 0 1
matrix4 (struct) :> triangle-m
theta cos :> c
theta sin :> s
float-4{ 1.0 -1.0 1.0 0.0 } :> triangle-sign
float-4{ -1.0 1.0 -1.0 0.0 } :> triangle-sign
c float-4-with :> cc
s float-4-with :> ss
@ -184,7 +184,7 @@ TYPED:: rotation-matrix4 ( axis: float-4 theta: float -- matrix: matrix4 )
triangle-lo { 1 0 3 3 } vshuffle
float-4 new
triangle-m set-rows drop
triangle-m set-columns drop
diagonal-m triangle-m m4+ ;
@ -194,8 +194,10 @@ TYPED:: frustum-matrix4 ( xy: float-4 near: float far: float -- matrix: matrix4
float-4{ t t f f } xy near far - float-4-with v? ! denom
v/ :> fov
fov 0.0 float-4-with (vmerge-head) vmerge-diagonal
fov float-4{ f f t t } vand
float-4{ 0.0 0.0 -1.0 0.0 }
float-4{ 0.0 -1.0 0.0 0.0 } :> negone
fov vmerge-diagonal
[ vmerge-diagonal ]
[ negone (vmerge) ] bi*
] make-matrix4 ;