From 3bc097f6ff9388924faa41c2a12a965bc5d2dd50 Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Fri, 9 Oct 2009 20:46:52 -0500 Subject: [PATCH 01/13] rename ##shuffle-vector to ##shuffle-vector-imm, and add a new ##shuffle-vector for dynamic shuffles. have vshuffle use ##shuffle-vector to do word and byte shuffles on x86 --- .../cfg/instructions/instructions.factor | 5 ++ .../compiler/cfg/intrinsics/simd/simd.factor | 47 +++++++++++++++---- .../value-numbering/rewrite/rewrite.factor | 18 +++---- .../value-numbering/simplify/simplify.factor | 2 +- .../value-numbering-tests.factor | 22 ++++----- basis/compiler/codegen/codegen.factor | 1 + basis/cpu/architecture/architecture.factor | 3 ++ basis/cpu/x86/x86.factor | 43 +++++++++++++---- .../vectors/simd/intrinsics/intrinsics.factor | 5 +- 9 files changed, 105 insertions(+), 41 deletions(-) diff --git a/basis/compiler/cfg/instructions/instructions.factor b/basis/compiler/cfg/instructions/instructions.factor index 57d88a2d86..119af6d0b1 100644 --- a/basis/compiler/cfg/instructions/instructions.factor +++ b/basis/compiler/cfg/instructions/instructions.factor @@ -277,6 +277,11 @@ literal: rep ; PURE-INSN: ##shuffle-vector def: dst +use: src shuffle +literal: rep ; + +PURE-INSN: ##shuffle-vector-imm +def: dst use: src literal: shuffle rep ; diff --git a/basis/compiler/cfg/intrinsics/simd/simd.factor b/basis/compiler/cfg/intrinsics/simd/simd.factor index 7607d69e45..3f7530caca 100644 --- a/basis/compiler/cfg/intrinsics/simd/simd.factor +++ b/basis/compiler/cfg/intrinsics/simd/simd.factor @@ -1,15 +1,15 @@ ! Copyright (C) 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: accessors byte-arrays fry cpu.architecture kernel math -sequences math.vectors.simd.intrinsics macros generalizations -combinators combinators.short-circuit arrays locals +USING: accessors alien byte-arrays fry cpu.architecture kernel math +sequences math.vectors math.vectors.simd.intrinsics macros +generalizations combinators combinators.short-circuit arrays locals compiler.tree.propagation.info compiler.cfg.builder.blocks compiler.cfg.comparisons compiler.cfg.stacks compiler.cfg.stacks.local compiler.cfg.hats compiler.cfg.instructions compiler.cfg.registers compiler.cfg.intrinsics.alien specialized-arrays ; -FROM: alien.c-types => float double ; +FROM: alien.c-types => heap-size char uchar float double ; SPECIALIZED-ARRAYS: float double ; IN: compiler.cfg.intrinsics.simd @@ -21,7 +21,7 @@ MACRO: check-elements ( quots -- ) MACRO: if-literals-match ( quots -- ) [ length ] [ ] [ length ] tri - ! n quots n n + ! n quots n '[ ! node quot [ @@ -75,17 +75,46 @@ MACRO: if-literals-match ( quots -- ) ds-push ] emit-vector-op ; -: shuffle? ( obj -- ? ) { [ array? ] [ [ integer? ] all? ] } 1&& ; +: variable-shuffle? ( obj -- ? ) + ! the vshuffle intrinsic current doesn't allow variable shuffles + drop f ; + +: immediate-shuffle? ( obj -- ? ) { [ array? ] [ [ integer? ] all? ] } 1&& ; + +: shuffle? ( obj -- ? ) { [ variable-shuffle? ] [ immediate-shuffle? ] } 1|| ; + +: (>variable-shuffle) ( shuffle rep -- shuffle ) + rep-component-type heap-size + [ dup >byte-array ] + [ iota >byte-array ] bi + '[ _ n*v _ v+ ] map concat ; + +: >variable-shuffle ( shuffle rep -- shuffle' ) + over immediate-shuffle? [ (>variable-shuffle) ] [ drop ] if ; + +: generate-shuffle-vector-imm? ( shuffle rep -- ? ) + { + [ drop immediate-shuffle? ] + [ nip %shuffle-vector-imm-reps member? ] + } 2&& ; + +: generate-shuffle-vector ( src shuffle rep -- dst ) + 2dup generate-shuffle-vector-imm? + [ ^^shuffle-vector-imm ] + [ + [ >variable-shuffle ^^load-constant ] keep + ^^shuffle-vector + ] if ; : emit-shuffle-vector ( node -- ) - ! Pad the permutation with zeroes if its too short, since we + ! Pad the permutation with zeroes if it's too short, since we ! can't throw an error at this point. - [ [ rep-components 0 pad-tail ] keep ^^shuffle-vector ] [unary/param] + [ [ rep-components 0 pad-tail ] keep generate-shuffle-vector ] [unary/param] { [ shuffle? ] [ representation? ] } if-literals-match ; : ^^broadcast-vector ( src n rep -- dst ) [ rep-components swap ] keep - ^^shuffle-vector ; + generate-shuffle-vector ; : emit-broadcast-vector ( node -- ) [ ^^broadcast-vector ] [unary/param] diff --git a/basis/compiler/cfg/value-numbering/rewrite/rewrite.factor b/basis/compiler/cfg/value-numbering/rewrite/rewrite.factor index 4a63777019..3842942a3b 100755 --- a/basis/compiler/cfg/value-numbering/rewrite/rewrite.factor +++ b/basis/compiler/cfg/value-numbering/rewrite/rewrite.factor @@ -450,26 +450,26 @@ M: ##set-alien-vector rewrite rewrite-alien-addressing ; ! Some lame constant folding for SIMD intrinsics. Eventually this ! should be redone completely. -: rewrite-shuffle-vector ( insn expr -- insn' ) +: rewrite-shuffle-vector-imm ( insn expr -- insn' ) 2dup [ rep>> ] bi@ eq? [ [ [ dst>> ] [ src>> vn>vreg ] bi* ] [ [ shuffle>> ] bi@ nths ] [ drop rep>> ] - 2tri \ ##shuffle-vector new-insn + 2tri \ ##shuffle-vector-imm new-insn ] [ 2drop f ] if ; -: (fold-shuffle-vector) ( shuffle bytes -- bytes' ) +: (fold-shuffle-vector-imm) ( shuffle bytes -- bytes' ) 2dup length swap length /i group nths concat ; -: fold-shuffle-vector ( insn expr -- insn' ) +: fold-shuffle-vector-imm ( insn expr -- insn' ) [ [ dst>> ] [ shuffle>> ] bi ] dip value>> - (fold-shuffle-vector) \ ##load-constant new-insn ; + (fold-shuffle-vector-imm) \ ##load-constant new-insn ; -M: ##shuffle-vector rewrite +M: ##shuffle-vector-imm rewrite dup src>> vreg>expr { - { [ dup shuffle-vector-expr? ] [ rewrite-shuffle-vector ] } - { [ dup reference-expr? ] [ fold-shuffle-vector ] } - { [ dup constant-expr? ] [ fold-shuffle-vector ] } + { [ dup shuffle-vector-imm-expr? ] [ rewrite-shuffle-vector-imm ] } + { [ dup reference-expr? ] [ fold-shuffle-vector-imm ] } + { [ dup constant-expr? ] [ fold-shuffle-vector-imm ] } [ 2drop f ] } cond ; diff --git a/basis/compiler/cfg/value-numbering/simplify/simplify.factor b/basis/compiler/cfg/value-numbering/simplify/simplify.factor index c2026a9483..df3dc6aab9 100644 --- a/basis/compiler/cfg/value-numbering/simplify/simplify.factor +++ b/basis/compiler/cfg/value-numbering/simplify/simplify.factor @@ -136,7 +136,7 @@ M: scalar>vector-expr simplify* [ drop f ] } cond ; -M: shuffle-vector-expr simplify* +M: shuffle-vector-imm-expr simplify* [ src>> ] [ shuffle>> ] [ rep>> rep-components iota ] tri sequence= [ drop f ] unless ; diff --git a/basis/compiler/cfg/value-numbering/value-numbering-tests.factor b/basis/compiler/cfg/value-numbering/value-numbering-tests.factor index f98824cb95..733b8cc22a 100644 --- a/basis/compiler/cfg/value-numbering/value-numbering-tests.factor +++ b/basis/compiler/cfg/value-numbering/value-numbering-tests.factor @@ -1215,31 +1215,31 @@ cell 8 = [ } ] [ { - T{ ##shuffle-vector f 1 0 { 0 1 2 3 } float-4-rep } + T{ ##shuffle-vector-imm f 1 0 { 0 1 2 3 } float-4-rep } } value-numbering-step ] unit-test [ { - T{ ##shuffle-vector f 1 0 { 1 2 3 0 } float-4-rep } - T{ ##shuffle-vector f 2 0 { 0 2 3 1 } float-4-rep } + T{ ##shuffle-vector-imm f 1 0 { 1 2 3 0 } float-4-rep } + T{ ##shuffle-vector-imm f 2 0 { 0 2 3 1 } float-4-rep } } ] [ { - T{ ##shuffle-vector f 1 0 { 1 2 3 0 } float-4-rep } - T{ ##shuffle-vector f 2 1 { 3 1 2 0 } float-4-rep } + T{ ##shuffle-vector-imm f 1 0 { 1 2 3 0 } float-4-rep } + T{ ##shuffle-vector-imm f 2 1 { 3 1 2 0 } float-4-rep } } value-numbering-step ] unit-test [ { - T{ ##shuffle-vector f 1 0 { 1 2 3 0 } float-4-rep } - T{ ##shuffle-vector f 2 1 { 1 0 } double-2-rep } + T{ ##shuffle-vector-imm f 1 0 { 1 2 3 0 } float-4-rep } + T{ ##shuffle-vector-imm f 2 1 { 1 0 } double-2-rep } } ] [ { - T{ ##shuffle-vector f 1 0 { 1 2 3 0 } float-4-rep } - T{ ##shuffle-vector f 2 1 { 1 0 } double-2-rep } + T{ ##shuffle-vector-imm f 1 0 { 1 2 3 0 } float-4-rep } + T{ ##shuffle-vector-imm f 2 1 { 1 0 } double-2-rep } } value-numbering-step ] unit-test @@ -1253,7 +1253,7 @@ cell 8 = [ { T{ ##load-constant f 0 $[ 55 tag-fixnum ] } T{ ##scalar>vector f 1 0 int-4-rep } - T{ ##shuffle-vector f 2 1 { 0 0 0 0 } float-4-rep } + T{ ##shuffle-vector-imm f 2 1 { 0 0 0 0 } float-4-rep } } value-numbering-step ] unit-test @@ -1267,7 +1267,7 @@ cell 8 = [ { T{ ##load-constant f 0 1.25 } T{ ##scalar>vector f 1 0 float-4-rep } - T{ ##shuffle-vector f 2 1 { 0 0 0 0 } float-4-rep } + T{ ##shuffle-vector-imm f 2 1 { 0 0 0 0 } float-4-rep } } value-numbering-step ] unit-test diff --git a/basis/compiler/codegen/codegen.factor b/basis/compiler/codegen/codegen.factor index 39dd21d893..938219af22 100755 --- a/basis/compiler/codegen/codegen.factor +++ b/basis/compiler/codegen/codegen.factor @@ -154,6 +154,7 @@ CODEGEN: ##zero-vector %zero-vector CODEGEN: ##fill-vector %fill-vector CODEGEN: ##gather-vector-2 %gather-vector-2 CODEGEN: ##gather-vector-4 %gather-vector-4 +CODEGEN: ##shuffle-vector-imm %shuffle-vector-imm CODEGEN: ##shuffle-vector %shuffle-vector CODEGEN: ##tail>head-vector %tail>head-vector CODEGEN: ##merge-vector-head %merge-vector-head diff --git a/basis/cpu/architecture/architecture.factor b/basis/cpu/architecture/architecture.factor index 8bf84f6670..85a43e99fd 100644 --- a/basis/cpu/architecture/architecture.factor +++ b/basis/cpu/architecture/architecture.factor @@ -242,6 +242,7 @@ HOOK: %fill-vector cpu ( dst rep -- ) HOOK: %gather-vector-2 cpu ( dst src1 src2 rep -- ) HOOK: %gather-vector-4 cpu ( dst src1 src2 src3 src4 rep -- ) HOOK: %shuffle-vector cpu ( dst src shuffle rep -- ) +HOOK: %shuffle-vector-imm cpu ( dst src shuffle rep -- ) HOOK: %tail>head-vector cpu ( dst src rep -- ) HOOK: %merge-vector-head cpu ( dst src1 src2 rep -- ) HOOK: %merge-vector-tail cpu ( dst src1 src2 rep -- ) @@ -289,6 +290,7 @@ HOOK: %fill-vector-reps cpu ( -- reps ) HOOK: %gather-vector-2-reps cpu ( -- reps ) HOOK: %gather-vector-4-reps cpu ( -- reps ) HOOK: %shuffle-vector-reps cpu ( -- reps ) +HOOK: %shuffle-vector-imm-reps cpu ( -- reps ) HOOK: %merge-vector-reps cpu ( -- reps ) HOOK: %signed-pack-vector-reps cpu ( -- reps ) HOOK: %unsigned-pack-vector-reps cpu ( -- reps ) @@ -329,6 +331,7 @@ M: object %fill-vector-reps { } ; M: object %gather-vector-2-reps { } ; M: object %gather-vector-4-reps { } ; M: object %shuffle-vector-reps { } ; +M: object %shuffle-vector-imm-reps { } ; M: object %merge-vector-reps { } ; M: object %signed-pack-vector-reps { } ; M: object %unsigned-pack-vector-reps { } ; diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index 13727bdc61..dbd34c774a 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -698,7 +698,7 @@ M: x86 %gather-vector-2-reps : longlong-2-shuffle ( dst shuffle -- ) first2 [ 2 * dup 1 + ] bi@ 4array int-4-shuffle ; -M:: x86 %shuffle-vector ( dst src shuffle rep -- ) +M:: x86 %shuffle-vector-imm ( dst src shuffle rep -- ) dst src rep %copy dst shuffle rep unsign-rep { { double-2-rep [ double-2-shuffle ] } @@ -707,12 +707,20 @@ M:: x86 %shuffle-vector ( dst src shuffle rep -- ) { longlong-2-rep [ longlong-2-shuffle ] } } case ; -M: x86 %shuffle-vector-reps +M: x86 %shuffle-vector-imm-reps { { sse? { float-4-rep } } { sse2? { double-2-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } } } available-reps ; +M: x86 %shuffle-vector ( dst src shuffle rep -- ) + two-operand PSHUFB ; + +M: x86 %shuffle-vector-reps + { + { ssse3? { float-4-rep double-2-rep longlong-2-rep ulonglong-2-rep int-4-rep uint-4-rep short-8-rep ushort-8-rep char-16-rep uchar-16-rep } } + } available-reps ; + M: x86 %merge-vector-head [ two-operand ] keep unsign-rep { @@ -790,8 +798,6 @@ M: x86 %unpack-vector-head-reps ( -- reps ) { sse4.1? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } } } available-reps ; -M: x86 %unpack-vector-tail-reps ( -- reps ) { } ; - M: x86 %integer>float-vector ( dst src rep -- ) { { int-4-rep [ CVTDQ2PS ] } @@ -1037,10 +1043,6 @@ M: x86 %mul-vector-reps { sse4.1? { int-4-rep uint-4-rep } } } available-reps ; -M: x86 %saturated-mul-vector-reps - ! No multiplication with saturation on x86 - { } ; - M: x86 %div-vector ( dst src1 src2 rep -- ) [ two-operand ] keep { @@ -1223,8 +1225,6 @@ M: x86 %xor-vector-reps { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } } } available-reps ; -M: x86 %not-vector-reps { } ; - M: x86 %shl-vector ( dst src1 src2 rep -- ) [ two-operand ] keep { @@ -1271,6 +1271,29 @@ M:: x86 %scalar>integer ( dst src rep -- ) { uint-scalar-rep [ dst 32-bit-version-of src MOVD ] } + { short-scalar-rep [ + dst 32-bit-version-of src MOVD + dst dst 16-bit-version-of MOVSX + ] } + { ushort-scalar-rep [ + dst 32-bit-version-of src MOVD + dst dst 16-bit-version-of MOVZX + ] } + { char-scalar-rep [ + dst 32-bit-version-of src MOVD + dst { } 8 [| tmp-dst | + tmp-dst dst int-rep %copy + tmp-dst tmp-dst 8-bit-version-of MOVSX + dst tmp-dst int-rep %copy + ] with-small-register + ] } + { uchar-scalar-rep [ + dst { } 8 [| tmp-dst | + tmp-dst dst int-rep %copy + tmp-dst tmp-dst 8-bit-version-of MOVZX + dst tmp-dst int-rep %copy + ] with-small-register + ] } } case ; M: x86 %vector>scalar %copy ; diff --git a/basis/math/vectors/simd/intrinsics/intrinsics.factor b/basis/math/vectors/simd/intrinsics/intrinsics.factor index 5a7974a75f..deb92c2944 100644 --- a/basis/math/vectors/simd/intrinsics/intrinsics.factor +++ b/basis/math/vectors/simd/intrinsics/intrinsics.factor @@ -148,6 +148,9 @@ GENERIC# supported-simd-op? 1 ( rep intrinsic -- ? ) union { uchar-16-rep ushort-8-rep uint-4-rep ulonglong-2-rep } union ; +: (%shuffle-reps) ( -- reps ) + %shuffle-vector-reps %shuffle-vector-imm-reps union ; + M: vector-rep supported-simd-op? { { \ (simd-v+) [ %add-vector-reps ] } @@ -179,7 +182,7 @@ M: vector-rep supported-simd-op? { \ (simd-vrshift) [ %shr-vector-reps ] } { \ (simd-hlshift) [ %horizontal-shl-vector-reps ] } { \ (simd-hrshift) [ %horizontal-shr-vector-reps ] } - { \ (simd-vshuffle) [ %shuffle-vector-reps ] } + { \ (simd-vshuffle) [ (%shuffle-reps) ] } { \ (simd-(vmerge-head)) [ %merge-vector-reps ] } { \ (simd-(vmerge-tail)) [ %merge-vector-reps ] } { \ (simd-(v>float)) [ %integer>float-vector-reps ] } From 1fa6f32790374372dc637f8529da12dae261f03e Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Sat, 10 Oct 2009 10:39:23 -0500 Subject: [PATCH 02/13] fix x86 uchar %scalar>integer --- basis/cpu/x86/x86.factor | 1 + 1 file changed, 1 insertion(+) diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index dbd34c774a..dab7d9d52b 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -1288,6 +1288,7 @@ M:: x86 %scalar>integer ( dst src rep -- ) ] with-small-register ] } { uchar-scalar-rep [ + dst 32-bit-version-of src MOVD dst { } 8 [| tmp-dst | tmp-dst dst int-rep %copy tmp-dst tmp-dst 8-bit-version-of MOVZX From d9002127fae73af40dcb79c3d64a4c3d8301de16 Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Sat, 10 Oct 2009 10:40:09 -0500 Subject: [PATCH 03/13] have vshuffle accept simd-128 variable byte shuffles --- .../compiler/cfg/intrinsics/intrinsics.factor | 3 +- .../compiler/cfg/intrinsics/simd/simd.factor | 48 +++++++++---------- .../tree/propagation/simd/simd.factor | 3 +- .../math/vectors/simd/functor/functor.factor | 8 +++- .../vectors/simd/intrinsics/intrinsics.factor | 8 ++-- basis/math/vectors/simd/simd-tests.factor | 19 +++++++- .../specialization/specialization.factor | 5 +- basis/math/vectors/vectors.factor | 18 ++++++- .../specialized-arrays.factor | 5 +- 9 files changed, 80 insertions(+), 37 deletions(-) diff --git a/basis/compiler/cfg/intrinsics/intrinsics.factor b/basis/compiler/cfg/intrinsics/intrinsics.factor index 9c4447e654..3b6674efee 100644 --- a/basis/compiler/cfg/intrinsics/intrinsics.factor +++ b/basis/compiler/cfg/intrinsics/intrinsics.factor @@ -194,7 +194,8 @@ IN: compiler.cfg.intrinsics { math.vectors.simd.intrinsics:(simd-with) [ [ ^^with-vector ] emit-unary-vector-op ] } { math.vectors.simd.intrinsics:(simd-gather-2) [ emit-gather-vector-2 ] } { math.vectors.simd.intrinsics:(simd-gather-4) [ emit-gather-vector-4 ] } - { math.vectors.simd.intrinsics:(simd-vshuffle) [ emit-shuffle-vector ] } + { math.vectors.simd.intrinsics:(simd-vshuffle-elements) [ emit-shuffle-vector ] } + { math.vectors.simd.intrinsics:(simd-vshuffle-bytes) [ emit-shuffle-vector-var ] } { math.vectors.simd.intrinsics:(simd-(vmerge-head)) [ [ ^^merge-vector-head ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-(vmerge-tail)) [ [ ^^merge-vector-tail ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-(v>float)) [ [ ^^integer>float-vector ] emit-unary-vector-op ] } diff --git a/basis/compiler/cfg/intrinsics/simd/simd.factor b/basis/compiler/cfg/intrinsics/simd/simd.factor index 3f7530caca..e608cf999c 100644 --- a/basis/compiler/cfg/intrinsics/simd/simd.factor +++ b/basis/compiler/cfg/intrinsics/simd/simd.factor @@ -1,8 +1,9 @@ ! Copyright (C) 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: accessors alien byte-arrays fry cpu.architecture kernel math -sequences math.vectors math.vectors.simd.intrinsics macros -generalizations combinators combinators.short-circuit arrays locals +USING: accessors alien byte-arrays fry classes.algebra +cpu.architecture kernel math sequences math.vectors +math.vectors.simd.intrinsics macros generalizations combinators +combinators.short-circuit arrays locals compiler.tree.propagation.info compiler.cfg.builder.blocks compiler.cfg.comparisons compiler.cfg.stacks compiler.cfg.stacks.local compiler.cfg.hats @@ -75,46 +76,43 @@ MACRO: if-literals-match ( quots -- ) ds-push ] emit-vector-op ; -: variable-shuffle? ( obj -- ? ) - ! the vshuffle intrinsic current doesn't allow variable shuffles - drop f ; +: shuffle? ( obj -- ? ) { [ array? ] [ [ integer? ] all? ] } 1&& ; -: immediate-shuffle? ( obj -- ? ) { [ array? ] [ [ integer? ] all? ] } 1&& ; - -: shuffle? ( obj -- ? ) { [ variable-shuffle? ] [ immediate-shuffle? ] } 1|| ; - -: (>variable-shuffle) ( shuffle rep -- shuffle ) +: >variable-shuffle ( shuffle rep -- shuffle' ) rep-component-type heap-size [ dup >byte-array ] [ iota >byte-array ] bi '[ _ n*v _ v+ ] map concat ; -: >variable-shuffle ( shuffle rep -- shuffle' ) - over immediate-shuffle? [ (>variable-shuffle) ] [ drop ] if ; - -: generate-shuffle-vector-imm? ( shuffle rep -- ? ) - { - [ drop immediate-shuffle? ] - [ nip %shuffle-vector-imm-reps member? ] - } 2&& ; - -: generate-shuffle-vector ( src shuffle rep -- dst ) - 2dup generate-shuffle-vector-imm? +: generate-shuffle-vector-imm ( src shuffle rep -- dst ) + dup %shuffle-vector-imm-reps member? [ ^^shuffle-vector-imm ] [ [ >variable-shuffle ^^load-constant ] keep ^^shuffle-vector ] if ; -: emit-shuffle-vector ( node -- ) +: emit-shuffle-vector-imm ( node -- ) ! Pad the permutation with zeroes if it's too short, since we ! can't throw an error at this point. - [ [ rep-components 0 pad-tail ] keep generate-shuffle-vector ] [unary/param] + [ [ rep-components 0 pad-tail ] keep generate-shuffle-vector-imm ] [unary/param] { [ shuffle? ] [ representation? ] } if-literals-match ; +: emit-shuffle-vector-var ( node -- ) + [ ^^shuffle-vector ] [binary] + { [ %shuffle-vector-reps member? ] } if-literals-match ; + +: emit-shuffle-vector ( node -- ) + dup node-input-infos { + [ length 3 = ] + [ first class>> byte-array class<= ] + [ second class>> byte-array class<= ] + [ third literal>> representation? ] + } 1&& [ emit-shuffle-vector-var ] [ emit-shuffle-vector-imm ] if ; + : ^^broadcast-vector ( src n rep -- dst ) [ rep-components swap ] keep - generate-shuffle-vector ; + generate-shuffle-vector-imm ; : emit-broadcast-vector ( node -- ) [ ^^broadcast-vector ] [unary/param] diff --git a/basis/compiler/tree/propagation/simd/simd.factor b/basis/compiler/tree/propagation/simd/simd.factor index 1909a83488..1637148b88 100644 --- a/basis/compiler/tree/propagation/simd/simd.factor +++ b/basis/compiler/tree/propagation/simd/simd.factor @@ -31,7 +31,8 @@ IN: compiler.tree.propagation.simd (simd-vrshift) (simd-hlshift) (simd-hrshift) - (simd-vshuffle) + (simd-vshuffle-bytes) + (simd-vshuffle-elements) (simd-(vmerge-head)) (simd-(vmerge-tail)) (simd-(v>float)) diff --git a/basis/math/vectors/simd/functor/functor.factor b/basis/math/vectors/simd/functor/functor.factor index 7f28f644e1..2ddaf2b8a5 100644 --- a/basis/math/vectors/simd/functor/functor.factor +++ b/basis/math/vectors/simd/functor/functor.factor @@ -60,7 +60,7 @@ MACRO: simd-boa ( rep class -- simd-array ) [ rep-components ] [ new ] [ '[ _ ] ] tri* swap replicate-as ; inline : simd-with/nth-fast? ( rep -- ? ) - [ \ (simd-vshuffle) supported-simd-op? ] + [ \ (simd-vshuffle-elements) supported-simd-op? ] [ rep-component-type can-be-unboxed? ] bi and ; @@ -184,6 +184,8 @@ WHERE TUPLE: A { underlying byte-array read-only initial: $[ 16 ] } ; +INSTANCE: A simd-128 + M: A clone underlying>> clone \ A boa ; inline M: A length drop N ; inline @@ -315,7 +317,7 @@ SLOT: underlying2 class c:typedef ; : (define-simd-256) ( simd -- ) - simd-ops get { vshuffle hlshift hrshift } unique assoc-diff >>ops + simd-ops get { vshuffle-elements vshuffle-bytes hlshift hrshift } unique assoc-diff >>ops [ define-simd ] [ [ class>> ] [ rep>> ] bi define-simd-256-type ] bi ; @@ -362,6 +364,8 @@ TUPLE: A { underlying1 byte-array initial: $[ 16 ] read-only } { underlying2 byte-array initial: $[ 16 ] read-only } ; +INSTANCE: A simd-256 + M: A clone [ underlying1>> clone ] [ underlying2>> clone ] bi \ A boa ; inline diff --git a/basis/math/vectors/simd/intrinsics/intrinsics.factor b/basis/math/vectors/simd/intrinsics/intrinsics.factor index deb92c2944..fab55949b4 100644 --- a/basis/math/vectors/simd/intrinsics/intrinsics.factor +++ b/basis/math/vectors/simd/intrinsics/intrinsics.factor @@ -67,7 +67,8 @@ SIMD-OP: vlshift SIMD-OP: vrshift SIMD-OP: hlshift SIMD-OP: hrshift -SIMD-OP: vshuffle +SIMD-OP: vshuffle-elements +SIMD-OP: vshuffle-bytes SIMD-OP: (vmerge-head) SIMD-OP: (vmerge-tail) SIMD-OP: v<= @@ -148,7 +149,7 @@ GENERIC# supported-simd-op? 1 ( rep intrinsic -- ? ) union { uchar-16-rep ushort-8-rep uint-4-rep ulonglong-2-rep } union ; -: (%shuffle-reps) ( -- reps ) +: (%shuffle-imm-reps) ( -- reps ) %shuffle-vector-reps %shuffle-vector-imm-reps union ; M: vector-rep supported-simd-op? @@ -182,7 +183,8 @@ M: vector-rep supported-simd-op? { \ (simd-vrshift) [ %shr-vector-reps ] } { \ (simd-hlshift) [ %horizontal-shl-vector-reps ] } { \ (simd-hrshift) [ %horizontal-shr-vector-reps ] } - { \ (simd-vshuffle) [ (%shuffle-reps) ] } + { \ (simd-vshuffle-elements) [ (%shuffle-imm-reps) ] } + { \ (simd-vshuffle-bytes) [ %shuffle-vector-reps ] } { \ (simd-(vmerge-head)) [ %merge-vector-reps ] } { \ (simd-(vmerge-tail)) [ %merge-vector-reps ] } { \ (simd-(v>float)) [ %integer>float-vector-reps ] } diff --git a/basis/math/vectors/simd/simd-tests.factor b/basis/math/vectors/simd/simd-tests.factor index 9e999ba9b7..460059809e 100644 --- a/basis/math/vectors/simd/simd-tests.factor +++ b/basis/math/vectors/simd/simd-tests.factor @@ -174,7 +174,7 @@ CONSTANT: simd-classes : remove-special-words ( alist -- alist' ) ! These have their own tests later { - hlshift hrshift vshuffle vbroadcast + hlshift hrshift vshuffle-bytes vshuffle-elements vbroadcast vany? vall? vnone? (v>float) (v>integer) (vpack-signed) (vpack-unsigned) @@ -360,6 +360,23 @@ simd-classes [ ] unit-test ] each +"== Checking variable shuffles" print + +: random-shift-vector ( class -- vec ) + new [ drop 16 random ] map ; + +:: test-shift-vector ( class -- ? ) + class random-int-vector :> src + char-16 random-shift-vector :> perm + { class char-16 } :> decl + + src perm vshuffle + src perm [ decl declare vshuffle ] compile-call + = ; inline + +{ char-16 uchar-16 short-8 ushort-8 int-4 uint-4 longlong-2 ulonglong-2 } +[ 10 swap '[ [ t ] [ _ test-shift-vector ] unit-test ] times ] each + "== Checking vector tests" print :: test-vector-tests-bool ( vector declaration -- none? any? all? ) diff --git a/basis/math/vectors/specialization/specialization.factor b/basis/math/vectors/specialization/specialization.factor index 28c3ee82c3..e51d8c4553 100644 --- a/basis/math/vectors/specialization/specialization.factor +++ b/basis/math/vectors/specialization/specialization.factor @@ -98,7 +98,8 @@ H{ { vrshift { +vector+ +scalar+ -> +vector+ } } { hlshift { +vector+ +literal+ -> +vector+ } } { hrshift { +vector+ +literal+ -> +vector+ } } - { vshuffle { +vector+ +literal+ -> +vector+ } } + { vshuffle-elements { +vector+ +literal+ -> +vector+ } } + { vshuffle-bytes { +vector+ +vector+ -> +vector+ } } { vbroadcast { +vector+ +literal+ -> +vector+ } } { (vmerge-head) { +vector+ +vector+ -> +vector+ } } { (vmerge-tail) { +vector+ +vector+ -> +vector+ } } @@ -162,7 +163,7 @@ ERROR: bad-vector-word word ; } cond ! Don't specialize horizontal shifts, shuffles, and conversions at all, they're only for SIMD { - hlshift hrshift vshuffle vbroadcast + hlshift hrshift vshuffle-elements vshuffle-bytes vbroadcast (v>integer) (v>float) (vpack-signed) (vpack-unsigned) (vunpack-head) (vunpack-tail) diff --git a/basis/math/vectors/vectors.factor b/basis/math/vectors/vectors.factor index c65009950d..4cb03af44c 100644 --- a/basis/math/vectors/vectors.factor +++ b/basis/math/vectors/vectors.factor @@ -6,6 +6,9 @@ locals ; QUALIFIED-WITH: alien.c-types c IN: math.vectors +MIXIN: simd-128 +MIXIN: simd-256 + GENERIC: element-type ( obj -- c-type ) M: object element-type drop f ; inline @@ -83,7 +86,20 @@ PRIVATE> : vbitnot ( u -- w ) dup '[ _ [ bitnot ] fp-bitwise-unary ] map ; :: vbroadcast ( u n -- v ) u length n u nth u like ; -: vshuffle ( u perm -- v ) swap [ '[ _ nth ] ] keep map-as ; + +: vshuffle-elements ( u perm -- v ) + swap [ '[ _ nth ] ] keep map-as ; + +: vshuffle-bytes ( u perm -- v ) + underlying>> [ + swap [ '[ _ nth ] ] keep map-as + ] curry change-underlying ; + +GENERIC: vshuffle ( u perm -- v ) +M: array vshuffle ( u perm -- v ) + vshuffle-elements ; inline +M: simd-128 vshuffle ( u perm -- v ) + vshuffle-bytes ; inline : vlshift ( u n -- w ) '[ _ shift ] map ; : vrshift ( u n -- w ) neg '[ _ shift ] map ; diff --git a/basis/specialized-arrays/specialized-arrays.factor b/basis/specialized-arrays/specialized-arrays.factor index a3d24c10c2..c5de95b5b5 100755 --- a/basis/specialized-arrays/specialized-arrays.factor +++ b/basis/specialized-arrays/specialized-arrays.factor @@ -2,7 +2,8 @@ ! See http://factorcode.org/license.txt for BSD license. USING: accessors alien alien.c-types alien.data alien.parser assocs byte-arrays classes compiler.units functors kernel lexer -libc math math.vectors math.vectors.specialization namespaces +libc math math.vectors math.vectors.private +math.vectors.specialization namespaces parser prettyprint.custom sequences sequences.private strings summary vocabs vocabs.loader vocabs.parser vocabs.generated words fry combinators present ; @@ -68,6 +69,8 @@ TUPLE: A [ drop \ T bad-byte-array-length ] unless ; inline +M: A new-underlying drop byte-array>A ; + M: A clone [ underlying>> clone ] [ length>> ] bi ; inline M: A length length>> ; inline From a5898dffdebab1240e7e32054dcfb49171fd1b1d Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Sat, 10 Oct 2009 12:00:47 -0500 Subject: [PATCH 04/13] don't use MOVSLDUP/MOVSHDUP to do specialized shuffles unless sse3 is available --- basis/cpu/x86/x86.factor | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index dab7d9d52b..c1acf92246 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -673,11 +673,9 @@ M: x86 %gather-vector-2-reps [ dupd SHUFPD ] } case ; -: float-4-shuffle ( dst shuffle -- ) +: sse1-float-4-shuffle ( dst shuffle -- ) { { { 0 1 2 3 } [ drop ] } - { { 0 0 2 2 } [ dup MOVSLDUP ] } - { { 1 1 3 3 } [ dup MOVSHDUP ] } { { 0 1 0 1 } [ dup MOVLHPS ] } { { 2 3 2 3 } [ dup MOVHLPS ] } { { 0 0 1 1 } [ dup UNPCKLPS ] } @@ -685,6 +683,15 @@ M: x86 %gather-vector-2-reps [ dupd SHUFPS ] } case ; +: float-4-shuffle ( dst shuffle -- ) + sse3? [ + { + { { 0 0 2 2 } [ dup MOVSLDUP ] } + { { 1 1 3 3 } [ dup MOVSHDUP ] } + [ sse1-float-4-shuffle ] + } case + ] [ sse1-float-4-shuffle ] if ; + : int-4-shuffle ( dst shuffle -- ) { { { 0 1 2 3 } [ drop ] } From 140780439346efcb184352d018eb52bbf14652cc Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Sat, 10 Oct 2009 12:23:25 -0500 Subject: [PATCH 05/13] change the simd-struct tests to cover integer vector slots --- basis/math/vectors/simd/simd-tests.factor | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/basis/math/vectors/simd/simd-tests.factor b/basis/math/vectors/simd/simd-tests.factor index 460059809e..8766056a96 100644 --- a/basis/math/vectors/simd/simd-tests.factor +++ b/basis/math/vectors/simd/simd-tests.factor @@ -529,38 +529,38 @@ SYMBOL: !!inconsistent!! STRUCT: simd-struct { x float-4 } -{ y double-2 } +{ y longlong-2 } { z double-4 } -{ w float-8 } ; +{ w int-8 } ; [ t ] [ [ simd-struct ] compile-call >c-ptr [ 0 = ] all? ] unit-test [ float-4{ 1 2 3 4 } - double-2{ 2 1 } + longlong-2{ 2 1 } double-4{ 4 3 2 1 } - float-8{ 1 2 3 4 5 6 7 8 } + int-8{ 1 2 3 4 5 6 7 8 } ] [ simd-struct float-4{ 1 2 3 4 } >>x - double-2{ 2 1 } >>y + longlong-2{ 2 1 } >>y double-4{ 4 3 2 1 } >>z - float-8{ 1 2 3 4 5 6 7 8 } >>w + int-8{ 1 2 3 4 5 6 7 8 } >>w { [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave ] unit-test [ float-4{ 1 2 3 4 } - double-2{ 2 1 } + longlong-2{ 2 1 } double-4{ 4 3 2 1 } - float-8{ 1 2 3 4 5 6 7 8 } + int-8{ 1 2 3 4 5 6 7 8 } ] [ [ simd-struct float-4{ 1 2 3 4 } >>x - double-2{ 2 1 } >>y + longlong-2{ 2 1 } >>y double-4{ 4 3 2 1 } >>z - float-8{ 1 2 3 4 5 6 7 8 } >>w + int-8{ 1 2 3 4 5 6 7 8 } >>w { [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave ] compile-call ] unit-test From 97ab9dc4ab3bb4e6ff9a7326dca12c3d2f66dd64 Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Sat, 10 Oct 2009 12:53:10 -0500 Subject: [PATCH 06/13] only emit ##alien-vector/##set-alien-vector insns if the rep is available --- basis/compiler/cfg/intrinsics/simd/simd.factor | 7 +++++-- basis/cpu/architecture/architecture.factor | 2 ++ basis/cpu/x86/x86.factor | 6 ++++++ 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/basis/compiler/cfg/intrinsics/simd/simd.factor b/basis/compiler/cfg/intrinsics/simd/simd.factor index e608cf999c..bd851199ca 100644 --- a/basis/compiler/cfg/intrinsics/simd/simd.factor +++ b/basis/compiler/cfg/intrinsics/simd/simd.factor @@ -128,6 +128,9 @@ MACRO: if-literals-match ( quots -- ) [ ^^select-vector ] [unary/param] { [ integer? ] [ representation? ] } if-literals-match ; inline +: emit-alien-vector-op ( node quot: ( rep -- ) -- ) + { [ %alien-vector-reps member? ] } if-literals-match ; inline + : emit-alien-vector ( node -- ) dup [ '[ @@ -135,7 +138,7 @@ MACRO: if-literals-match ( quots -- ) _ ^^alien-vector ds-push ] [ inline-alien-getter? ] inline-alien - ] with emit-vector-op ; + ] with emit-alien-vector-op ; : emit-set-alien-vector ( node -- ) dup [ @@ -145,7 +148,7 @@ MACRO: if-literals-match ( quots -- ) ] [ byte-array inline-alien-setter? ] inline-alien - ] with emit-vector-op ; + ] with emit-alien-vector-op ; : generate-not-vector ( src rep -- dst ) dup %not-vector-reps member? diff --git a/basis/cpu/architecture/architecture.factor b/basis/cpu/architecture/architecture.factor index 85a43e99fd..19b38fd8f8 100644 --- a/basis/cpu/architecture/architecture.factor +++ b/basis/cpu/architecture/architecture.factor @@ -289,6 +289,7 @@ HOOK: %zero-vector-reps cpu ( -- reps ) HOOK: %fill-vector-reps cpu ( -- reps ) HOOK: %gather-vector-2-reps cpu ( -- reps ) HOOK: %gather-vector-4-reps cpu ( -- reps ) +HOOK: %alien-vector-reps cpu ( -- reps ) HOOK: %shuffle-vector-reps cpu ( -- reps ) HOOK: %shuffle-vector-imm-reps cpu ( -- reps ) HOOK: %merge-vector-reps cpu ( -- reps ) @@ -330,6 +331,7 @@ M: object %zero-vector-reps { } ; M: object %fill-vector-reps { } ; M: object %gather-vector-2-reps { } ; M: object %gather-vector-4-reps { } ; +M: object %alien-vector-reps { } ; M: object %shuffle-vector-reps { } ; M: object %shuffle-vector-imm-reps { } ; M: object %merge-vector-reps { } ; diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index c1acf92246..a163ba6482 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -562,6 +562,12 @@ MACRO: available-reps ( alist -- ) reverse [ { } ] suffix '[ _ cond ] ; +M: x86 %alien-vector-reps + { + { sse? { float-4-rep } } + { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } } + } available-reps ; + M: x86 %zero-vector { { double-2-rep [ dup XORPD ] } From 588899a1b3814194febf4e4701db2aadc67a419c Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Sat, 10 Oct 2009 13:01:13 -0500 Subject: [PATCH 07/13] fix fallbacks for vall?, vany?, vnone? --- basis/math/vectors/vectors.factor | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/basis/math/vectors/vectors.factor b/basis/math/vectors/vectors.factor index 4cb03af44c..0a984ba2e7 100644 --- a/basis/math/vectors/vectors.factor +++ b/basis/math/vectors/vectors.factor @@ -123,9 +123,9 @@ M: simd-128 vshuffle ( u perm -- v ) : vxor ( u v -- w ) over '[ [ _ element>bool ] bi@ xor ] 2map ; : vnot ( u -- w ) dup '[ _ element>bool not ] map ; -: vall? ( v -- ? ) [ ] all? ; -: vany? ( v -- ? ) [ ] any? ; -: vnone? ( v -- ? ) [ not ] all? ; +: vall? ( v -- ? ) dup '[ _ element>bool ] all? ; +: vany? ( v -- ? ) dup '[ _ element>bool ] any? ; +: vnone? ( v -- ? ) dup '[ _ element>bool not ] all? ; : v< ( u v -- w ) [ < ] 2map ; : v<= ( u v -- w ) [ <= ] 2map ; From 4e4be608840d50f152d4f1643c5f06f2b7899a12 Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Sat, 10 Oct 2009 13:13:53 -0500 Subject: [PATCH 08/13] use TEST reg, reg to compare integer equality with zero --- basis/cpu/x86/x86.factor | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index a163ba6482..8612acdcff 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -432,8 +432,13 @@ M: x86 %epilogue ( n -- ) cell - incr-stack-reg ; temp 0 MOV \ t rc-absolute-cell rel-immediate dst temp word execute ; inline +: (%compare) ( src1 src2 cc -- ) + 2over [ { cc= cc/= } member? ] [ register? ] [ 0 = ] tri* and and + [ drop dup TEST ] + [ CMP ] if ; + M:: x86 %compare ( dst src1 src2 cc temp -- ) - src1 src2 CMP + src1 src2 cc (%compare) cc order-cc { { cc< [ dst temp \ CMOVL %boolean ] } { cc<= [ dst temp \ CMOVLE %boolean ] } @@ -447,7 +452,7 @@ M: x86 %compare-imm ( dst src1 src2 cc temp -- ) %compare ; M:: x86 %compare-branch ( label src1 src2 cc -- ) - src1 src2 CMP + src1 src2 cc (%compare) cc order-cc { { cc< [ label JL ] } { cc<= [ label JLE ] } From 584cd44856bb42e5dd81ddd0041eaa16a43e2ca9 Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Sat, 10 Oct 2009 14:02:35 -0500 Subject: [PATCH 09/13] fix the tuple subclassing example docs --- core/classes/tuple/tuple-docs.factor | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/core/classes/tuple/tuple-docs.factor b/core/classes/tuple/tuple-docs.factor index daa275e2a7..cc67075168 100644 --- a/core/classes/tuple/tuple-docs.factor +++ b/core/classes/tuple/tuple-docs.factor @@ -129,17 +129,18 @@ ARTICLE: "tuple-inheritance-example" "Tuple subclassing example" } "Rectangles and parallelograms use the same algorithm for computing the area, whereas they use different algorithms for computing perimiter. Also, rectangles and parallelograms both have " { $snippet "width" } " and " { $snippet "height" } " slots. We can exploit this with subclassing:" { $code + "USING: math.constants math.functions ;" "GENERIC: area ( shape -- n )" "GENERIC: perimiter ( shape -- n )" "" "TUPLE: shape ;" "" "TUPLE: circle < shape radius ;" - "M: area circle radius>> sq pi * ;" - "M: perimiter circle radius>> 2 * pi * ;" + "M: circle area radius>> sq pi * ;" + "M: circle perimiter radius>> 2 * pi * ;" "" - "TUPLE: quad < shape width height" - "M: area quad [ width>> ] [ height>> ] bi * ;" + "TUPLE: quad < shape width height ;" + "M: quad area [ width>> ] [ height>> ] bi * ;" "" "TUPLE: rectangle < quad ;" "M: rectangle perimiter [ width>> 2 * ] [ height>> 2 * ] bi + ;" From 50d4eb27bf2e4132952f8c1e0d4a50c59d51c156 Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Sat, 10 Oct 2009 14:04:22 -0500 Subject: [PATCH 10/13] add missing usings --- core/classes/tuple/tuple-docs.factor | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/classes/tuple/tuple-docs.factor b/core/classes/tuple/tuple-docs.factor index cc67075168..45d3931448 100644 --- a/core/classes/tuple/tuple-docs.factor +++ b/core/classes/tuple/tuple-docs.factor @@ -129,7 +129,7 @@ ARTICLE: "tuple-inheritance-example" "Tuple subclassing example" } "Rectangles and parallelograms use the same algorithm for computing the area, whereas they use different algorithms for computing perimiter. Also, rectangles and parallelograms both have " { $snippet "width" } " and " { $snippet "height" } " slots. We can exploit this with subclassing:" { $code - "USING: math.constants math.functions ;" + "USING: accessors kernel math math.constants math.functions ;" "GENERIC: area ( shape -- n )" "GENERIC: perimiter ( shape -- n )" "" From 2a24e30a242be0bd30a6540d3f3cde4676bdfb21 Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Sat, 10 Oct 2009 17:45:15 -0500 Subject: [PATCH 11/13] store math.matrices.simd matrices in column-major order so that m4.v, frustum, and translation construction are faster --- extra/math/matrices/simd/simd-tests.factor | 68 +++++----- extra/math/matrices/simd/simd.factor | 142 +++++++++++---------- 2 files changed, 106 insertions(+), 104 deletions(-) diff --git a/extra/math/matrices/simd/simd-tests.factor b/extra/math/matrices/simd/simd-tests.factor index 3798c3e98e..965c2bddb5 100644 --- a/extra/math/matrices/simd/simd-tests.factor +++ b/extra/math/matrices/simd/simd-tests.factor @@ -52,10 +52,10 @@ IN: math.matrices.simd.tests [ S{ matrix4 f float-4-array{ - float-4{ 1.0 0.0 0.0 3.0 } - float-4{ 0.0 1.0 0.0 4.0 } - float-4{ 0.0 0.0 1.0 2.0 } - float-4{ 0.0 0.0 0.0 1.0 } + float-4{ 1.0 0.0 0.0 0.0 } + float-4{ 0.0 1.0 0.0 0.0 } + float-4{ 0.0 0.0 1.0 0.0 } + float-4{ 3.0 4.0 2.0 1.0 } } } ] [ float-4{ 3.0 4.0 2.0 0.0 } translation-matrix4 ] unit-test @@ -77,9 +77,9 @@ IN: math.matrices.simd.tests float-4{ 0.0 1.0 0.0 1.0 } pi 1/2. * rotation-matrix4 S{ matrix4 f float-4-array{ - float-4{ 0.0 0.0 1.0 0.0 } + float-4{ 0.0 0.0 -1.0 0.0 } float-4{ 0.0 1.0 0.0 0.0 } - float-4{ -1.0 0.0 0.0 0.0 } + float-4{ 1.0 0.0 0.0 0.0 } float-4{ 0.0 0.0 0.0 1.0 } } } @@ -89,10 +89,10 @@ IN: math.matrices.simd.tests [ S{ matrix4 f float-4-array{ - float-4{ 2.0 0.0 0.0 10.0 } - float-4{ 0.0 3.0 0.0 18.0 } - float-4{ 0.0 0.0 4.0 28.0 } - float-4{ 0.0 0.0 0.0 1.0 } + float-4{ 2.0 0.0 0.0 0.0 } + float-4{ 0.0 3.0 0.0 0.0 } + float-4{ 0.0 0.0 4.0 0.0 } + float-4{ 10.0 18.0 28.0 1.0 } } } ] [ @@ -106,10 +106,10 @@ IN: math.matrices.simd.tests } S{ matrix4 f float-4-array{ - float-4{ 1.0 0.0 0.0 5.0 } - float-4{ 0.0 1.0 0.0 6.0 } - float-4{ 0.0 0.0 1.0 7.0 } - float-4{ 0.0 0.0 0.0 1.0 } + float-4{ 1.0 0.0 0.0 0.0 } + float-4{ 0.0 1.0 0.0 0.0 } + float-4{ 0.0 0.0 1.0 0.0 } + float-4{ 5.0 6.0 7.0 1.0 } } } m4. @@ -118,10 +118,10 @@ IN: math.matrices.simd.tests [ S{ matrix4 f float-4-array{ - float-4{ 3.0 0.0 0.0 5.0 } - float-4{ 0.0 4.0 0.0 6.0 } - float-4{ 0.0 0.0 5.0 7.0 } - float-4{ 0.0 0.0 0.0 2.0 } + float-4{ 3.0 0.0 0.0 0.0 } + float-4{ 0.0 4.0 0.0 0.0 } + float-4{ 0.0 0.0 5.0 0.0 } + float-4{ 5.0 6.0 7.0 2.0 } } } ] [ @@ -135,10 +135,10 @@ IN: math.matrices.simd.tests } S{ matrix4 f float-4-array{ - float-4{ 1.0 0.0 0.0 5.0 } - float-4{ 0.0 1.0 0.0 6.0 } - float-4{ 0.0 0.0 1.0 7.0 } - float-4{ 0.0 0.0 0.0 1.0 } + float-4{ 1.0 0.0 0.0 0.0 } + float-4{ 0.0 1.0 0.0 0.0 } + float-4{ 0.0 0.0 1.0 0.0 } + float-4{ 5.0 6.0 7.0 1.0 } } } m4+ @@ -147,10 +147,10 @@ IN: math.matrices.simd.tests [ S{ matrix4 f float-4-array{ - float-4{ 1.0 0.0 0.0 -5.0 } - float-4{ 0.0 2.0 0.0 -6.0 } - float-4{ 0.0 0.0 3.0 -7.0 } - float-4{ 0.0 0.0 0.0 0.0 } + float-4{ 1.0 0.0 0.0 0.0 } + float-4{ 0.0 2.0 0.0 0.0 } + float-4{ 0.0 0.0 3.0 0.0 } + float-4{ -5.0 -6.0 -7.0 0.0 } } } ] [ @@ -164,10 +164,10 @@ IN: math.matrices.simd.tests } S{ matrix4 f float-4-array{ - float-4{ 1.0 0.0 0.0 5.0 } - float-4{ 0.0 1.0 0.0 6.0 } - float-4{ 0.0 0.0 1.0 7.0 } - float-4{ 0.0 0.0 0.0 1.0 } + float-4{ 1.0 0.0 0.0 0.0 } + float-4{ 0.0 1.0 0.0 0.0 } + float-4{ 0.0 0.0 1.0 0.0 } + float-4{ 5.0 6.0 7.0 1.0 } } } m4- @@ -219,10 +219,10 @@ IN: math.matrices.simd.tests [ S{ matrix4 f float-4-array{ - float-4{ 1/2. 0.0 0.0 0.0 } - float-4{ 0.0 1/2. 0.0 0.0 } - float-4{ 0.0 0.0 -6/4. -10/4. } - float-4{ 0.0 0.0 -1.0 0.0 } + float-4{ 1/2. 0.0 0.0 0.0 } + float-4{ 0.0 1/2. 0.0 0.0 } + float-4{ 0.0 0.0 -6/4. -1.0 } + float-4{ 0.0 0.0 -10/4. 0.0 } } } ] [ diff --git a/extra/math/matrices/simd/simd.factor b/extra/math/matrices/simd/simd.factor index da1149dfec..edbe77781f 100644 --- a/extra/math/matrices/simd/simd.factor +++ b/extra/math/matrices/simd/simd.factor @@ -9,34 +9,34 @@ SPECIALIZED-ARRAY: float-4 IN: math.matrices.simd STRUCT: matrix4 - { rows float-4[4] } ; + { columns float-4[4] } ; INSTANCE: matrix4 immutable-sequence M: matrix4 length drop 4 ; inline -M: matrix4 nth-unsafe rows>> nth-unsafe ; inline +M: matrix4 nth-unsafe columns>> nth-unsafe ; inline M: matrix4 new-sequence 2drop matrix4 (struct) ; inline > 4 firstn ; inline +: columns ( a -- a1 a2 a3 a4 ) + columns>> 4 firstn ; inline -:: set-rows ( c1 c2 c3 c4 c -- c ) - c rows>> :> rows - c1 rows set-first - c2 rows set-second - c3 rows set-third - c4 rows set-fourth +:: set-columns ( c1 c2 c3 c4 c -- c ) + c columns>> :> columns + c1 columns set-first + c2 columns set-second + c3 columns set-third + c4 columns set-fourth c ; inline : make-matrix4 ( quot: ( -- c1 c2 c3 c4 ) -- c ) - matrix4 (struct) swap dip set-rows ; inline + matrix4 (struct) swap dip set-columns ; inline -:: 2map-rows ( a b quot -- c ) +:: 2map-columns ( a b quot -- c ) [ - a rows :> a4 :> a3 :> a2 :> a1 - b rows :> b4 :> b3 :> b2 :> b1 + a columns :> a4 :> a3 :> a2 :> a1 + b columns :> b4 :> b3 :> b2 :> b1 a1 b1 quot call a2 b2 quot call @@ -44,57 +44,57 @@ M: matrix4 new-sequence 2drop matrix4 (struct) ; inline a4 b4 quot call ] make-matrix4 ; inline -: map-rows ( a quot -- c ) - '[ rows _ 4 napply ] make-matrix4 ; inline +: map-columns ( a quot -- c ) + '[ columns _ 4 napply ] make-matrix4 ; inline PRIVATE> -TYPED: m4+ ( a: matrix4 b: matrix4 -- c: matrix4 ) [ v+ ] 2map-rows ; -TYPED: m4- ( a: matrix4 b: matrix4 -- c: matrix4 ) [ v- ] 2map-rows ; -TYPED: m4* ( a: matrix4 b: matrix4 -- c: matrix4 ) [ v* ] 2map-rows ; -TYPED: m4/ ( a: matrix4 b: matrix4 -- c: matrix4 ) [ v/ ] 2map-rows ; +TYPED: m4+ ( a: matrix4 b: matrix4 -- c: matrix4 ) [ v+ ] 2map-columns ; +TYPED: m4- ( a: matrix4 b: matrix4 -- c: matrix4 ) [ v- ] 2map-columns ; +TYPED: m4* ( a: matrix4 b: matrix4 -- c: matrix4 ) [ v* ] 2map-columns ; +TYPED: m4/ ( a: matrix4 b: matrix4 -- c: matrix4 ) [ v/ ] 2map-columns ; -TYPED: m4*n ( a: matrix4 b: float -- c: matrix4 ) [ v*n ] curry map-rows ; -TYPED: m4/n ( a: matrix4 b: float -- c: matrix4 ) [ v/n ] curry map-rows ; -TYPED: n*m4 ( a: float b: matrix4 -- c: matrix4 ) [ n*v ] with map-rows ; -TYPED: n/m4 ( a: float b: matrix4 -- c: matrix4 ) [ n/v ] with map-rows ; +TYPED: m4*n ( a: matrix4 b: float -- c: matrix4 ) [ v*n ] curry map-columns ; +TYPED: m4/n ( a: matrix4 b: float -- c: matrix4 ) [ v/n ] curry map-columns ; +TYPED: n*m4 ( a: float b: matrix4 -- c: matrix4 ) [ n*v ] with map-columns ; +TYPED: n/m4 ( a: float b: matrix4 -- c: matrix4 ) [ n/v ] with map-columns ; TYPED:: m4. ( a: matrix4 b: matrix4 -- c: matrix4 ) [ - a rows :> a4 :> a3 :> a2 :> a1 - b rows :> b4 :> b3 :> b2 :> b1 + a columns :> a4 :> a3 :> a2 :> a1 + b columns :> b4 :> b3 :> b2 :> b1 - a1 first b1 n*v :> c1a - a2 first b1 n*v :> c2a - a3 first b1 n*v :> c3a - a4 first b1 n*v :> c4a + b1 first a1 n*v :> c1a + b2 first a1 n*v :> c2a + b3 first a1 n*v :> c3a + b4 first a1 n*v :> c4a - a1 second b2 n*v c1a v+ :> c1b - a2 second b2 n*v c2a v+ :> c2b - a3 second b2 n*v c3a v+ :> c3b - a4 second b2 n*v c4a v+ :> c4b + b1 second a2 n*v c1a v+ :> c1b + b2 second a2 n*v c2a v+ :> c2b + b3 second a2 n*v c3a v+ :> c3b + b4 second a2 n*v c4a v+ :> c4b - a1 third b3 n*v c1b v+ :> c1c - a2 third b3 n*v c2b v+ :> c2c - a3 third b3 n*v c3b v+ :> c3c - a4 third b3 n*v c4b v+ :> c4c + b1 third a3 n*v c1b v+ :> c1c + b2 third a3 n*v c2b v+ :> c2c + b3 third a3 n*v c3b v+ :> c3c + b4 third a3 n*v c4b v+ :> c4c - a1 fourth b4 n*v c1c v+ - a2 fourth b4 n*v c2c v+ - a3 fourth b4 n*v c3c v+ - a4 fourth b4 n*v c4c v+ + b1 fourth a4 n*v c1c v+ + b2 fourth a4 n*v c2c v+ + b3 fourth a4 n*v c3c v+ + b4 fourth a4 n*v c4c v+ ] make-matrix4 ; -TYPED:: v.m4 ( a: float-4 b: matrix4 -- c: float-4 ) - b rows :> b4 :> b3 :> b2 :> b1 +TYPED:: m4.v ( m: matrix4 v: float-4 -- v': float-4 ) + m columns :> m4 :> m3 :> m2 :> m1 - a first b1 n*v - a second b2 n*v v+ - a third b3 n*v v+ - a fourth b4 n*v v+ ; + v first m1 n*v + v second m2 n*v v+ + v third m3 n*v v+ + v fourth m4 n*v v+ ; -TYPED:: m4.v ( a: matrix4 b: float-4 -- c: float-4 ) - a rows [ b v. ] 4 napply float-4-boa ; +TYPED:: v.m4 ( v: float-4 m: matrix4 -- c: float-4 ) + m columns [ v v. ] 4 napply float-4-boa ; CONSTANT: identity-matrix4 S{ matrix4 f @@ -131,37 +131,37 @@ TYPED: diagonal-matrix4 ( diagonal: float-4 -- matrix: matrix4 ) [ (vmerge) ] bi-curry@ bi* ; inline TYPED: transpose-matrix4 ( matrix: matrix4 -- matrix: matrix4 ) - [ rows vmerge-transpose vmerge-transpose ] make-matrix4 ; + [ columns vmerge-transpose vmerge-transpose ] make-matrix4 ; + +: linear>homogeneous ( v -- v' ) + [ float-4{ t t t f } ] dip float-4{ 0.0 0.0 0.0 1.0 } v? ; inline : scale-matrix4 ( factors -- matrix ) - [ float-4{ t t t f } ] dip float-4{ 0.0 0.0 0.0 1.0 } v? - diagonal-matrix4 ; inline + linear>homogeneous diagonal-matrix4 ; inline : ortho-matrix4 ( factors -- matrix ) float-4{ 1.0 1.0 1.0 1.0 } swap v/ scale-matrix4 ; inline -TYPED:: translation-matrix4 ( offset: float-4 -- matrix: matrix4 ) +TYPED: translation-matrix4 ( offset: float-4 -- matrix: matrix4 ) [ - float-4{ 1.0 1.0 1.0 1.0 } :> diagonal - - offset 0 float-4-with (vmerge) - [ 0 float-4-with swap (vmerge) ] bi@ drop :> z :> y :> x - - diagonal y vmerge-diagonal* - [ x vmerge-diagonal* ] - [ z vmerge-diagonal* ] bi* + linear>homogeneous + [ + float-4{ 1.0 0.0 0.0 0.0 } + float-4{ 0.0 1.0 0.0 0.0 } + float-4{ 0.0 0.0 1.0 0.0 } + ] dip ] make-matrix4 ; TYPED:: rotation-matrix4 ( axis: float-4 theta: float -- matrix: matrix4 ) - ! x*x + c*(1.0 - x*x) x*y*(1.0 - c) - s*z x*z*(1.0 - c) + s*y 0 - ! x*y*(1.0 - c) + s*z y*y + c*(1.0 - y*y) y*z*(1.0 - c) - s*x 0 - ! x*z*(1.0 - c) - s*y y*z*(1.0 - c) + s*x z*z + c*(1.0 - z*z) 0 + ! x*x + c*(1.0 - x*x) x*y*(1.0 - c) + s*z x*z*(1.0 - c) - s*y 0 + ! x*y*(1.0 - c) - s*z y*y + c*(1.0 - y*y) y*z*(1.0 - c) + s*x 0 + ! x*z*(1.0 - c) + s*y y*z*(1.0 - c) - s*x z*z + c*(1.0 - z*z) 0 ! 0 0 0 1 matrix4 (struct) :> triangle-m theta cos :> c theta sin :> s - float-4{ 1.0 -1.0 1.0 0.0 } :> triangle-sign + float-4{ -1.0 1.0 -1.0 0.0 } :> triangle-sign c float-4-with :> cc s float-4-with :> ss @@ -184,7 +184,7 @@ TYPED:: rotation-matrix4 ( axis: float-4 theta: float -- matrix: matrix4 ) triangle-lo { 1 0 3 3 } vshuffle float-4 new - triangle-m set-rows drop + triangle-m set-columns drop diagonal-m triangle-m m4+ ; @@ -194,8 +194,10 @@ TYPED:: frustum-matrix4 ( xy: float-4 near: float far: float -- matrix: matrix4 float-4{ t t f f } xy near far - float-4-with v? ! denom v/ :> fov - fov 0.0 float-4-with (vmerge-head) vmerge-diagonal - fov float-4{ f f t t } vand - float-4{ 0.0 0.0 -1.0 0.0 } + float-4{ 0.0 -1.0 0.0 0.0 } :> negone + + fov vmerge-diagonal + [ vmerge-diagonal ] + [ negone (vmerge) ] bi* ] make-matrix4 ; From 9548b7bdd8cacf4ae3a32b209d786d19e9234dcf Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Mon, 12 Oct 2009 16:55:52 -0500 Subject: [PATCH 12/13] fix a bug when printing calendars, add a unit test --- basis/calendar/format/format-tests.factor | 4 +++- basis/calendar/format/format.factor | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/basis/calendar/format/format-tests.factor b/basis/calendar/format/format-tests.factor index f8864351a4..cb1ff0b60f 100644 --- a/basis/calendar/format/format-tests.factor +++ b/basis/calendar/format/format-tests.factor @@ -1,5 +1,5 @@ USING: calendar.format calendar kernel math tools.test -io.streams.string accessors io math.order ; +io.streams.string accessors io math.order sequences ; IN: calendar.format.tests [ 0 ] [ @@ -81,3 +81,5 @@ IN: calendar.format.tests ] [ "Thursday, 02-Oct-2008 23:59:59 GMT" cookie-string>timestamp ] unit-test +[ ] +[ { 2008 2009 } [ year. ] each ] unit-test diff --git a/basis/calendar/format/format.factor b/basis/calendar/format/format.factor index 6aa4126ff9..d07d74722a 100644 --- a/basis/calendar/format/format.factor +++ b/basis/calendar/format/format.factor @@ -66,7 +66,7 @@ M: array month. ( pair -- ) [ month-name write bl number>string print ] [ 1 zeller-congruence ] [ (days-in-month) day-abbreviations2 " " join print ] 2tri - over " " concat write + over " " "" concat-as write [ [ 1 + day. ] keep 1 + + 7 mod zero? [ nl ] [ bl ] if From 48f479950540b2b73ff3a221a1c4f46a801e2699 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Tue, 13 Oct 2009 05:13:22 -0500 Subject: [PATCH 13/13] math.vectors.simd: fix typos in docs reported by Ed Swartz --- basis/math/vectors/simd/simd-docs.factor | 27 ++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/basis/math/vectors/simd/simd-docs.factor b/basis/math/vectors/simd/simd-docs.factor index 97ff99cd28..2fbe823965 100644 --- a/basis/math/vectors/simd/simd-docs.factor +++ b/basis/math/vectors/simd/simd-docs.factor @@ -41,7 +41,21 @@ $nl POSTPONE: SIMD: POSTPONE: SIMDS: } -"The following vector types are supported:" +"The following scalar types are supported:" +{ $code + "char" + "uchar" + "short" + "ushort" + "int" + "uint" + "longlong" + "ulonglong" + "float" + "double" +} + +"The following vector types are generated from the above scalar types:" { $code "char-16" "uchar-16" @@ -89,6 +103,7 @@ $nl { $code """USING: compiler.tree.debugger math.vectors math.vectors.simd ; +SIMD: double SYMBOLS: x y ; [ @@ -107,7 +122,7 @@ IN: simd-demo { float-4 float-4 float-4 } declare [ v* ] [ [ 1.0 ] dip n-v v* ] bi-curry* bi v+ ; -\ interpolate optimizer-report.""" } +\\ interpolate optimizer-report.""" } "Note that using " { $link declare } " is not recommended. Safer ways of getting type information for the input parameters to a word include defining methods on a generic word (the value being dispatched upon has a statically known type in the method body), as well as using " { $link "hints" } " and " { $link POSTPONE: inline } " declarations." $nl "Here is a better version of the " { $snippet "interpolate" } " words above that uses hints:" @@ -122,7 +137,7 @@ IN: simd-demo HINTS: interpolate float-4 float-4 float-4 ; -\ interpolate optimizer-report. """ } +\\ interpolate optimizer-report. """ } "This time, the optimizer report lists calls to both SIMD primitives and high-level vector words, because hints cause two code paths to be generated. The " { $snippet "optimized." } " word can be used to make sure that the fast code path consists entirely of calls to primitives." $nl "If the " { $snippet "interpolate" } " word was to be used in several places with different types of vectors, it would be best to declare it " { $link POSTPONE: inline } "." @@ -153,13 +168,13 @@ M: actor advance ( dt actor -- ) [ >float ] dip [ update-velocity ] [ update-position ] 2bi ; -M\ actor advance optimized.""" +M\\ actor advance optimized.""" } "The " { $vocab-link "compiler.cfg.debugger" } " vocabulary can give a lower-level picture of the generated code, that includes register assignments and other low-level details. To look at low-level optimizer output, call " { $snippet "test-mr mr." } " on a word or quotation:" { $code """USE: compiler.tree.debugger -M\ actor advance test-mr mr.""" } +M\\ actor advance test-mr mr.""" } "An example of a high-performance algorithm that uses SIMD primitives can be found in the " { $vocab-link "benchmark.nbody-simd" } " vocabulary." ; ARTICLE: "math.vectors.simd.intrinsics" "Low-level SIMD primitives" @@ -206,7 +221,7 @@ ARTICLE: "math.vectors.simd" "Hardware vector arithmetic (SIMD)" HELP: SIMD: { $syntax "SIMD: type" } { $values { "type" "a scalar C type" } } -{ $description "Defines 128-bit and 256-bit SIMD arrays for holding elements of " { $snippet "type" } " into the vocabulary search path. The possible type/length combinations are listed in " { $link "math.vectors.simd.types" } " and the generated words are documented in " { $link "math.vectors.simd.words" } "." } ; +{ $description "Defines 128-bit and 256-bit SIMD arrays for holding elements of " { $snippet "type" } " into the vocabulary search path. The allowed scalar types, and the auto-generated type/length vector combinations that result, are listed in " { $link "math.vectors.simd.types" } ". Generated words are documented in " { $link "math.vectors.simd.words" } "." } ; HELP: SIMDS: { $syntax "SIMDS: type type type ... ;" }