From 3bc097f6ff9388924faa41c2a12a965bc5d2dd50 Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Fri, 9 Oct 2009 20:46:52 -0500 Subject: [PATCH 1/8] rename ##shuffle-vector to ##shuffle-vector-imm, and add a new ##shuffle-vector for dynamic shuffles. have vshuffle use ##shuffle-vector to do word and byte shuffles on x86 --- .../cfg/instructions/instructions.factor | 5 ++ .../compiler/cfg/intrinsics/simd/simd.factor | 47 +++++++++++++++---- .../value-numbering/rewrite/rewrite.factor | 18 +++---- .../value-numbering/simplify/simplify.factor | 2 +- .../value-numbering-tests.factor | 22 ++++----- basis/compiler/codegen/codegen.factor | 1 + basis/cpu/architecture/architecture.factor | 3 ++ basis/cpu/x86/x86.factor | 43 +++++++++++++---- .../vectors/simd/intrinsics/intrinsics.factor | 5 +- 9 files changed, 105 insertions(+), 41 deletions(-) diff --git a/basis/compiler/cfg/instructions/instructions.factor b/basis/compiler/cfg/instructions/instructions.factor index 57d88a2d86..119af6d0b1 100644 --- a/basis/compiler/cfg/instructions/instructions.factor +++ b/basis/compiler/cfg/instructions/instructions.factor @@ -277,6 +277,11 @@ literal: rep ; PURE-INSN: ##shuffle-vector def: dst +use: src shuffle +literal: rep ; + +PURE-INSN: ##shuffle-vector-imm +def: dst use: src literal: shuffle rep ; diff --git a/basis/compiler/cfg/intrinsics/simd/simd.factor b/basis/compiler/cfg/intrinsics/simd/simd.factor index 7607d69e45..3f7530caca 100644 --- a/basis/compiler/cfg/intrinsics/simd/simd.factor +++ b/basis/compiler/cfg/intrinsics/simd/simd.factor @@ -1,15 +1,15 @@ ! Copyright (C) 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: accessors byte-arrays fry cpu.architecture kernel math -sequences math.vectors.simd.intrinsics macros generalizations -combinators combinators.short-circuit arrays locals +USING: accessors alien byte-arrays fry cpu.architecture kernel math +sequences math.vectors math.vectors.simd.intrinsics macros +generalizations combinators combinators.short-circuit arrays locals compiler.tree.propagation.info compiler.cfg.builder.blocks compiler.cfg.comparisons compiler.cfg.stacks compiler.cfg.stacks.local compiler.cfg.hats compiler.cfg.instructions compiler.cfg.registers compiler.cfg.intrinsics.alien specialized-arrays ; -FROM: alien.c-types => float double ; +FROM: alien.c-types => heap-size char uchar float double ; SPECIALIZED-ARRAYS: float double ; IN: compiler.cfg.intrinsics.simd @@ -21,7 +21,7 @@ MACRO: check-elements ( quots -- ) MACRO: if-literals-match ( quots -- ) [ length ] [ ] [ length ] tri - ! n quots n n + ! n quots n '[ ! node quot [ @@ -75,17 +75,46 @@ MACRO: if-literals-match ( quots -- ) ds-push ] emit-vector-op ; -: shuffle? ( obj -- ? ) { [ array? ] [ [ integer? ] all? ] } 1&& ; +: variable-shuffle? ( obj -- ? ) + ! the vshuffle intrinsic current doesn't allow variable shuffles + drop f ; + +: immediate-shuffle? ( obj -- ? ) { [ array? ] [ [ integer? ] all? ] } 1&& ; + +: shuffle? ( obj -- ? ) { [ variable-shuffle? ] [ immediate-shuffle? ] } 1|| ; + +: (>variable-shuffle) ( shuffle rep -- shuffle ) + rep-component-type heap-size + [ dup >byte-array ] + [ iota >byte-array ] bi + '[ _ n*v _ v+ ] map concat ; + +: >variable-shuffle ( shuffle rep -- shuffle' ) + over immediate-shuffle? [ (>variable-shuffle) ] [ drop ] if ; + +: generate-shuffle-vector-imm? ( shuffle rep -- ? ) + { + [ drop immediate-shuffle? ] + [ nip %shuffle-vector-imm-reps member? ] + } 2&& ; + +: generate-shuffle-vector ( src shuffle rep -- dst ) + 2dup generate-shuffle-vector-imm? + [ ^^shuffle-vector-imm ] + [ + [ >variable-shuffle ^^load-constant ] keep + ^^shuffle-vector + ] if ; : emit-shuffle-vector ( node -- ) - ! Pad the permutation with zeroes if its too short, since we + ! Pad the permutation with zeroes if it's too short, since we ! can't throw an error at this point. - [ [ rep-components 0 pad-tail ] keep ^^shuffle-vector ] [unary/param] + [ [ rep-components 0 pad-tail ] keep generate-shuffle-vector ] [unary/param] { [ shuffle? ] [ representation? ] } if-literals-match ; : ^^broadcast-vector ( src n rep -- dst ) [ rep-components swap ] keep - ^^shuffle-vector ; + generate-shuffle-vector ; : emit-broadcast-vector ( node -- ) [ ^^broadcast-vector ] [unary/param] diff --git a/basis/compiler/cfg/value-numbering/rewrite/rewrite.factor b/basis/compiler/cfg/value-numbering/rewrite/rewrite.factor index 4a63777019..3842942a3b 100755 --- a/basis/compiler/cfg/value-numbering/rewrite/rewrite.factor +++ b/basis/compiler/cfg/value-numbering/rewrite/rewrite.factor @@ -450,26 +450,26 @@ M: ##set-alien-vector rewrite rewrite-alien-addressing ; ! Some lame constant folding for SIMD intrinsics. Eventually this ! should be redone completely. -: rewrite-shuffle-vector ( insn expr -- insn' ) +: rewrite-shuffle-vector-imm ( insn expr -- insn' ) 2dup [ rep>> ] bi@ eq? [ [ [ dst>> ] [ src>> vn>vreg ] bi* ] [ [ shuffle>> ] bi@ nths ] [ drop rep>> ] - 2tri \ ##shuffle-vector new-insn + 2tri \ ##shuffle-vector-imm new-insn ] [ 2drop f ] if ; -: (fold-shuffle-vector) ( shuffle bytes -- bytes' ) +: (fold-shuffle-vector-imm) ( shuffle bytes -- bytes' ) 2dup length swap length /i group nths concat ; -: fold-shuffle-vector ( insn expr -- insn' ) +: fold-shuffle-vector-imm ( insn expr -- insn' ) [ [ dst>> ] [ shuffle>> ] bi ] dip value>> - (fold-shuffle-vector) \ ##load-constant new-insn ; + (fold-shuffle-vector-imm) \ ##load-constant new-insn ; -M: ##shuffle-vector rewrite +M: ##shuffle-vector-imm rewrite dup src>> vreg>expr { - { [ dup shuffle-vector-expr? ] [ rewrite-shuffle-vector ] } - { [ dup reference-expr? ] [ fold-shuffle-vector ] } - { [ dup constant-expr? ] [ fold-shuffle-vector ] } + { [ dup shuffle-vector-imm-expr? ] [ rewrite-shuffle-vector-imm ] } + { [ dup reference-expr? ] [ fold-shuffle-vector-imm ] } + { [ dup constant-expr? ] [ fold-shuffle-vector-imm ] } [ 2drop f ] } cond ; diff --git a/basis/compiler/cfg/value-numbering/simplify/simplify.factor b/basis/compiler/cfg/value-numbering/simplify/simplify.factor index c2026a9483..df3dc6aab9 100644 --- a/basis/compiler/cfg/value-numbering/simplify/simplify.factor +++ b/basis/compiler/cfg/value-numbering/simplify/simplify.factor @@ -136,7 +136,7 @@ M: scalar>vector-expr simplify* [ drop f ] } cond ; -M: shuffle-vector-expr simplify* +M: shuffle-vector-imm-expr simplify* [ src>> ] [ shuffle>> ] [ rep>> rep-components iota ] tri sequence= [ drop f ] unless ; diff --git a/basis/compiler/cfg/value-numbering/value-numbering-tests.factor b/basis/compiler/cfg/value-numbering/value-numbering-tests.factor index f98824cb95..733b8cc22a 100644 --- a/basis/compiler/cfg/value-numbering/value-numbering-tests.factor +++ b/basis/compiler/cfg/value-numbering/value-numbering-tests.factor @@ -1215,31 +1215,31 @@ cell 8 = [ } ] [ { - T{ ##shuffle-vector f 1 0 { 0 1 2 3 } float-4-rep } + T{ ##shuffle-vector-imm f 1 0 { 0 1 2 3 } float-4-rep } } value-numbering-step ] unit-test [ { - T{ ##shuffle-vector f 1 0 { 1 2 3 0 } float-4-rep } - T{ ##shuffle-vector f 2 0 { 0 2 3 1 } float-4-rep } + T{ ##shuffle-vector-imm f 1 0 { 1 2 3 0 } float-4-rep } + T{ ##shuffle-vector-imm f 2 0 { 0 2 3 1 } float-4-rep } } ] [ { - T{ ##shuffle-vector f 1 0 { 1 2 3 0 } float-4-rep } - T{ ##shuffle-vector f 2 1 { 3 1 2 0 } float-4-rep } + T{ ##shuffle-vector-imm f 1 0 { 1 2 3 0 } float-4-rep } + T{ ##shuffle-vector-imm f 2 1 { 3 1 2 0 } float-4-rep } } value-numbering-step ] unit-test [ { - T{ ##shuffle-vector f 1 0 { 1 2 3 0 } float-4-rep } - T{ ##shuffle-vector f 2 1 { 1 0 } double-2-rep } + T{ ##shuffle-vector-imm f 1 0 { 1 2 3 0 } float-4-rep } + T{ ##shuffle-vector-imm f 2 1 { 1 0 } double-2-rep } } ] [ { - T{ ##shuffle-vector f 1 0 { 1 2 3 0 } float-4-rep } - T{ ##shuffle-vector f 2 1 { 1 0 } double-2-rep } + T{ ##shuffle-vector-imm f 1 0 { 1 2 3 0 } float-4-rep } + T{ ##shuffle-vector-imm f 2 1 { 1 0 } double-2-rep } } value-numbering-step ] unit-test @@ -1253,7 +1253,7 @@ cell 8 = [ { T{ ##load-constant f 0 $[ 55 tag-fixnum ] } T{ ##scalar>vector f 1 0 int-4-rep } - T{ ##shuffle-vector f 2 1 { 0 0 0 0 } float-4-rep } + T{ ##shuffle-vector-imm f 2 1 { 0 0 0 0 } float-4-rep } } value-numbering-step ] unit-test @@ -1267,7 +1267,7 @@ cell 8 = [ { T{ ##load-constant f 0 1.25 } T{ ##scalar>vector f 1 0 float-4-rep } - T{ ##shuffle-vector f 2 1 { 0 0 0 0 } float-4-rep } + T{ ##shuffle-vector-imm f 2 1 { 0 0 0 0 } float-4-rep } } value-numbering-step ] unit-test diff --git a/basis/compiler/codegen/codegen.factor b/basis/compiler/codegen/codegen.factor index 39dd21d893..938219af22 100755 --- a/basis/compiler/codegen/codegen.factor +++ b/basis/compiler/codegen/codegen.factor @@ -154,6 +154,7 @@ CODEGEN: ##zero-vector %zero-vector CODEGEN: ##fill-vector %fill-vector CODEGEN: ##gather-vector-2 %gather-vector-2 CODEGEN: ##gather-vector-4 %gather-vector-4 +CODEGEN: ##shuffle-vector-imm %shuffle-vector-imm CODEGEN: ##shuffle-vector %shuffle-vector CODEGEN: ##tail>head-vector %tail>head-vector CODEGEN: ##merge-vector-head %merge-vector-head diff --git a/basis/cpu/architecture/architecture.factor b/basis/cpu/architecture/architecture.factor index 8bf84f6670..85a43e99fd 100644 --- a/basis/cpu/architecture/architecture.factor +++ b/basis/cpu/architecture/architecture.factor @@ -242,6 +242,7 @@ HOOK: %fill-vector cpu ( dst rep -- ) HOOK: %gather-vector-2 cpu ( dst src1 src2 rep -- ) HOOK: %gather-vector-4 cpu ( dst src1 src2 src3 src4 rep -- ) HOOK: %shuffle-vector cpu ( dst src shuffle rep -- ) +HOOK: %shuffle-vector-imm cpu ( dst src shuffle rep -- ) HOOK: %tail>head-vector cpu ( dst src rep -- ) HOOK: %merge-vector-head cpu ( dst src1 src2 rep -- ) HOOK: %merge-vector-tail cpu ( dst src1 src2 rep -- ) @@ -289,6 +290,7 @@ HOOK: %fill-vector-reps cpu ( -- reps ) HOOK: %gather-vector-2-reps cpu ( -- reps ) HOOK: %gather-vector-4-reps cpu ( -- reps ) HOOK: %shuffle-vector-reps cpu ( -- reps ) +HOOK: %shuffle-vector-imm-reps cpu ( -- reps ) HOOK: %merge-vector-reps cpu ( -- reps ) HOOK: %signed-pack-vector-reps cpu ( -- reps ) HOOK: %unsigned-pack-vector-reps cpu ( -- reps ) @@ -329,6 +331,7 @@ M: object %fill-vector-reps { } ; M: object %gather-vector-2-reps { } ; M: object %gather-vector-4-reps { } ; M: object %shuffle-vector-reps { } ; +M: object %shuffle-vector-imm-reps { } ; M: object %merge-vector-reps { } ; M: object %signed-pack-vector-reps { } ; M: object %unsigned-pack-vector-reps { } ; diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index 13727bdc61..dbd34c774a 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -698,7 +698,7 @@ M: x86 %gather-vector-2-reps : longlong-2-shuffle ( dst shuffle -- ) first2 [ 2 * dup 1 + ] bi@ 4array int-4-shuffle ; -M:: x86 %shuffle-vector ( dst src shuffle rep -- ) +M:: x86 %shuffle-vector-imm ( dst src shuffle rep -- ) dst src rep %copy dst shuffle rep unsign-rep { { double-2-rep [ double-2-shuffle ] } @@ -707,12 +707,20 @@ M:: x86 %shuffle-vector ( dst src shuffle rep -- ) { longlong-2-rep [ longlong-2-shuffle ] } } case ; -M: x86 %shuffle-vector-reps +M: x86 %shuffle-vector-imm-reps { { sse? { float-4-rep } } { sse2? { double-2-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } } } available-reps ; +M: x86 %shuffle-vector ( dst src shuffle rep -- ) + two-operand PSHUFB ; + +M: x86 %shuffle-vector-reps + { + { ssse3? { float-4-rep double-2-rep longlong-2-rep ulonglong-2-rep int-4-rep uint-4-rep short-8-rep ushort-8-rep char-16-rep uchar-16-rep } } + } available-reps ; + M: x86 %merge-vector-head [ two-operand ] keep unsign-rep { @@ -790,8 +798,6 @@ M: x86 %unpack-vector-head-reps ( -- reps ) { sse4.1? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } } } available-reps ; -M: x86 %unpack-vector-tail-reps ( -- reps ) { } ; - M: x86 %integer>float-vector ( dst src rep -- ) { { int-4-rep [ CVTDQ2PS ] } @@ -1037,10 +1043,6 @@ M: x86 %mul-vector-reps { sse4.1? { int-4-rep uint-4-rep } } } available-reps ; -M: x86 %saturated-mul-vector-reps - ! No multiplication with saturation on x86 - { } ; - M: x86 %div-vector ( dst src1 src2 rep -- ) [ two-operand ] keep { @@ -1223,8 +1225,6 @@ M: x86 %xor-vector-reps { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } } } available-reps ; -M: x86 %not-vector-reps { } ; - M: x86 %shl-vector ( dst src1 src2 rep -- ) [ two-operand ] keep { @@ -1271,6 +1271,29 @@ M:: x86 %scalar>integer ( dst src rep -- ) { uint-scalar-rep [ dst 32-bit-version-of src MOVD ] } + { short-scalar-rep [ + dst 32-bit-version-of src MOVD + dst dst 16-bit-version-of MOVSX + ] } + { ushort-scalar-rep [ + dst 32-bit-version-of src MOVD + dst dst 16-bit-version-of MOVZX + ] } + { char-scalar-rep [ + dst 32-bit-version-of src MOVD + dst { } 8 [| tmp-dst | + tmp-dst dst int-rep %copy + tmp-dst tmp-dst 8-bit-version-of MOVSX + dst tmp-dst int-rep %copy + ] with-small-register + ] } + { uchar-scalar-rep [ + dst { } 8 [| tmp-dst | + tmp-dst dst int-rep %copy + tmp-dst tmp-dst 8-bit-version-of MOVZX + dst tmp-dst int-rep %copy + ] with-small-register + ] } } case ; M: x86 %vector>scalar %copy ; diff --git a/basis/math/vectors/simd/intrinsics/intrinsics.factor b/basis/math/vectors/simd/intrinsics/intrinsics.factor index 5a7974a75f..deb92c2944 100644 --- a/basis/math/vectors/simd/intrinsics/intrinsics.factor +++ b/basis/math/vectors/simd/intrinsics/intrinsics.factor @@ -148,6 +148,9 @@ GENERIC# supported-simd-op? 1 ( rep intrinsic -- ? ) union { uchar-16-rep ushort-8-rep uint-4-rep ulonglong-2-rep } union ; +: (%shuffle-reps) ( -- reps ) + %shuffle-vector-reps %shuffle-vector-imm-reps union ; + M: vector-rep supported-simd-op? { { \ (simd-v+) [ %add-vector-reps ] } @@ -179,7 +182,7 @@ M: vector-rep supported-simd-op? { \ (simd-vrshift) [ %shr-vector-reps ] } { \ (simd-hlshift) [ %horizontal-shl-vector-reps ] } { \ (simd-hrshift) [ %horizontal-shr-vector-reps ] } - { \ (simd-vshuffle) [ %shuffle-vector-reps ] } + { \ (simd-vshuffle) [ (%shuffle-reps) ] } { \ (simd-(vmerge-head)) [ %merge-vector-reps ] } { \ (simd-(vmerge-tail)) [ %merge-vector-reps ] } { \ (simd-(v>float)) [ %integer>float-vector-reps ] } From 366c341c5f329f0a9ea6570a7a854f78ac13f80d Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Sat, 10 Oct 2009 00:23:50 -0500 Subject: [PATCH 2/8] compiler.tree.propagation.branches: fix live-branches computation for #dispatch nodes --- basis/compiler/tests/optimizer.factor | 30 +++++++++++++++++++ .../tree/propagation/branches/branches.factor | 15 +++++++--- 2 files changed, 41 insertions(+), 4 deletions(-) diff --git a/basis/compiler/tests/optimizer.factor b/basis/compiler/tests/optimizer.factor index 18679ce77b..0c9b1817c8 100644 --- a/basis/compiler/tests/optimizer.factor +++ b/basis/compiler/tests/optimizer.factor @@ -413,5 +413,35 @@ M: object bad-dispatch-position-test* ; ] with-compilation-unit ] unit-test +[ 16 ] [ + [ + 0 2 + [ + nip + [ + 1 + { + [ 16 ] + [ 16 ] + [ 16 ] + } dispatch + ] [ + { + [ ] + [ ] + [ ] + } dispatch + ] bi + ] each-integer + ] compile-call +] unit-test + +: dispatch-branch-problem ( a b c -- d ) + dup 0 < [ "boo" throw ] when + 1 + { [ + ] [ - ] [ * ] } dispatch ; + +[ 3 4 -1 dispatch-branch-problem ] [ "boo" = ] must-fail-with +[ -1 ] [ 3 4 0 dispatch-branch-problem ] unit-test +[ 12 ] [ 3 4 1 dispatch-branch-problem ] unit-test + ! Not sure if I want to fix this... ! [ t [ [ f ] [ 3 ] if >fixnum ] compile-call ] [ no-method? ] must-fail-with \ No newline at end of file diff --git a/basis/compiler/tree/propagation/branches/branches.factor b/basis/compiler/tree/propagation/branches/branches.factor index b8861a6292..0d837d82ae 100755 --- a/basis/compiler/tree/propagation/branches/branches.factor +++ b/basis/compiler/tree/propagation/branches/branches.factor @@ -1,8 +1,8 @@ -! Copyright (C) 2008 Slava Pestov. +! Copyright (C) 2008, 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. USING: fry kernel sequences assocs accessors namespaces math.intervals arrays classes.algebra combinators columns -stack-checker.branches locals +stack-checker.branches locals math compiler.utilities compiler.tree compiler.tree.combinators @@ -21,6 +21,9 @@ M: #if child-constraints M: #dispatch child-constraints children>> length f ; +! There is an important invariant here, either no flags are set +! in live-branches, exactly one is set, or all are set. + GENERIC: live-branches ( #branch -- indices ) M: #if live-branches @@ -32,8 +35,12 @@ M: #if live-branches } cond nip ; M: #dispatch live-branches - [ children>> length ] [ in-d>> first value-info interval>> ] bi - '[ _ interval-contains? ] map ; + [ children>> ] [ in-d>> first value-info ] bi { + { [ dup class>> null-class? ] [ drop length f ] } + { [ dup literal>> integer? not ] [ drop length t ] } + { [ 2dup literal>> swap bounds-check? not ] [ drop length t ] } + [ literal>> swap length f [ [ t ] 2dip set-nth ] keep ] + } cond ; : live-children ( #branch -- children ) [ children>> ] [ live-branches>> ] bi select-children ; From 1fa6f32790374372dc637f8529da12dae261f03e Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Sat, 10 Oct 2009 10:39:23 -0500 Subject: [PATCH 3/8] fix x86 uchar %scalar>integer --- basis/cpu/x86/x86.factor | 1 + 1 file changed, 1 insertion(+) diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index dbd34c774a..dab7d9d52b 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -1288,6 +1288,7 @@ M:: x86 %scalar>integer ( dst src rep -- ) ] with-small-register ] } { uchar-scalar-rep [ + dst 32-bit-version-of src MOVD dst { } 8 [| tmp-dst | tmp-dst dst int-rep %copy tmp-dst tmp-dst 8-bit-version-of MOVZX From d9002127fae73af40dcb79c3d64a4c3d8301de16 Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Sat, 10 Oct 2009 10:40:09 -0500 Subject: [PATCH 4/8] have vshuffle accept simd-128 variable byte shuffles --- .../compiler/cfg/intrinsics/intrinsics.factor | 3 +- .../compiler/cfg/intrinsics/simd/simd.factor | 48 +++++++++---------- .../tree/propagation/simd/simd.factor | 3 +- .../math/vectors/simd/functor/functor.factor | 8 +++- .../vectors/simd/intrinsics/intrinsics.factor | 8 ++-- basis/math/vectors/simd/simd-tests.factor | 19 +++++++- .../specialization/specialization.factor | 5 +- basis/math/vectors/vectors.factor | 18 ++++++- .../specialized-arrays.factor | 5 +- 9 files changed, 80 insertions(+), 37 deletions(-) diff --git a/basis/compiler/cfg/intrinsics/intrinsics.factor b/basis/compiler/cfg/intrinsics/intrinsics.factor index 9c4447e654..3b6674efee 100644 --- a/basis/compiler/cfg/intrinsics/intrinsics.factor +++ b/basis/compiler/cfg/intrinsics/intrinsics.factor @@ -194,7 +194,8 @@ IN: compiler.cfg.intrinsics { math.vectors.simd.intrinsics:(simd-with) [ [ ^^with-vector ] emit-unary-vector-op ] } { math.vectors.simd.intrinsics:(simd-gather-2) [ emit-gather-vector-2 ] } { math.vectors.simd.intrinsics:(simd-gather-4) [ emit-gather-vector-4 ] } - { math.vectors.simd.intrinsics:(simd-vshuffle) [ emit-shuffle-vector ] } + { math.vectors.simd.intrinsics:(simd-vshuffle-elements) [ emit-shuffle-vector ] } + { math.vectors.simd.intrinsics:(simd-vshuffle-bytes) [ emit-shuffle-vector-var ] } { math.vectors.simd.intrinsics:(simd-(vmerge-head)) [ [ ^^merge-vector-head ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-(vmerge-tail)) [ [ ^^merge-vector-tail ] emit-binary-vector-op ] } { math.vectors.simd.intrinsics:(simd-(v>float)) [ [ ^^integer>float-vector ] emit-unary-vector-op ] } diff --git a/basis/compiler/cfg/intrinsics/simd/simd.factor b/basis/compiler/cfg/intrinsics/simd/simd.factor index 3f7530caca..e608cf999c 100644 --- a/basis/compiler/cfg/intrinsics/simd/simd.factor +++ b/basis/compiler/cfg/intrinsics/simd/simd.factor @@ -1,8 +1,9 @@ ! Copyright (C) 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: accessors alien byte-arrays fry cpu.architecture kernel math -sequences math.vectors math.vectors.simd.intrinsics macros -generalizations combinators combinators.short-circuit arrays locals +USING: accessors alien byte-arrays fry classes.algebra +cpu.architecture kernel math sequences math.vectors +math.vectors.simd.intrinsics macros generalizations combinators +combinators.short-circuit arrays locals compiler.tree.propagation.info compiler.cfg.builder.blocks compiler.cfg.comparisons compiler.cfg.stacks compiler.cfg.stacks.local compiler.cfg.hats @@ -75,46 +76,43 @@ MACRO: if-literals-match ( quots -- ) ds-push ] emit-vector-op ; -: variable-shuffle? ( obj -- ? ) - ! the vshuffle intrinsic current doesn't allow variable shuffles - drop f ; +: shuffle? ( obj -- ? ) { [ array? ] [ [ integer? ] all? ] } 1&& ; -: immediate-shuffle? ( obj -- ? ) { [ array? ] [ [ integer? ] all? ] } 1&& ; - -: shuffle? ( obj -- ? ) { [ variable-shuffle? ] [ immediate-shuffle? ] } 1|| ; - -: (>variable-shuffle) ( shuffle rep -- shuffle ) +: >variable-shuffle ( shuffle rep -- shuffle' ) rep-component-type heap-size [ dup >byte-array ] [ iota >byte-array ] bi '[ _ n*v _ v+ ] map concat ; -: >variable-shuffle ( shuffle rep -- shuffle' ) - over immediate-shuffle? [ (>variable-shuffle) ] [ drop ] if ; - -: generate-shuffle-vector-imm? ( shuffle rep -- ? ) - { - [ drop immediate-shuffle? ] - [ nip %shuffle-vector-imm-reps member? ] - } 2&& ; - -: generate-shuffle-vector ( src shuffle rep -- dst ) - 2dup generate-shuffle-vector-imm? +: generate-shuffle-vector-imm ( src shuffle rep -- dst ) + dup %shuffle-vector-imm-reps member? [ ^^shuffle-vector-imm ] [ [ >variable-shuffle ^^load-constant ] keep ^^shuffle-vector ] if ; -: emit-shuffle-vector ( node -- ) +: emit-shuffle-vector-imm ( node -- ) ! Pad the permutation with zeroes if it's too short, since we ! can't throw an error at this point. - [ [ rep-components 0 pad-tail ] keep generate-shuffle-vector ] [unary/param] + [ [ rep-components 0 pad-tail ] keep generate-shuffle-vector-imm ] [unary/param] { [ shuffle? ] [ representation? ] } if-literals-match ; +: emit-shuffle-vector-var ( node -- ) + [ ^^shuffle-vector ] [binary] + { [ %shuffle-vector-reps member? ] } if-literals-match ; + +: emit-shuffle-vector ( node -- ) + dup node-input-infos { + [ length 3 = ] + [ first class>> byte-array class<= ] + [ second class>> byte-array class<= ] + [ third literal>> representation? ] + } 1&& [ emit-shuffle-vector-var ] [ emit-shuffle-vector-imm ] if ; + : ^^broadcast-vector ( src n rep -- dst ) [ rep-components swap ] keep - generate-shuffle-vector ; + generate-shuffle-vector-imm ; : emit-broadcast-vector ( node -- ) [ ^^broadcast-vector ] [unary/param] diff --git a/basis/compiler/tree/propagation/simd/simd.factor b/basis/compiler/tree/propagation/simd/simd.factor index 1909a83488..1637148b88 100644 --- a/basis/compiler/tree/propagation/simd/simd.factor +++ b/basis/compiler/tree/propagation/simd/simd.factor @@ -31,7 +31,8 @@ IN: compiler.tree.propagation.simd (simd-vrshift) (simd-hlshift) (simd-hrshift) - (simd-vshuffle) + (simd-vshuffle-bytes) + (simd-vshuffle-elements) (simd-(vmerge-head)) (simd-(vmerge-tail)) (simd-(v>float)) diff --git a/basis/math/vectors/simd/functor/functor.factor b/basis/math/vectors/simd/functor/functor.factor index 7f28f644e1..2ddaf2b8a5 100644 --- a/basis/math/vectors/simd/functor/functor.factor +++ b/basis/math/vectors/simd/functor/functor.factor @@ -60,7 +60,7 @@ MACRO: simd-boa ( rep class -- simd-array ) [ rep-components ] [ new ] [ '[ _ ] ] tri* swap replicate-as ; inline : simd-with/nth-fast? ( rep -- ? ) - [ \ (simd-vshuffle) supported-simd-op? ] + [ \ (simd-vshuffle-elements) supported-simd-op? ] [ rep-component-type can-be-unboxed? ] bi and ; @@ -184,6 +184,8 @@ WHERE TUPLE: A { underlying byte-array read-only initial: $[ 16 ] } ; +INSTANCE: A simd-128 + M: A clone underlying>> clone \ A boa ; inline M: A length drop N ; inline @@ -315,7 +317,7 @@ SLOT: underlying2 class c:typedef ; : (define-simd-256) ( simd -- ) - simd-ops get { vshuffle hlshift hrshift } unique assoc-diff >>ops + simd-ops get { vshuffle-elements vshuffle-bytes hlshift hrshift } unique assoc-diff >>ops [ define-simd ] [ [ class>> ] [ rep>> ] bi define-simd-256-type ] bi ; @@ -362,6 +364,8 @@ TUPLE: A { underlying1 byte-array initial: $[ 16 ] read-only } { underlying2 byte-array initial: $[ 16 ] read-only } ; +INSTANCE: A simd-256 + M: A clone [ underlying1>> clone ] [ underlying2>> clone ] bi \ A boa ; inline diff --git a/basis/math/vectors/simd/intrinsics/intrinsics.factor b/basis/math/vectors/simd/intrinsics/intrinsics.factor index deb92c2944..fab55949b4 100644 --- a/basis/math/vectors/simd/intrinsics/intrinsics.factor +++ b/basis/math/vectors/simd/intrinsics/intrinsics.factor @@ -67,7 +67,8 @@ SIMD-OP: vlshift SIMD-OP: vrshift SIMD-OP: hlshift SIMD-OP: hrshift -SIMD-OP: vshuffle +SIMD-OP: vshuffle-elements +SIMD-OP: vshuffle-bytes SIMD-OP: (vmerge-head) SIMD-OP: (vmerge-tail) SIMD-OP: v<= @@ -148,7 +149,7 @@ GENERIC# supported-simd-op? 1 ( rep intrinsic -- ? ) union { uchar-16-rep ushort-8-rep uint-4-rep ulonglong-2-rep } union ; -: (%shuffle-reps) ( -- reps ) +: (%shuffle-imm-reps) ( -- reps ) %shuffle-vector-reps %shuffle-vector-imm-reps union ; M: vector-rep supported-simd-op? @@ -182,7 +183,8 @@ M: vector-rep supported-simd-op? { \ (simd-vrshift) [ %shr-vector-reps ] } { \ (simd-hlshift) [ %horizontal-shl-vector-reps ] } { \ (simd-hrshift) [ %horizontal-shr-vector-reps ] } - { \ (simd-vshuffle) [ (%shuffle-reps) ] } + { \ (simd-vshuffle-elements) [ (%shuffle-imm-reps) ] } + { \ (simd-vshuffle-bytes) [ %shuffle-vector-reps ] } { \ (simd-(vmerge-head)) [ %merge-vector-reps ] } { \ (simd-(vmerge-tail)) [ %merge-vector-reps ] } { \ (simd-(v>float)) [ %integer>float-vector-reps ] } diff --git a/basis/math/vectors/simd/simd-tests.factor b/basis/math/vectors/simd/simd-tests.factor index 9e999ba9b7..460059809e 100644 --- a/basis/math/vectors/simd/simd-tests.factor +++ b/basis/math/vectors/simd/simd-tests.factor @@ -174,7 +174,7 @@ CONSTANT: simd-classes : remove-special-words ( alist -- alist' ) ! These have their own tests later { - hlshift hrshift vshuffle vbroadcast + hlshift hrshift vshuffle-bytes vshuffle-elements vbroadcast vany? vall? vnone? (v>float) (v>integer) (vpack-signed) (vpack-unsigned) @@ -360,6 +360,23 @@ simd-classes [ ] unit-test ] each +"== Checking variable shuffles" print + +: random-shift-vector ( class -- vec ) + new [ drop 16 random ] map ; + +:: test-shift-vector ( class -- ? ) + class random-int-vector :> src + char-16 random-shift-vector :> perm + { class char-16 } :> decl + + src perm vshuffle + src perm [ decl declare vshuffle ] compile-call + = ; inline + +{ char-16 uchar-16 short-8 ushort-8 int-4 uint-4 longlong-2 ulonglong-2 } +[ 10 swap '[ [ t ] [ _ test-shift-vector ] unit-test ] times ] each + "== Checking vector tests" print :: test-vector-tests-bool ( vector declaration -- none? any? all? ) diff --git a/basis/math/vectors/specialization/specialization.factor b/basis/math/vectors/specialization/specialization.factor index 28c3ee82c3..e51d8c4553 100644 --- a/basis/math/vectors/specialization/specialization.factor +++ b/basis/math/vectors/specialization/specialization.factor @@ -98,7 +98,8 @@ H{ { vrshift { +vector+ +scalar+ -> +vector+ } } { hlshift { +vector+ +literal+ -> +vector+ } } { hrshift { +vector+ +literal+ -> +vector+ } } - { vshuffle { +vector+ +literal+ -> +vector+ } } + { vshuffle-elements { +vector+ +literal+ -> +vector+ } } + { vshuffle-bytes { +vector+ +vector+ -> +vector+ } } { vbroadcast { +vector+ +literal+ -> +vector+ } } { (vmerge-head) { +vector+ +vector+ -> +vector+ } } { (vmerge-tail) { +vector+ +vector+ -> +vector+ } } @@ -162,7 +163,7 @@ ERROR: bad-vector-word word ; } cond ! Don't specialize horizontal shifts, shuffles, and conversions at all, they're only for SIMD { - hlshift hrshift vshuffle vbroadcast + hlshift hrshift vshuffle-elements vshuffle-bytes vbroadcast (v>integer) (v>float) (vpack-signed) (vpack-unsigned) (vunpack-head) (vunpack-tail) diff --git a/basis/math/vectors/vectors.factor b/basis/math/vectors/vectors.factor index c65009950d..4cb03af44c 100644 --- a/basis/math/vectors/vectors.factor +++ b/basis/math/vectors/vectors.factor @@ -6,6 +6,9 @@ locals ; QUALIFIED-WITH: alien.c-types c IN: math.vectors +MIXIN: simd-128 +MIXIN: simd-256 + GENERIC: element-type ( obj -- c-type ) M: object element-type drop f ; inline @@ -83,7 +86,20 @@ PRIVATE> : vbitnot ( u -- w ) dup '[ _ [ bitnot ] fp-bitwise-unary ] map ; :: vbroadcast ( u n -- v ) u length n u nth u like ; -: vshuffle ( u perm -- v ) swap [ '[ _ nth ] ] keep map-as ; + +: vshuffle-elements ( u perm -- v ) + swap [ '[ _ nth ] ] keep map-as ; + +: vshuffle-bytes ( u perm -- v ) + underlying>> [ + swap [ '[ _ nth ] ] keep map-as + ] curry change-underlying ; + +GENERIC: vshuffle ( u perm -- v ) +M: array vshuffle ( u perm -- v ) + vshuffle-elements ; inline +M: simd-128 vshuffle ( u perm -- v ) + vshuffle-bytes ; inline : vlshift ( u n -- w ) '[ _ shift ] map ; : vrshift ( u n -- w ) neg '[ _ shift ] map ; diff --git a/basis/specialized-arrays/specialized-arrays.factor b/basis/specialized-arrays/specialized-arrays.factor index a3d24c10c2..c5de95b5b5 100755 --- a/basis/specialized-arrays/specialized-arrays.factor +++ b/basis/specialized-arrays/specialized-arrays.factor @@ -2,7 +2,8 @@ ! See http://factorcode.org/license.txt for BSD license. USING: accessors alien alien.c-types alien.data alien.parser assocs byte-arrays classes compiler.units functors kernel lexer -libc math math.vectors math.vectors.specialization namespaces +libc math math.vectors math.vectors.private +math.vectors.specialization namespaces parser prettyprint.custom sequences sequences.private strings summary vocabs vocabs.loader vocabs.parser vocabs.generated words fry combinators present ; @@ -68,6 +69,8 @@ TUPLE: A [ drop \ T bad-byte-array-length ] unless ; inline +M: A new-underlying drop byte-array>A ; + M: A clone [ underlying>> clone ] [ length>> ] bi ; inline M: A length length>> ; inline From a5898dffdebab1240e7e32054dcfb49171fd1b1d Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Sat, 10 Oct 2009 12:00:47 -0500 Subject: [PATCH 5/8] don't use MOVSLDUP/MOVSHDUP to do specialized shuffles unless sse3 is available --- basis/cpu/x86/x86.factor | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index dab7d9d52b..c1acf92246 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -673,11 +673,9 @@ M: x86 %gather-vector-2-reps [ dupd SHUFPD ] } case ; -: float-4-shuffle ( dst shuffle -- ) +: sse1-float-4-shuffle ( dst shuffle -- ) { { { 0 1 2 3 } [ drop ] } - { { 0 0 2 2 } [ dup MOVSLDUP ] } - { { 1 1 3 3 } [ dup MOVSHDUP ] } { { 0 1 0 1 } [ dup MOVLHPS ] } { { 2 3 2 3 } [ dup MOVHLPS ] } { { 0 0 1 1 } [ dup UNPCKLPS ] } @@ -685,6 +683,15 @@ M: x86 %gather-vector-2-reps [ dupd SHUFPS ] } case ; +: float-4-shuffle ( dst shuffle -- ) + sse3? [ + { + { { 0 0 2 2 } [ dup MOVSLDUP ] } + { { 1 1 3 3 } [ dup MOVSHDUP ] } + [ sse1-float-4-shuffle ] + } case + ] [ sse1-float-4-shuffle ] if ; + : int-4-shuffle ( dst shuffle -- ) { { { 0 1 2 3 } [ drop ] } From 140780439346efcb184352d018eb52bbf14652cc Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Sat, 10 Oct 2009 12:23:25 -0500 Subject: [PATCH 6/8] change the simd-struct tests to cover integer vector slots --- basis/math/vectors/simd/simd-tests.factor | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/basis/math/vectors/simd/simd-tests.factor b/basis/math/vectors/simd/simd-tests.factor index 460059809e..8766056a96 100644 --- a/basis/math/vectors/simd/simd-tests.factor +++ b/basis/math/vectors/simd/simd-tests.factor @@ -529,38 +529,38 @@ SYMBOL: !!inconsistent!! STRUCT: simd-struct { x float-4 } -{ y double-2 } +{ y longlong-2 } { z double-4 } -{ w float-8 } ; +{ w int-8 } ; [ t ] [ [ simd-struct ] compile-call >c-ptr [ 0 = ] all? ] unit-test [ float-4{ 1 2 3 4 } - double-2{ 2 1 } + longlong-2{ 2 1 } double-4{ 4 3 2 1 } - float-8{ 1 2 3 4 5 6 7 8 } + int-8{ 1 2 3 4 5 6 7 8 } ] [ simd-struct float-4{ 1 2 3 4 } >>x - double-2{ 2 1 } >>y + longlong-2{ 2 1 } >>y double-4{ 4 3 2 1 } >>z - float-8{ 1 2 3 4 5 6 7 8 } >>w + int-8{ 1 2 3 4 5 6 7 8 } >>w { [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave ] unit-test [ float-4{ 1 2 3 4 } - double-2{ 2 1 } + longlong-2{ 2 1 } double-4{ 4 3 2 1 } - float-8{ 1 2 3 4 5 6 7 8 } + int-8{ 1 2 3 4 5 6 7 8 } ] [ [ simd-struct float-4{ 1 2 3 4 } >>x - double-2{ 2 1 } >>y + longlong-2{ 2 1 } >>y double-4{ 4 3 2 1 } >>z - float-8{ 1 2 3 4 5 6 7 8 } >>w + int-8{ 1 2 3 4 5 6 7 8 } >>w { [ x>> ] [ y>> ] [ z>> ] [ w>> ] } cleave ] compile-call ] unit-test From 97ab9dc4ab3bb4e6ff9a7326dca12c3d2f66dd64 Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Sat, 10 Oct 2009 12:53:10 -0500 Subject: [PATCH 7/8] only emit ##alien-vector/##set-alien-vector insns if the rep is available --- basis/compiler/cfg/intrinsics/simd/simd.factor | 7 +++++-- basis/cpu/architecture/architecture.factor | 2 ++ basis/cpu/x86/x86.factor | 6 ++++++ 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/basis/compiler/cfg/intrinsics/simd/simd.factor b/basis/compiler/cfg/intrinsics/simd/simd.factor index e608cf999c..bd851199ca 100644 --- a/basis/compiler/cfg/intrinsics/simd/simd.factor +++ b/basis/compiler/cfg/intrinsics/simd/simd.factor @@ -128,6 +128,9 @@ MACRO: if-literals-match ( quots -- ) [ ^^select-vector ] [unary/param] { [ integer? ] [ representation? ] } if-literals-match ; inline +: emit-alien-vector-op ( node quot: ( rep -- ) -- ) + { [ %alien-vector-reps member? ] } if-literals-match ; inline + : emit-alien-vector ( node -- ) dup [ '[ @@ -135,7 +138,7 @@ MACRO: if-literals-match ( quots -- ) _ ^^alien-vector ds-push ] [ inline-alien-getter? ] inline-alien - ] with emit-vector-op ; + ] with emit-alien-vector-op ; : emit-set-alien-vector ( node -- ) dup [ @@ -145,7 +148,7 @@ MACRO: if-literals-match ( quots -- ) ] [ byte-array inline-alien-setter? ] inline-alien - ] with emit-vector-op ; + ] with emit-alien-vector-op ; : generate-not-vector ( src rep -- dst ) dup %not-vector-reps member? diff --git a/basis/cpu/architecture/architecture.factor b/basis/cpu/architecture/architecture.factor index 85a43e99fd..19b38fd8f8 100644 --- a/basis/cpu/architecture/architecture.factor +++ b/basis/cpu/architecture/architecture.factor @@ -289,6 +289,7 @@ HOOK: %zero-vector-reps cpu ( -- reps ) HOOK: %fill-vector-reps cpu ( -- reps ) HOOK: %gather-vector-2-reps cpu ( -- reps ) HOOK: %gather-vector-4-reps cpu ( -- reps ) +HOOK: %alien-vector-reps cpu ( -- reps ) HOOK: %shuffle-vector-reps cpu ( -- reps ) HOOK: %shuffle-vector-imm-reps cpu ( -- reps ) HOOK: %merge-vector-reps cpu ( -- reps ) @@ -330,6 +331,7 @@ M: object %zero-vector-reps { } ; M: object %fill-vector-reps { } ; M: object %gather-vector-2-reps { } ; M: object %gather-vector-4-reps { } ; +M: object %alien-vector-reps { } ; M: object %shuffle-vector-reps { } ; M: object %shuffle-vector-imm-reps { } ; M: object %merge-vector-reps { } ; diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index c1acf92246..a163ba6482 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -562,6 +562,12 @@ MACRO: available-reps ( alist -- ) reverse [ { } ] suffix '[ _ cond ] ; +M: x86 %alien-vector-reps + { + { sse? { float-4-rep } } + { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } } + } available-reps ; + M: x86 %zero-vector { { double-2-rep [ dup XORPD ] } From 588899a1b3814194febf4e4701db2aadc67a419c Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Sat, 10 Oct 2009 13:01:13 -0500 Subject: [PATCH 8/8] fix fallbacks for vall?, vany?, vnone? --- basis/math/vectors/vectors.factor | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/basis/math/vectors/vectors.factor b/basis/math/vectors/vectors.factor index 4cb03af44c..0a984ba2e7 100644 --- a/basis/math/vectors/vectors.factor +++ b/basis/math/vectors/vectors.factor @@ -123,9 +123,9 @@ M: simd-128 vshuffle ( u perm -- v ) : vxor ( u v -- w ) over '[ [ _ element>bool ] bi@ xor ] 2map ; : vnot ( u -- w ) dup '[ _ element>bool not ] map ; -: vall? ( v -- ? ) [ ] all? ; -: vany? ( v -- ? ) [ ] any? ; -: vnone? ( v -- ? ) [ not ] all? ; +: vall? ( v -- ? ) dup '[ _ element>bool ] all? ; +: vany? ( v -- ? ) dup '[ _ element>bool ] any? ; +: vnone? ( v -- ? ) dup '[ _ element>bool not ] all? ; : v< ( u v -- w ) [ < ] 2map ; : v<= ( u v -- w ) [ <= ] 2map ;