compiler: add intrinsic for PMOVMSKB/MOVMSKP[SD]

Combined with a fast bit-count, this will let us optimize ("rice") byte-counting.
db4
Joe Groff 2011-11-11 22:47:54 -08:00
parent d1786adfe4
commit d79b462f75
11 changed files with 41 additions and 5 deletions

View File

@ -380,6 +380,11 @@ def: dst
use: src1 src2 use: src1 src2
literal: rep cc ; literal: rep cc ;
! ##move-vector-mask: produces an integer-representation result (dst/int-rep)
! from a single source operand (src); the vector representation is carried as
! the literal slot rep.
! NOTE(review): declared with FOLDABLE-INSN: like the neighbouring
! ##test-vector; presumably the result depends only on src and rep -- confirm.
FOLDABLE-INSN: ##move-vector-mask
def: dst/int-rep
use: src
literal: rep ;
FOLDABLE-INSN: ##test-vector FOLDABLE-INSN: ##test-vector
def: dst/tagged-rep def: dst/tagged-rep
use: src1 use: src1

View File

@ -37,6 +37,7 @@ M: ##tail>head-vector insn-available? rep>> %unpack-vector-head-reps member? ;
M: ##integer>float-vector insn-available? rep>> %integer>float-vector-reps member? ; M: ##integer>float-vector insn-available? rep>> %integer>float-vector-reps member? ;
M: ##float>integer-vector insn-available? rep>> %float>integer-vector-reps member? ; M: ##float>integer-vector insn-available? rep>> %float>integer-vector-reps member? ;
M: ##compare-vector insn-available? [ rep>> ] [ cc>> ] bi %compare-vector-reps member? ; M: ##compare-vector insn-available? [ rep>> ] [ cc>> ] bi %compare-vector-reps member? ;
! ##move-vector-mask is available when the instruction's rep is a member of
! the sequence returned by %move-vector-mask-reps.
M: ##move-vector-mask insn-available? rep>> %move-vector-mask-reps member? ;
M: ##test-vector insn-available? rep>> %test-vector-reps member? ; M: ##test-vector insn-available? rep>> %test-vector-reps member? ;
M: ##add-vector insn-available? rep>> %add-vector-reps member? ; M: ##add-vector insn-available? rep>> %add-vector-reps member? ;
M: ##saturated-add-vector insn-available? rep>> %saturated-add-vector-reps member? ; M: ##saturated-add-vector insn-available? rep>> %saturated-add-vector-reps member? ;

View File

@ -109,6 +109,7 @@ M: simple-ops-cpu %or-vector-reps all-reps ;
M: simple-ops-cpu %xor-vector-reps all-reps ; M: simple-ops-cpu %xor-vector-reps all-reps ;
M: simple-ops-cpu %merge-vector-reps all-reps ; M: simple-ops-cpu %merge-vector-reps all-reps ;
M: simple-ops-cpu %sqrt-vector-reps all-reps ; M: simple-ops-cpu %sqrt-vector-reps all-reps ;
! simple-ops-cpu reports all-reps for %move-vector-mask-reps, as it does for
! the other *-reps hooks defined alongside it.
M: simple-ops-cpu %move-vector-mask-reps all-reps ;
M: simple-ops-cpu %test-vector-reps all-reps ; M: simple-ops-cpu %test-vector-reps all-reps ;
M: simple-ops-cpu %signed-pack-vector-reps all-reps ; M: simple-ops-cpu %signed-pack-vector-reps all-reps ;
M: simple-ops-cpu %unsigned-pack-vector-reps all-reps ; M: simple-ops-cpu %unsigned-pack-vector-reps all-reps ;

View File

@ -555,6 +555,10 @@ PREDICATE: fixnum-vector-rep < int-vector-rep
{ {
[ vcc-none ^^test-vector ] [ vcc-none ^^test-vector ]
} emit-v-vector-op ; } emit-v-vector-op ;
! Emitter registered for the (simd-vgetmask) intrinsic. Passes
! emit-v-vector-op a list containing one candidate expansion,
! ^^move-vector-mask.
! NOTE(review): what happens when that candidate is unavailable for the
! node's rep is decided inside emit-v-vector-op, which is not shown here.
: emit-simd-vgetmask ( node -- )
{
[ ^^move-vector-mask ]
} emit-v-vector-op ;
: emit-simd-v>float ( node -- ) : emit-simd-v>float ( node -- )
{ {
@ -700,6 +704,7 @@ PREDICATE: fixnum-vector-rep < int-vector-rep
{ alien-vector [ emit-alien-vector ] } { alien-vector [ emit-alien-vector ] }
{ set-alien-vector [ emit-set-alien-vector ] } { set-alien-vector [ emit-set-alien-vector ] }
{ assert-positive [ drop ] } { assert-positive [ drop ] }
{ (simd-vgetmask) [ emit-simd-vgetmask ] }
} enable-intrinsics ; } enable-intrinsics ;
enable-simd enable-simd

View File

@ -223,6 +223,7 @@ CODEGEN: ##unpack-vector-tail %unpack-vector-tail
CODEGEN: ##integer>float-vector %integer>float-vector CODEGEN: ##integer>float-vector %integer>float-vector
CODEGEN: ##float>integer-vector %float>integer-vector CODEGEN: ##float>integer-vector %float>integer-vector
CODEGEN: ##compare-vector %compare-vector CODEGEN: ##compare-vector %compare-vector
! Pairs the ##move-vector-mask instruction with the %move-vector-mask hook,
! following the same CODEGEN: pattern as the surrounding vector instructions.
CODEGEN: ##move-vector-mask %move-vector-mask
CODEGEN: ##test-vector %test-vector CODEGEN: ##test-vector %test-vector
CODEGEN: ##add-vector %add-vector CODEGEN: ##add-vector %add-vector
CODEGEN: ##saturated-add-vector %saturated-add-vector CODEGEN: ##saturated-add-vector %saturated-add-vector

View File

@ -69,6 +69,7 @@ CONSTANT: vector-other-intrinsics
(simd-vany?) (simd-vany?)
(simd-vall?) (simd-vall?)
(simd-vnone?) (simd-vnone?)
(simd-vgetmask)
(simd-select) (simd-select)
set-alien-vector set-alien-vector
} }
@ -104,6 +105,8 @@ vector>vector-intrinsics [ { byte-array } "default-output-classes" set-word-prop
real [0,inf] <class/interval-info> value-info-intersect real [0,inf] <class/interval-info> value-info-intersect
] "outputs" set-word-prop ] "outputs" set-word-prop
! Sets the "default-output-classes" word property of (simd-vgetmask) to
! { fixnum }, i.e. its single output is declared to be a fixnum.
\ (simd-vgetmask) { fixnum } "default-output-classes" set-word-prop
: clone-with-value-infos ( node -- node' ) : clone-with-value-infos ( node -- node' )
clone dup in-d>> extract-value-info >>info ; clone dup in-d>> extract-value-info >>info ;

View File

@ -322,6 +322,7 @@ HOOK: %unpack-vector-tail cpu ( dst src rep -- )
HOOK: %integer>float-vector cpu ( dst src rep -- ) HOOK: %integer>float-vector cpu ( dst src rep -- )
HOOK: %float>integer-vector cpu ( dst src rep -- ) HOOK: %float>integer-vector cpu ( dst src rep -- )
HOOK: %compare-vector cpu ( dst src1 src2 rep cc -- ) HOOK: %compare-vector cpu ( dst src1 src2 rep cc -- )
! Hook dispatching on cpu: takes a destination, a source and a rep, and
! leaves nothing on the stack.
HOOK: %move-vector-mask cpu ( dst src rep -- )
HOOK: %test-vector cpu ( dst src1 temp rep vcc -- ) HOOK: %test-vector cpu ( dst src1 temp rep vcc -- )
HOOK: %test-vector-branch cpu ( label src1 temp rep vcc -- ) HOOK: %test-vector-branch cpu ( label src1 temp rep vcc -- )
HOOK: %add-vector cpu ( dst src1 src2 rep -- ) HOOK: %add-vector cpu ( dst src1 src2 rep -- )
@ -381,6 +382,7 @@ HOOK: %integer>float-vector-reps cpu ( -- reps )
HOOK: %float>integer-vector-reps cpu ( -- reps ) HOOK: %float>integer-vector-reps cpu ( -- reps )
HOOK: %compare-vector-reps cpu ( cc -- reps ) HOOK: %compare-vector-reps cpu ( cc -- reps )
HOOK: %compare-vector-ccs cpu ( rep cc -- {cc,swap?}s not? ) HOOK: %compare-vector-ccs cpu ( rep cc -- {cc,swap?}s not? )
! Hook dispatching on cpu: returns the reps for which ##move-vector-mask is
! considered available (consulted by its insn-available? method).
HOOK: %move-vector-mask-reps cpu ( -- reps )
HOOK: %test-vector-reps cpu ( -- reps ) HOOK: %test-vector-reps cpu ( -- reps )
HOOK: %add-vector-reps cpu ( -- reps ) HOOK: %add-vector-reps cpu ( -- reps )
HOOK: %saturated-add-vector-reps cpu ( -- reps ) HOOK: %saturated-add-vector-reps cpu ( -- reps )

View File

@ -476,15 +476,24 @@ M: x86 %compare-vector-ccs
{ vcc-notall [ dst mask CMP dst temp \ CMOVNE (%boolean) ] } { vcc-notall [ dst mask CMP dst temp \ CMOVNE (%boolean) ] }
} case ; } case ;
: %move-vector-mask ( dst src rep -- mask ) : %move-vector-mask* ( dst src rep -- mask )
{ {
{ double-2-rep [ MOVMSKPS HEX: f ] } { double-2-rep [ MOVMSKPS HEX: f ] }
{ float-4-rep [ MOVMSKPS HEX: f ] } { float-4-rep [ MOVMSKPS HEX: f ] }
[ drop PMOVMSKB HEX: ffff ] [ drop PMOVMSKB HEX: ffff ]
} case ; } case ;
! x86 method for the %move-vector-mask hook. Delegates to
! %move-vector-mask* ( dst src rep -- mask ) and drops the mask constant that
! word leaves on the stack, so the net effect matches ( dst src rep -- ).
M: x86 %move-vector-mask ( dst src rep -- )
%move-vector-mask* drop ;
! Reps for which x86 reports ##move-vector-mask as available.
!
! Only reps whose hardware mask carries exactly one bit per element are
! listed: MOVMSKPS yields one bit per 32-bit lane (float-4-rep) and PMOVMSKB
! yields one bit per byte (char-16-rep, uchar-16-rep).
!
! %move-vector-mask* emits MOVMSKPS for double-2-rep and PMOVMSKB for every
! other rep, so double-2-rep and the 16/32/64-bit integer reps would get
! several mask bits per element. vcount applies bit-count to the mask, so
! those reps would over-count, and the result would also disagree with the
! non-intrinsic (simd-vgetmask) definition, which builds one bit per
! component. They are therefore not advertised here.
M: x86 %move-vector-mask-reps
{
{ sse? { float-4-rep } }
{ sse2? { char-16-rep uchar-16-rep } }
} available-reps ;
M:: x86 %test-vector ( dst src temp rep vcc -- ) M:: x86 %test-vector ( dst src temp rep vcc -- )
dst src rep %move-vector-mask :> mask dst src rep %move-vector-mask* :> mask
dst temp mask vcc %test-vector-mask ; dst temp mask vcc %test-vector-mask ;
:: %test-vector-mask-branch ( label temp mask vcc -- ) :: %test-vector-mask-branch ( label temp mask vcc -- )
@ -496,7 +505,7 @@ M:: x86 %test-vector ( dst src temp rep vcc -- )
} case ; } case ;
M:: x86 %test-vector-branch ( label src temp rep vcc -- ) M:: x86 %test-vector-branch ( label src temp rep vcc -- )
temp src rep %move-vector-mask :> mask temp src rep %move-vector-mask* :> mask
label temp mask vcc %test-vector-mask-branch ; label temp mask vcc %test-vector-mask-branch ;
M: x86 %test-vector-reps M: x86 %test-vector-reps

View File

@ -4,7 +4,8 @@ sequences.cords cpu.architecture fry generalizations grouping
kernel libc locals macros math math.libm math.order kernel libc locals macros math math.libm math.order
math.ranges math.vectors sequences sequences.generalizations math.ranges math.vectors sequences sequences.generalizations
sequences.private sequences.unrolled sequences.unrolled.private sequences.private sequences.unrolled sequences.unrolled.private
specialized-arrays vocabs words effects.parser locals.parser ; specialized-arrays vocabs words effects.parser locals.parser
math.bitwise ;
QUALIFIED-WITH: alien.c-types c QUALIFIED-WITH: alien.c-types c
SPECIALIZED-ARRAYS: SPECIALIZED-ARRAYS:
c:char c:short c:int c:longlong c:char c:short c:int c:longlong
@ -127,6 +128,8 @@ SYNTAX: SIMD-INTRINSIC::
! XXX ! XXX
: bitwise-components-reduce ( a rep quot -- x ) : bitwise-components-reduce ( a rep quot -- x )
[ >bitwise-vector-rep >rep-array [ ] ] dip map-reduce ; inline [ >bitwise-vector-rep >rep-array [ ] ] dip map-reduce ; inline
! Variant of bitwise-components-reduce that takes an explicit identity and
! folds with reduce instead of map-reduce.
! With ( a rep identity quot ) on the stack, 2dip applies
! >bitwise-vector-rep >rep-array to a and rep underneath identity and quot;
! reduce then folds the resulting array with identity and quot.
: bitwise-components-reduce* ( a rep identity quot -- x )
[ >bitwise-vector-rep >rep-array ] 2dip reduce ; inline
:: (vshuffle) ( a elts rep -- c ) :: (vshuffle) ( a elts rep -- c )
a rep >rep-array :> a' a rep >rep-array :> a'
@ -259,6 +262,7 @@ SIMD-INTRINSIC: (simd-vunordered?) ( a b rep -- c )
SIMD-INTRINSIC: (simd-vany?) ( a rep -- ? ) [ bitor ] bitwise-components-reduce zero? not ; SIMD-INTRINSIC: (simd-vany?) ( a rep -- ? ) [ bitor ] bitwise-components-reduce zero? not ;
SIMD-INTRINSIC: (simd-vall?) ( a rep -- ? ) [ bitand ] bitwise-components-reduce zero? not ; SIMD-INTRINSIC: (simd-vall?) ( a rep -- ? ) [ bitand ] bitwise-components-reduce zero? not ;
SIMD-INTRINSIC: (simd-vnone?) ( a rep -- ? ) [ bitor ] bitwise-components-reduce zero? ; SIMD-INTRINSIC: (simd-vnone?) ( a rep -- ? ) [ bitor ] bitwise-components-reduce zero? ;
! Body given for (simd-vgetmask): folds the bitwise components of a into an
! integer, starting from 0. Each step shifts the accumulator left by one bit
! and ORs in 1 when the component is non-zero, 0 when it is zero. The result
! therefore has one bit per component, with the first component ending up in
! the most significant of those bits.
! NOTE(review): presumably this body is what runs when the compiler does not
! substitute the intrinsic -- confirm against the SIMD-INTRINSIC: definition.
SIMD-INTRINSIC: (simd-vgetmask) ( a rep -- n ) 0 [ [ 1 shift ] [ zero? 0 1 ? ] bi* bitor ] bitwise-components-reduce* ;
SIMD-INTRINSIC: (simd-v>float) ( a rep -- c ) SIMD-INTRINSIC: (simd-v>float) ( a rep -- c )
[ [ >rep-array ] [ rep-length ] bi [ >float ] ] [ [ >rep-array ] [ rep-length ] bi [ >float ] ]
[ >float-vector-rep <rep-array> ] bi unrolled-map-as-unsafe underlying>> ; [ >float-vector-rep <rep-array> ] bi unrolled-map-as-unsafe underlying>> ;

View File

@ -4,7 +4,7 @@ generic.parser kernel lexer literals locals macros math math.functions
math.vectors math.vectors.private math.vectors.simd.intrinsics math.vectors math.vectors.private math.vectors.simd.intrinsics
namespaces parser prettyprint.custom quotations sequences namespaces parser prettyprint.custom quotations sequences
sequences.generalizations sequences.private vocabs vocabs.loader sequences.generalizations sequences.private vocabs vocabs.loader
words ; words math.bitwise ;
QUALIFIED-WITH: alien.c-types c QUALIFIED-WITH: alien.c-types c
IN: math.vectors.simd IN: math.vectors.simd
@ -221,6 +221,8 @@ M: simd-128 vany?
dup simd-rep [ (simd-vany?) ] [ call-next-method ] v->x-op ; inline dup simd-rep [ (simd-vany?) ] [ call-next-method ] v->x-op ; inline
M: simd-128 vall? M: simd-128 vall?
dup simd-rep [ (simd-vall?) ] [ call-next-method ] v->x-op ; inline dup simd-rep [ (simd-vall?) ] [ call-next-method ] v->x-op ; inline
! vcount for simd-128 values: passes v->x-op the vector, its simd-rep, a
! primary quotation that calls (simd-vgetmask) followed by assert-positive,
! and a fallback quotation that calls call-next-method; bit-count is then
! applied to whatever v->x-op returns.
! NOTE(review): bit-count sits outside v->x-op, so it would also be applied
! to a call-next-method result, which is already a count. Confirm that
! v->x-op never takes the fallback quotation for this one-vector case.
! NOTE(review): counting set bits equals the number of true elements only if
! the mask has exactly one bit per element -- confirm for every rep the
! backend advertises.
M: simd-128 vcount
dup simd-rep [ (simd-vgetmask) assert-positive ] [ call-next-method ] v->x-op bit-count ; inline
M: simd-128 vnone? M: simd-128 vnone?
dup simd-rep [ (simd-vnone?) ] [ call-next-method ] v->x-op ; inline dup simd-rep [ (simd-vnone?) ] [ call-next-method ] v->x-op ; inline

View File

@ -162,6 +162,9 @@ M: object vnot [ not ] map ; inline
GENERIC: vall? ( v -- ? ) GENERIC: vall? ( v -- ? )
M: object vall? [ ] all? ; inline M: object vall? [ ] all? ; inline
! vcount ( v -- count ): generic word with a default method on object that
! calls count with the identity quotation [ ].
! NOTE(review): presumably this yields the number of true (non-f) elements,
! in the same way vall? and vany? nearby use [ ] with all? and any? --
! confirm against the sequences vocabulary.
GENERIC: vcount ( v -- count )
M: object vcount [ ] count ; inline
GENERIC: vany? ( v -- ? ) GENERIC: vany? ( v -- ? )
M: object vany? [ ] any? ; inline M: object vany? [ ] any? ; inline