compiler: add intrinsic for PMOVMSKB/MOVMSKP[SD]
Combined with a fast bit-count this will let us rice byte-counting.
db4
parent
d1786adfe4
commit
d79b462f75
|
@ -380,6 +380,11 @@ def: dst
|
||||||
use: src1 src2
|
use: src1 src2
|
||||||
literal: rep cc ;
|
literal: rep cc ;
|
||||||
|
|
||||||
|
! Foldable instruction with one destination in int-rep (dst), one source
! operand (src) and a literal vector rep.
FOLDABLE-INSN: ##move-vector-mask
|
||||||
|
def: dst/int-rep
|
||||||
|
use: src
|
||||||
|
literal: rep ;
|
||||||
|
|
||||||
FOLDABLE-INSN: ##test-vector
|
FOLDABLE-INSN: ##test-vector
|
||||||
def: dst/tagged-rep
|
def: dst/tagged-rep
|
||||||
use: src1
|
use: src1
|
||||||
|
|
|
@ -37,6 +37,7 @@ M: ##tail>head-vector insn-available? rep>> %unpack-vector-head-reps member? ;
|
||||||
M: ##integer>float-vector insn-available? rep>> %integer>float-vector-reps member? ;
|
M: ##integer>float-vector insn-available? rep>> %integer>float-vector-reps member? ;
|
||||||
M: ##float>integer-vector insn-available? rep>> %float>integer-vector-reps member? ;
|
M: ##float>integer-vector insn-available? rep>> %float>integer-vector-reps member? ;
|
||||||
M: ##compare-vector insn-available? [ rep>> ] [ cc>> ] bi %compare-vector-reps member? ;
|
M: ##compare-vector insn-available? [ rep>> ] [ cc>> ] bi %compare-vector-reps member? ;
|
||||||
|
! ##move-vector-mask is available when the instruction's rep is a member of
! the sequence returned by %move-vector-mask-reps.
M: ##move-vector-mask insn-available? rep>> %move-vector-mask-reps member? ;
|
||||||
M: ##test-vector insn-available? rep>> %test-vector-reps member? ;
|
M: ##test-vector insn-available? rep>> %test-vector-reps member? ;
|
||||||
M: ##add-vector insn-available? rep>> %add-vector-reps member? ;
|
M: ##add-vector insn-available? rep>> %add-vector-reps member? ;
|
||||||
M: ##saturated-add-vector insn-available? rep>> %saturated-add-vector-reps member? ;
|
M: ##saturated-add-vector insn-available? rep>> %saturated-add-vector-reps member? ;
|
||||||
|
|
|
@ -109,6 +109,7 @@ M: simple-ops-cpu %or-vector-reps all-reps ;
|
||||||
M: simple-ops-cpu %xor-vector-reps all-reps ;
|
M: simple-ops-cpu %xor-vector-reps all-reps ;
|
||||||
M: simple-ops-cpu %merge-vector-reps all-reps ;
|
M: simple-ops-cpu %merge-vector-reps all-reps ;
|
||||||
M: simple-ops-cpu %sqrt-vector-reps all-reps ;
|
M: simple-ops-cpu %sqrt-vector-reps all-reps ;
|
||||||
|
! simple-ops-cpu answers all-reps for %move-vector-mask-reps, like the
! neighbouring *-reps methods on this cpu.
M: simple-ops-cpu %move-vector-mask-reps all-reps ;
|
||||||
M: simple-ops-cpu %test-vector-reps all-reps ;
|
M: simple-ops-cpu %test-vector-reps all-reps ;
|
||||||
M: simple-ops-cpu %signed-pack-vector-reps all-reps ;
|
M: simple-ops-cpu %signed-pack-vector-reps all-reps ;
|
||||||
M: simple-ops-cpu %unsigned-pack-vector-reps all-reps ;
|
M: simple-ops-cpu %unsigned-pack-vector-reps all-reps ;
|
||||||
|
|
|
@ -555,6 +555,10 @@ PREDICATE: fixnum-vector-rep < int-vector-rep
|
||||||
{
|
{
|
||||||
[ vcc-none ^^test-vector ]
|
[ vcc-none ^^test-vector ]
|
||||||
} emit-v-vector-op ;
|
} emit-v-vector-op ;
|
||||||
|
! Intrinsic emitter registered for (simd-vgetmask): hands emit-v-vector-op a
! one-entry list whose only alternative is [ ^^move-vector-mask ].
: emit-simd-vgetmask ( node -- )
|
||||||
|
{
|
||||||
|
[ ^^move-vector-mask ]
|
||||||
|
} emit-v-vector-op ;
|
||||||
|
|
||||||
: emit-simd-v>float ( node -- )
|
: emit-simd-v>float ( node -- )
|
||||||
{
|
{
|
||||||
|
@ -700,6 +704,7 @@ PREDICATE: fixnum-vector-rep < int-vector-rep
|
||||||
{ alien-vector [ emit-alien-vector ] }
|
{ alien-vector [ emit-alien-vector ] }
|
||||||
{ set-alien-vector [ emit-set-alien-vector ] }
|
{ set-alien-vector [ emit-set-alien-vector ] }
|
||||||
{ assert-positive [ drop ] }
|
{ assert-positive [ drop ] }
|
||||||
|
{ (simd-vgetmask) [ emit-simd-vgetmask ] }
|
||||||
} enable-intrinsics ;
|
} enable-intrinsics ;
|
||||||
|
|
||||||
enable-simd
|
enable-simd
|
||||||
|
|
|
@ -223,6 +223,7 @@ CODEGEN: ##unpack-vector-tail %unpack-vector-tail
|
||||||
CODEGEN: ##integer>float-vector %integer>float-vector
|
CODEGEN: ##integer>float-vector %integer>float-vector
|
||||||
CODEGEN: ##float>integer-vector %float>integer-vector
|
CODEGEN: ##float>integer-vector %float>integer-vector
|
||||||
CODEGEN: ##compare-vector %compare-vector
|
CODEGEN: ##compare-vector %compare-vector
|
||||||
|
! Pairs the ##move-vector-mask instruction with the %move-vector-mask word
! for code generation, following the same pattern as the lines around it.
CODEGEN: ##move-vector-mask %move-vector-mask
|
||||||
CODEGEN: ##test-vector %test-vector
|
CODEGEN: ##test-vector %test-vector
|
||||||
CODEGEN: ##add-vector %add-vector
|
CODEGEN: ##add-vector %add-vector
|
||||||
CODEGEN: ##saturated-add-vector %saturated-add-vector
|
CODEGEN: ##saturated-add-vector %saturated-add-vector
|
||||||
|
|
|
@ -69,6 +69,7 @@ CONSTANT: vector-other-intrinsics
|
||||||
(simd-vany?)
|
(simd-vany?)
|
||||||
(simd-vall?)
|
(simd-vall?)
|
||||||
(simd-vnone?)
|
(simd-vnone?)
|
||||||
|
(simd-vgetmask)
|
||||||
(simd-select)
|
(simd-select)
|
||||||
set-alien-vector
|
set-alien-vector
|
||||||
}
|
}
|
||||||
|
@ -104,6 +105,8 @@ vector>vector-intrinsics [ { byte-array } "default-output-classes" set-word-prop
|
||||||
real [0,inf] <class/interval-info> value-info-intersect
|
real [0,inf] <class/interval-info> value-info-intersect
|
||||||
] "outputs" set-word-prop
|
] "outputs" set-word-prop
|
||||||
|
|
||||||
|
! Declare { fixnum } as the "default-output-classes" word property of
! (simd-vgetmask).
\ (simd-vgetmask) { fixnum } "default-output-classes" set-word-prop
|
||||||
|
|
||||||
: clone-with-value-infos ( node -- node' )
|
: clone-with-value-infos ( node -- node' )
|
||||||
clone dup in-d>> extract-value-info >>info ;
|
clone dup in-d>> extract-value-info >>info ;
|
||||||
|
|
||||||
|
|
|
@ -322,6 +322,7 @@ HOOK: %unpack-vector-tail cpu ( dst src rep -- )
|
||||||
HOOK: %integer>float-vector cpu ( dst src rep -- )
|
HOOK: %integer>float-vector cpu ( dst src rep -- )
|
||||||
HOOK: %float>integer-vector cpu ( dst src rep -- )
|
HOOK: %float>integer-vector cpu ( dst src rep -- )
|
||||||
HOOK: %compare-vector cpu ( dst src1 src2 rep cc -- )
|
HOOK: %compare-vector cpu ( dst src1 src2 rep cc -- )
|
||||||
|
! Backend hook (dispatches on cpu) used for ##move-vector-mask code
! generation; takes dst, src and rep and leaves nothing on the stack.
HOOK: %move-vector-mask cpu ( dst src rep -- )
|
||||||
HOOK: %test-vector cpu ( dst src1 temp rep vcc -- )
|
HOOK: %test-vector cpu ( dst src1 temp rep vcc -- )
|
||||||
HOOK: %test-vector-branch cpu ( label src1 temp rep vcc -- )
|
HOOK: %test-vector-branch cpu ( label src1 temp rep vcc -- )
|
||||||
HOOK: %add-vector cpu ( dst src1 src2 rep -- )
|
HOOK: %add-vector cpu ( dst src1 src2 rep -- )
|
||||||
|
@ -381,6 +382,7 @@ HOOK: %integer>float-vector-reps cpu ( -- reps )
|
||||||
HOOK: %float>integer-vector-reps cpu ( -- reps )
|
HOOK: %float>integer-vector-reps cpu ( -- reps )
|
||||||
HOOK: %compare-vector-reps cpu ( cc -- reps )
|
HOOK: %compare-vector-reps cpu ( cc -- reps )
|
||||||
HOOK: %compare-vector-ccs cpu ( rep cc -- {cc,swap?}s not? )
|
HOOK: %compare-vector-ccs cpu ( rep cc -- {cc,swap?}s not? )
|
||||||
|
! Backend hook (dispatches on cpu) returning the reps for which
! %move-vector-mask is supported; insn-available? for ##move-vector-mask
! tests the instruction's rep against this sequence.
HOOK: %move-vector-mask-reps cpu ( -- reps )
|
||||||
HOOK: %test-vector-reps cpu ( -- reps )
|
HOOK: %test-vector-reps cpu ( -- reps )
|
||||||
HOOK: %add-vector-reps cpu ( -- reps )
|
HOOK: %add-vector-reps cpu ( -- reps )
|
||||||
HOOK: %saturated-add-vector-reps cpu ( -- reps )
|
HOOK: %saturated-add-vector-reps cpu ( -- reps )
|
||||||
|
|
|
@ -476,15 +476,24 @@ M: x86 %compare-vector-ccs
|
||||||
{ vcc-notall [ dst mask CMP dst temp \ CMOVNE (%boolean) ] }
|
{ vcc-notall [ dst mask CMP dst temp \ CMOVNE (%boolean) ] }
|
||||||
} case ;
|
} case ;
|
||||||
|
|
||||||
! Mask-extraction helper ( dst src rep -- mask ): dispatches on rep, runs
! MOVMSKPS (double-2-rep, float-4-rep) or PMOVMSKB (any other rep, after
! dropping it) on dst/src, and leaves a constant on the stack (HEX: f or
! HEX: ffff) that %test-vector and %test-vector-branch bind as mask.
! The two header lines below are the old and new names from the rename to
! %move-vector-mask*.
! NOTE(review): double-2-rep goes through MOVMSKPS with HEX: f, i.e. the
! same path as float-4-rep, although the commit title mentions MOVMSKP[SD];
! confirm this is what ##move-vector-mask consumers expect for doubles.
: %move-vector-mask ( dst src rep -- mask )
|
: %move-vector-mask* ( dst src rep -- mask )
|
||||||
{
|
{
|
||||||
{ double-2-rep [ MOVMSKPS HEX: f ] }
|
{ double-2-rep [ MOVMSKPS HEX: f ] }
|
||||||
{ float-4-rep [ MOVMSKPS HEX: f ] }
|
{ float-4-rep [ MOVMSKPS HEX: f ] }
|
||||||
[ drop PMOVMSKB HEX: ffff ]
|
[ drop PMOVMSKB HEX: ffff ]
|
||||||
} case ;
|
} case ;
|
||||||
|
|
||||||
|
! x86 method for the %move-vector-mask hook: runs %move-vector-mask* and
! drops the mask constant it leaves on the stack, because the hook itself
! returns nothing.
M: x86 %move-vector-mask ( dst src rep -- )
|
||||||
|
%move-vector-mask* drop ;
|
||||||
|
|
||||||
|
! Reps supported by %move-vector-mask on x86, listed per feature predicate
! and passed to available-reps: float-4-rep under sse?, double-2-rep and
! the listed integer reps under sse2?.
M: x86 %move-vector-mask-reps
|
||||||
|
{
|
||||||
|
{ sse? { float-4-rep } }
|
||||||
|
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
|
||||||
|
} available-reps ;
|
||||||
|
|
||||||
! x86 %test-vector: calls the mask helper with dst src rep, binds the value
! it returns as mask, then calls %test-vector-mask with dst temp mask vcc.
! The paired "... :> mask" lines are the old and new sides of the rename
! from %move-vector-mask to %move-vector-mask*.
M:: x86 %test-vector ( dst src temp rep vcc -- )
|
M:: x86 %test-vector ( dst src temp rep vcc -- )
|
||||||
dst src rep %move-vector-mask :> mask
|
dst src rep %move-vector-mask* :> mask
|
||||||
dst temp mask vcc %test-vector-mask ;
|
dst temp mask vcc %test-vector-mask ;
|
||||||
|
|
||||||
:: %test-vector-mask-branch ( label temp mask vcc -- )
|
:: %test-vector-mask-branch ( label temp mask vcc -- )
|
||||||
|
@ -496,7 +505,7 @@ M:: x86 %test-vector ( dst src temp rep vcc -- )
|
||||||
} case ;
|
} case ;
|
||||||
|
|
||||||
! x86 %test-vector-branch: calls the mask helper with temp src rep (temp,
! not a dst, is the first operand here), binds the returned value as mask,
! then calls %test-vector-mask-branch with label temp mask vcc.
! The paired "... :> mask" lines are the old and new sides of the rename
! from %move-vector-mask to %move-vector-mask*.
M:: x86 %test-vector-branch ( label src temp rep vcc -- )
|
M:: x86 %test-vector-branch ( label src temp rep vcc -- )
|
||||||
temp src rep %move-vector-mask :> mask
|
temp src rep %move-vector-mask* :> mask
|
||||||
label temp mask vcc %test-vector-mask-branch ;
|
label temp mask vcc %test-vector-mask-branch ;
|
||||||
|
|
||||||
M: x86 %test-vector-reps
|
M: x86 %test-vector-reps
|
||||||
|
|
|
@ -4,7 +4,8 @@ sequences.cords cpu.architecture fry generalizations grouping
|
||||||
kernel libc locals macros math math.libm math.order
|
kernel libc locals macros math math.libm math.order
|
||||||
math.ranges math.vectors sequences sequences.generalizations
|
math.ranges math.vectors sequences sequences.generalizations
|
||||||
sequences.private sequences.unrolled sequences.unrolled.private
|
sequences.private sequences.unrolled sequences.unrolled.private
|
||||||
specialized-arrays vocabs words effects.parser locals.parser ;
|
specialized-arrays vocabs words effects.parser locals.parser
|
||||||
|
math.bitwise ;
|
||||||
QUALIFIED-WITH: alien.c-types c
|
QUALIFIED-WITH: alien.c-types c
|
||||||
SPECIALIZED-ARRAYS:
|
SPECIALIZED-ARRAYS:
|
||||||
c:char c:short c:int c:longlong
|
c:char c:short c:int c:longlong
|
||||||
|
@ -127,6 +128,8 @@ SYNTAX: SIMD-INTRINSIC::
|
||||||
! XXX
|
! XXX
|
||||||
: bitwise-components-reduce ( a rep quot -- x )
|
: bitwise-components-reduce ( a rep quot -- x )
|
||||||
[ >bitwise-vector-rep >rep-array [ ] ] dip map-reduce ; inline
|
[ >bitwise-vector-rep >rep-array [ ] ] dip map-reduce ; inline
|
||||||
|
! Variant of bitwise-components-reduce that takes an explicit identity:
! converts a into a rep-array using rep's >bitwise-vector-rep, then calls
! reduce on that array with identity and quot.
: bitwise-components-reduce* ( a rep identity quot -- x )
|
||||||
|
[ >bitwise-vector-rep >rep-array ] 2dip reduce ; inline
|
||||||
|
|
||||||
:: (vshuffle) ( a elts rep -- c )
|
:: (vshuffle) ( a elts rep -- c )
|
||||||
a rep >rep-array :> a'
|
a rep >rep-array :> a'
|
||||||
|
@ -259,6 +262,7 @@ SIMD-INTRINSIC: (simd-vunordered?) ( a b rep -- c )
|
||||||
SIMD-INTRINSIC: (simd-vany?) ( a rep -- ? ) [ bitor ] bitwise-components-reduce zero? not ;
|
SIMD-INTRINSIC: (simd-vany?) ( a rep -- ? ) [ bitor ] bitwise-components-reduce zero? not ;
|
||||||
SIMD-INTRINSIC: (simd-vall?) ( a rep -- ? ) [ bitand ] bitwise-components-reduce zero? not ;
|
SIMD-INTRINSIC: (simd-vall?) ( a rep -- ? ) [ bitand ] bitwise-components-reduce zero? not ;
|
||||||
SIMD-INTRINSIC: (simd-vnone?) ( a rep -- ? ) [ bitor ] bitwise-components-reduce zero? ;
|
SIMD-INTRINSIC: (simd-vnone?) ( a rep -- ? ) [ bitor ] bitwise-components-reduce zero? ;
|
||||||
|
! Software definition of (simd-vgetmask): starting from 0, for each bitwise
! component shift the accumulator left by one and bitor in 1 if the
! component is nonzero, 0 if it is zero.
! NOTE(review): this produces one bit per component, with earlier components
! ending up in higher bits; confirm that bit count and bit order agree with
! what the ##move-vector-mask backend (MOVMSKPS / PMOVMSKB) produces.
SIMD-INTRINSIC: (simd-vgetmask) ( a rep -- n ) 0 [ [ 1 shift ] [ zero? 0 1 ? ] bi* bitor ] bitwise-components-reduce* ;
|
||||||
SIMD-INTRINSIC: (simd-v>float) ( a rep -- c )
|
SIMD-INTRINSIC: (simd-v>float) ( a rep -- c )
|
||||||
[ [ >rep-array ] [ rep-length ] bi [ >float ] ]
|
[ [ >rep-array ] [ rep-length ] bi [ >float ] ]
|
||||||
[ >float-vector-rep <rep-array> ] bi unrolled-map-as-unsafe underlying>> ;
|
[ >float-vector-rep <rep-array> ] bi unrolled-map-as-unsafe underlying>> ;
|
||||||
|
|
|
@ -4,7 +4,7 @@ generic.parser kernel lexer literals locals macros math math.functions
|
||||||
math.vectors math.vectors.private math.vectors.simd.intrinsics
|
math.vectors math.vectors.private math.vectors.simd.intrinsics
|
||||||
namespaces parser prettyprint.custom quotations sequences
|
namespaces parser prettyprint.custom quotations sequences
|
||||||
sequences.generalizations sequences.private vocabs vocabs.loader
|
sequences.generalizations sequences.private vocabs vocabs.loader
|
||||||
words ;
|
words math.bitwise ;
|
||||||
QUALIFIED-WITH: alien.c-types c
|
QUALIFIED-WITH: alien.c-types c
|
||||||
IN: math.vectors.simd
|
IN: math.vectors.simd
|
||||||
|
|
||||||
|
@ -221,6 +221,8 @@ M: simd-128 vany?
|
||||||
dup simd-rep [ (simd-vany?) ] [ call-next-method ] v->x-op ; inline
|
dup simd-rep [ (simd-vany?) ] [ call-next-method ] v->x-op ; inline
|
||||||
M: simd-128 vall?
|
M: simd-128 vall?
|
||||||
dup simd-rep [ (simd-vall?) ] [ call-next-method ] v->x-op ; inline
|
dup simd-rep [ (simd-vall?) ] [ call-next-method ] v->x-op ; inline
|
||||||
|
! vcount for simd-128: gives v->x-op the quotation
! [ (simd-vgetmask) assert-positive ] together with [ call-next-method ] as
! its second quotation, then applies bit-count to whatever v->x-op returns.
! NOTE(review): bit-count sits after v->x-op, so it would also be applied to
! a call-next-method result if v->x-op ever runs that quotation — confirm
! against the definition of v->x-op.
M: simd-128 vcount
|
||||||
|
dup simd-rep [ (simd-vgetmask) assert-positive ] [ call-next-method ] v->x-op bit-count ; inline
|
||||||
M: simd-128 vnone?
|
M: simd-128 vnone?
|
||||||
dup simd-rep [ (simd-vnone?) ] [ call-next-method ] v->x-op ; inline
|
dup simd-rep [ (simd-vnone?) ] [ call-next-method ] v->x-op ; inline
|
||||||
|
|
||||||
|
|
|
@ -162,6 +162,9 @@ M: object vnot [ not ] map ; inline
|
||||||
GENERIC: vall? ( v -- ? )
|
GENERIC: vall? ( v -- ? )
|
||||||
M: object vall? [ ] all? ; inline
|
M: object vall? [ ] all? ; inline
|
||||||
|
|
||||||
|
! vcount ( v -- count ): generic word; the default method for object runs
! count with the identity quotation [ ], i.e. it counts the elements of v
! that are themselves true values.
GENERIC: vcount ( v -- count )
|
||||||
|
M: object vcount [ ] count ; inline
|
||||||
|
|
||||||
GENERIC: vany? ( v -- ? )
|
GENERIC: vany? ( v -- ? )
|
||||||
M: object vany? [ ] any? ; inline
|
M: object vany? [ ] any? ; inline
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue