Merge branch 'master' of git://factorcode.org/git/factor
commit
e758d4061a
|
@ -280,6 +280,11 @@ def: dst
|
||||||
use: src
|
use: src
|
||||||
literal: shuffle rep ;
|
literal: shuffle rep ;
|
||||||
|
|
||||||
|
PURE-INSN: ##tail>head-vector
|
||||||
|
def: dst
|
||||||
|
use: src
|
||||||
|
literal: rep ;
|
||||||
|
|
||||||
PURE-INSN: ##merge-vector-head
|
PURE-INSN: ##merge-vector-head
|
||||||
def: dst
|
def: dst
|
||||||
use: src1 src2
|
use: src1 src2
|
||||||
|
@ -303,13 +308,11 @@ literal: rep ;
|
||||||
PURE-INSN: ##unpack-vector-head
|
PURE-INSN: ##unpack-vector-head
|
||||||
def: dst
|
def: dst
|
||||||
use: src
|
use: src
|
||||||
temp: temp
|
|
||||||
literal: rep ;
|
literal: rep ;
|
||||||
|
|
||||||
PURE-INSN: ##unpack-vector-tail
|
PURE-INSN: ##unpack-vector-tail
|
||||||
def: dst
|
def: dst
|
||||||
use: src
|
use: src
|
||||||
temp: temp
|
|
||||||
literal: rep ;
|
literal: rep ;
|
||||||
|
|
||||||
PURE-INSN: ##integer>float-vector
|
PURE-INSN: ##integer>float-vector
|
||||||
|
@ -325,7 +328,6 @@ literal: rep ;
|
||||||
PURE-INSN: ##compare-vector
|
PURE-INSN: ##compare-vector
|
||||||
def: dst
|
def: dst
|
||||||
use: src1 src2
|
use: src1 src2
|
||||||
temp: temp
|
|
||||||
literal: rep cc ;
|
literal: rep cc ;
|
||||||
|
|
||||||
PURE-INSN: ##test-vector
|
PURE-INSN: ##test-vector
|
||||||
|
@ -813,7 +815,6 @@ UNION: kill-vreg-insn
|
||||||
UNION: def-is-use-insn
|
UNION: def-is-use-insn
|
||||||
##box-alien
|
##box-alien
|
||||||
##box-displaced-alien
|
##box-displaced-alien
|
||||||
##compare-vector
|
|
||||||
##string-nth
|
##string-nth
|
||||||
##unbox-any-c-ptr ;
|
##unbox-any-c-ptr ;
|
||||||
|
|
||||||
|
|
|
@ -177,12 +177,12 @@ IN: compiler.cfg.intrinsics
|
||||||
{ math.vectors.simd.intrinsics:(simd-vor) [ [ ^^or-vector ] emit-binary-vector-op ] }
|
{ math.vectors.simd.intrinsics:(simd-vor) [ [ ^^or-vector ] emit-binary-vector-op ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-vxor) [ [ ^^xor-vector ] emit-binary-vector-op ] }
|
{ math.vectors.simd.intrinsics:(simd-vxor) [ [ ^^xor-vector ] emit-binary-vector-op ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-vnot) [ [ generate-not-vector ] emit-unary-vector-op ] }
|
{ math.vectors.simd.intrinsics:(simd-vnot) [ [ generate-not-vector ] emit-unary-vector-op ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-v<=) [ [ cc<= ^^compare-vector ] emit-binary-vector-op ] }
|
{ math.vectors.simd.intrinsics:(simd-v<=) [ [ cc<= generate-compare-vector ] emit-binary-vector-op ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-v<) [ [ cc< ^^compare-vector ] emit-binary-vector-op ] }
|
{ math.vectors.simd.intrinsics:(simd-v<) [ [ cc< generate-compare-vector ] emit-binary-vector-op ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-v=) [ [ cc= ^^compare-vector ] emit-binary-vector-op ] }
|
{ math.vectors.simd.intrinsics:(simd-v=) [ [ cc= generate-compare-vector ] emit-binary-vector-op ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-v>) [ [ cc> ^^compare-vector ] emit-binary-vector-op ] }
|
{ math.vectors.simd.intrinsics:(simd-v>) [ [ cc> generate-compare-vector ] emit-binary-vector-op ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-v>=) [ [ cc>= ^^compare-vector ] emit-binary-vector-op ] }
|
{ math.vectors.simd.intrinsics:(simd-v>=) [ [ cc>= generate-compare-vector ] emit-binary-vector-op ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-vunordered?) [ [ cc/<>= ^^compare-vector ] emit-binary-vector-op ] }
|
{ math.vectors.simd.intrinsics:(simd-vunordered?) [ [ cc/<>= generate-compare-vector ] emit-binary-vector-op ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-vany?) [ [ vcc-any ^^test-vector ] emit-unary-vector-op ] }
|
{ math.vectors.simd.intrinsics:(simd-vany?) [ [ vcc-any ^^test-vector ] emit-unary-vector-op ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-vall?) [ [ vcc-all ^^test-vector ] emit-unary-vector-op ] }
|
{ math.vectors.simd.intrinsics:(simd-vall?) [ [ vcc-all ^^test-vector ] emit-unary-vector-op ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-vnone?) [ [ vcc-none ^^test-vector ] emit-unary-vector-op ] }
|
{ math.vectors.simd.intrinsics:(simd-vnone?) [ [ vcc-none ^^test-vector ] emit-unary-vector-op ] }
|
||||||
|
@ -200,8 +200,8 @@ IN: compiler.cfg.intrinsics
|
||||||
{ math.vectors.simd.intrinsics:(simd-(v>integer)) [ [ ^^float>integer-vector ] emit-unary-vector-op ] }
|
{ math.vectors.simd.intrinsics:(simd-(v>integer)) [ [ ^^float>integer-vector ] emit-unary-vector-op ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-(vpack-signed)) [ [ ^^signed-pack-vector ] emit-binary-vector-op ] }
|
{ math.vectors.simd.intrinsics:(simd-(vpack-signed)) [ [ ^^signed-pack-vector ] emit-binary-vector-op ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-(vpack-unsigned)) [ [ ^^unsigned-pack-vector ] emit-binary-vector-op ] }
|
{ math.vectors.simd.intrinsics:(simd-(vpack-unsigned)) [ [ ^^unsigned-pack-vector ] emit-binary-vector-op ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-(vunpack-head)) [ [ ^^unpack-vector-head ] emit-unary-vector-op ] }
|
{ math.vectors.simd.intrinsics:(simd-(vunpack-head)) [ [ generate-unpack-vector-head ] emit-unary-vector-op ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-(vunpack-tail)) [ [ ^^unpack-vector-tail ] emit-unary-vector-op ] }
|
{ math.vectors.simd.intrinsics:(simd-(vunpack-tail)) [ [ generate-unpack-vector-tail ] emit-unary-vector-op ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-select) [ emit-select-vector ] }
|
{ math.vectors.simd.intrinsics:(simd-select) [ emit-select-vector ] }
|
||||||
{ math.vectors.simd.intrinsics:(simd-sum) [ [ ^^horizontal-add-vector ] emit-unary-vector-op ] }
|
{ math.vectors.simd.intrinsics:(simd-sum) [ [ ^^horizontal-add-vector ] emit-unary-vector-op ] }
|
||||||
{ math.vectors.simd.intrinsics:alien-vector [ emit-alien-vector ] }
|
{ math.vectors.simd.intrinsics:alien-vector [ emit-alien-vector ] }
|
||||||
|
|
|
@ -2,8 +2,9 @@
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: accessors byte-arrays fry cpu.architecture kernel math
|
USING: accessors byte-arrays fry cpu.architecture kernel math
|
||||||
sequences math.vectors.simd.intrinsics macros generalizations
|
sequences math.vectors.simd.intrinsics macros generalizations
|
||||||
combinators combinators.short-circuit arrays
|
combinators combinators.short-circuit arrays locals
|
||||||
compiler.tree.propagation.info compiler.cfg.builder.blocks
|
compiler.tree.propagation.info compiler.cfg.builder.blocks
|
||||||
|
compiler.cfg.comparisons
|
||||||
compiler.cfg.stacks compiler.cfg.stacks.local compiler.cfg.hats
|
compiler.cfg.stacks compiler.cfg.stacks.local compiler.cfg.hats
|
||||||
compiler.cfg.instructions compiler.cfg.registers
|
compiler.cfg.instructions compiler.cfg.registers
|
||||||
compiler.cfg.intrinsics.alien ;
|
compiler.cfg.intrinsics.alien ;
|
||||||
|
@ -121,3 +122,58 @@ MACRO: if-literals-match ( quots -- )
|
||||||
[ ^^not-vector ]
|
[ ^^not-vector ]
|
||||||
[ [ ^^fill-vector ] [ ^^xor-vector ] bi ] if ;
|
[ [ ^^fill-vector ] [ ^^xor-vector ] bi ] if ;
|
||||||
|
|
||||||
|
:: (generate-compare-vector) ( src1 src2 rep {cc,swap} -- dst )
|
||||||
|
{cc,swap} first2 :> swap? :> cc
|
||||||
|
swap?
|
||||||
|
[ src2 src1 rep cc ^^compare-vector ]
|
||||||
|
[ src1 src2 rep cc ^^compare-vector ] if ;
|
||||||
|
|
||||||
|
:: generate-compare-vector ( src1 src2 rep orig-cc -- dst )
|
||||||
|
rep orig-cc %compare-vector-ccs :> not? :> ccs
|
||||||
|
|
||||||
|
ccs empty?
|
||||||
|
[ rep not? [ ^^fill-vector ] [ ^^zero-vector ] if ]
|
||||||
|
[
|
||||||
|
ccs unclip :> first-cc :> rest-ccs
|
||||||
|
src1 src2 rep first-cc (generate-compare-vector) :> first-dst
|
||||||
|
|
||||||
|
rest-ccs first-dst
|
||||||
|
[ [ src1 src2 rep ] dip (generate-compare-vector) rep ^^or-vector ]
|
||||||
|
reduce
|
||||||
|
|
||||||
|
not? [ rep generate-not-vector ] when
|
||||||
|
] if ;
|
||||||
|
|
||||||
|
:: generate-unpack-vector-head ( src rep -- dst )
|
||||||
|
{
|
||||||
|
{
|
||||||
|
[ rep %unpack-vector-head-reps member? ]
|
||||||
|
[ src rep ^^unpack-vector-head ]
|
||||||
|
}
|
||||||
|
[
|
||||||
|
rep ^^zero-vector :> zero
|
||||||
|
zero src rep cc> ^^compare-vector :> sign
|
||||||
|
src sign rep ^^merge-vector-head
|
||||||
|
]
|
||||||
|
} cond ;
|
||||||
|
|
||||||
|
:: generate-unpack-vector-tail ( src rep -- dst )
|
||||||
|
{
|
||||||
|
{
|
||||||
|
[ rep %unpack-vector-tail-reps member? ]
|
||||||
|
[ src rep ^^unpack-vector-tail ]
|
||||||
|
}
|
||||||
|
{
|
||||||
|
[ rep %unpack-vector-head-reps member? ]
|
||||||
|
[
|
||||||
|
src rep ^^tail>head-vector :> tail
|
||||||
|
tail rep ^^unpack-vector-head
|
||||||
|
]
|
||||||
|
}
|
||||||
|
[
|
||||||
|
rep ^^zero-vector :> zero
|
||||||
|
zero src rep cc> ^^compare-vector :> sign
|
||||||
|
src sign rep ^^merge-vector-tail
|
||||||
|
]
|
||||||
|
} cond ;
|
||||||
|
|
||||||
|
|
|
@ -236,7 +236,7 @@ M: ##phi conversions-for-insn
|
||||||
: convert-to-fill-vector? ( insn -- ? )
|
: convert-to-fill-vector? ( insn -- ? )
|
||||||
{
|
{
|
||||||
[ dst>> rep-of vector-rep? ]
|
[ dst>> rep-of vector-rep? ]
|
||||||
[ obj>> B{ 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 } = ]
|
[ obj>> B{ 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 } = ]
|
||||||
} 1&& ;
|
} 1&& ;
|
||||||
|
|
||||||
: (convert-to-zero/fill-vector) ( insn -- dst rep )
|
: (convert-to-zero/fill-vector) ( insn -- dst rep )
|
||||||
|
|
|
@ -164,6 +164,7 @@ CODEGEN: ##fill-vector %fill-vector
|
||||||
CODEGEN: ##gather-vector-2 %gather-vector-2
|
CODEGEN: ##gather-vector-2 %gather-vector-2
|
||||||
CODEGEN: ##gather-vector-4 %gather-vector-4
|
CODEGEN: ##gather-vector-4 %gather-vector-4
|
||||||
CODEGEN: ##shuffle-vector %shuffle-vector
|
CODEGEN: ##shuffle-vector %shuffle-vector
|
||||||
|
CODEGEN: ##tail>head-vector %tail>head-vector
|
||||||
CODEGEN: ##merge-vector-head %merge-vector-head
|
CODEGEN: ##merge-vector-head %merge-vector-head
|
||||||
CODEGEN: ##merge-vector-tail %merge-vector-tail
|
CODEGEN: ##merge-vector-tail %merge-vector-tail
|
||||||
CODEGEN: ##signed-pack-vector %signed-pack-vector
|
CODEGEN: ##signed-pack-vector %signed-pack-vector
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
! Copyright (C) 2006, 2009 Slava Pestov.
|
! Copyright (C) 2006, 2009 Slava Pestov.
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
! See http://factorcode.org/license.txt for BSD license.
|
||||||
USING: accessors arrays generic kernel kernel.private math
|
USING: accessors arrays assocs generic kernel kernel.private
|
||||||
memory namespaces make sequences layouts system hashtables
|
math memory namespaces make sequences layouts system hashtables
|
||||||
classes alien byte-arrays combinators words sets fry ;
|
classes alien byte-arrays combinators words sets fry ;
|
||||||
IN: cpu.architecture
|
IN: cpu.architecture
|
||||||
|
|
||||||
|
@ -95,6 +95,18 @@ double-rep
|
||||||
vector-rep
|
vector-rep
|
||||||
scalar-rep ;
|
scalar-rep ;
|
||||||
|
|
||||||
|
: unsign-rep ( rep -- rep' )
|
||||||
|
{
|
||||||
|
{ uint-4-rep int-4-rep }
|
||||||
|
{ ulonglong-2-rep longlong-2-rep }
|
||||||
|
{ ushort-8-rep short-8-rep }
|
||||||
|
{ uchar-16-rep char-16-rep }
|
||||||
|
{ uchar-scalar-rep char-scalar-rep }
|
||||||
|
{ ushort-scalar-rep short-scalar-rep }
|
||||||
|
{ uint-scalar-rep int-scalar-rep }
|
||||||
|
{ ulonglong-scalar-rep longlong-scalar-rep }
|
||||||
|
} ?at drop ;
|
||||||
|
|
||||||
! Register classes
|
! Register classes
|
||||||
SINGLETONS: int-regs float-regs ;
|
SINGLETONS: int-regs float-regs ;
|
||||||
|
|
||||||
|
@ -230,15 +242,16 @@ HOOK: %fill-vector cpu ( dst rep -- )
|
||||||
HOOK: %gather-vector-2 cpu ( dst src1 src2 rep -- )
|
HOOK: %gather-vector-2 cpu ( dst src1 src2 rep -- )
|
||||||
HOOK: %gather-vector-4 cpu ( dst src1 src2 src3 src4 rep -- )
|
HOOK: %gather-vector-4 cpu ( dst src1 src2 src3 src4 rep -- )
|
||||||
HOOK: %shuffle-vector cpu ( dst src shuffle rep -- )
|
HOOK: %shuffle-vector cpu ( dst src shuffle rep -- )
|
||||||
|
HOOK: %tail>head-vector cpu ( dst src rep -- )
|
||||||
HOOK: %merge-vector-head cpu ( dst src1 src2 rep -- )
|
HOOK: %merge-vector-head cpu ( dst src1 src2 rep -- )
|
||||||
HOOK: %merge-vector-tail cpu ( dst src1 src2 rep -- )
|
HOOK: %merge-vector-tail cpu ( dst src1 src2 rep -- )
|
||||||
HOOK: %signed-pack-vector cpu ( dst src1 src2 rep -- )
|
HOOK: %signed-pack-vector cpu ( dst src1 src2 rep -- )
|
||||||
HOOK: %unsigned-pack-vector cpu ( dst src1 src2 rep -- )
|
HOOK: %unsigned-pack-vector cpu ( dst src1 src2 rep -- )
|
||||||
HOOK: %unpack-vector-head cpu ( dst src temp rep -- )
|
HOOK: %unpack-vector-head cpu ( dst src rep -- )
|
||||||
HOOK: %unpack-vector-tail cpu ( dst src temp rep -- )
|
HOOK: %unpack-vector-tail cpu ( dst src rep -- )
|
||||||
HOOK: %integer>float-vector cpu ( dst src rep -- )
|
HOOK: %integer>float-vector cpu ( dst src rep -- )
|
||||||
HOOK: %float>integer-vector cpu ( dst src rep -- )
|
HOOK: %float>integer-vector cpu ( dst src rep -- )
|
||||||
HOOK: %compare-vector cpu ( dst src1 src2 temp rep cc -- )
|
HOOK: %compare-vector cpu ( dst src1 src2 rep cc -- )
|
||||||
HOOK: %test-vector cpu ( dst src1 temp rep vcc -- )
|
HOOK: %test-vector cpu ( dst src1 temp rep vcc -- )
|
||||||
HOOK: %test-vector-branch cpu ( label src1 temp rep vcc -- )
|
HOOK: %test-vector-branch cpu ( label src1 temp rep vcc -- )
|
||||||
HOOK: %add-vector cpu ( dst src1 src2 rep -- )
|
HOOK: %add-vector cpu ( dst src1 src2 rep -- )
|
||||||
|
@ -279,10 +292,12 @@ HOOK: %shuffle-vector-reps cpu ( -- reps )
|
||||||
HOOK: %merge-vector-reps cpu ( -- reps )
|
HOOK: %merge-vector-reps cpu ( -- reps )
|
||||||
HOOK: %signed-pack-vector-reps cpu ( -- reps )
|
HOOK: %signed-pack-vector-reps cpu ( -- reps )
|
||||||
HOOK: %unsigned-pack-vector-reps cpu ( -- reps )
|
HOOK: %unsigned-pack-vector-reps cpu ( -- reps )
|
||||||
HOOK: %unpack-vector-reps cpu ( -- reps )
|
HOOK: %unpack-vector-head-reps cpu ( -- reps )
|
||||||
|
HOOK: %unpack-vector-tail-reps cpu ( -- reps )
|
||||||
HOOK: %integer>float-vector-reps cpu ( -- reps )
|
HOOK: %integer>float-vector-reps cpu ( -- reps )
|
||||||
HOOK: %float>integer-vector-reps cpu ( -- reps )
|
HOOK: %float>integer-vector-reps cpu ( -- reps )
|
||||||
HOOK: %compare-vector-reps cpu ( cc -- reps )
|
HOOK: %compare-vector-reps cpu ( cc -- reps )
|
||||||
|
HOOK: %compare-vector-ccs cpu ( rep cc -- {cc,swap?}s not? )
|
||||||
HOOK: %test-vector-reps cpu ( -- reps )
|
HOOK: %test-vector-reps cpu ( -- reps )
|
||||||
HOOK: %add-vector-reps cpu ( -- reps )
|
HOOK: %add-vector-reps cpu ( -- reps )
|
||||||
HOOK: %saturated-add-vector-reps cpu ( -- reps )
|
HOOK: %saturated-add-vector-reps cpu ( -- reps )
|
||||||
|
|
|
@ -588,14 +588,6 @@ M: x86 %fill-vector-reps
|
||||||
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
|
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
|
||||||
} available-reps ;
|
} available-reps ;
|
||||||
|
|
||||||
: unsign-rep ( rep -- rep' )
|
|
||||||
{
|
|
||||||
{ uint-4-rep int-4-rep }
|
|
||||||
{ ulonglong-2-rep longlong-2-rep }
|
|
||||||
{ ushort-8-rep short-8-rep }
|
|
||||||
{ uchar-16-rep char-16-rep }
|
|
||||||
} ?at drop ;
|
|
||||||
|
|
||||||
! M:: x86 %broadcast-vector ( dst src rep -- )
|
! M:: x86 %broadcast-vector ( dst src rep -- )
|
||||||
! rep unsign-rep {
|
! rep unsign-rep {
|
||||||
! { float-4-rep [
|
! { float-4-rep [
|
||||||
|
@ -774,45 +766,32 @@ M: x86 %unsigned-pack-vector-reps
|
||||||
{ sse4.1? { int-4-rep } }
|
{ sse4.1? { int-4-rep } }
|
||||||
} available-reps ;
|
} available-reps ;
|
||||||
|
|
||||||
:: %sign-extension-vector ( dst src rep -- )
|
M: x86 %tail>head-vector ( dst src rep -- )
|
||||||
dst rep %zero-vector
|
dup {
|
||||||
dst src rep {
|
{ float-4-rep [ drop MOVHLPS ] }
|
||||||
{ char-16-rep [ PCMPGTB ] }
|
{ double-2-rep [ [ %copy ] [ drop UNPCKHPD ] 3bi ] }
|
||||||
{ short-8-rep [ PCMPGTW ] }
|
[ drop [ %copy ] [ drop PUNPCKHQDQ ] 3bi ]
|
||||||
{ int-4-rep [ PCMPGTD ] }
|
|
||||||
{ longlong-2-rep [ PCMPGTQ ] }
|
|
||||||
} case ;
|
} case ;
|
||||||
|
|
||||||
:: (%unpack-vector-signs) ( dst src rep -- )
|
M: x86 %unpack-vector-head ( dst src rep -- )
|
||||||
dst rep signed-int-vector-rep?
|
|
||||||
[ src rep %sign-extension-vector ]
|
|
||||||
[ rep %zero-vector ] if ;
|
|
||||||
|
|
||||||
M:: x86 %unpack-vector-head ( dst src temp rep -- )
|
|
||||||
temp src rep (%unpack-vector-signs)
|
|
||||||
dst src rep %copy
|
|
||||||
dst temp rep unsign-rep {
|
|
||||||
{ char-16-rep [ PUNPCKLBW ] }
|
|
||||||
{ short-8-rep [ PUNPCKLWD ] }
|
|
||||||
{ int-4-rep [ PUNPCKLDQ ] }
|
|
||||||
{ longlong-2-rep [ PUNPCKLQDQ ] }
|
|
||||||
} case ;
|
|
||||||
|
|
||||||
M:: x86 %unpack-vector-tail ( dst src temp rep -- )
|
|
||||||
temp src rep (%unpack-vector-signs)
|
|
||||||
dst src rep %copy
|
|
||||||
dst temp rep unsign-rep {
|
|
||||||
{ char-16-rep [ PUNPCKHBW ] }
|
|
||||||
{ short-8-rep [ PUNPCKHWD ] }
|
|
||||||
{ int-4-rep [ PUNPCKHDQ ] }
|
|
||||||
{ longlong-2-rep [ PUNPCKHQDQ ] }
|
|
||||||
} case ;
|
|
||||||
|
|
||||||
M: x86 %unpack-vector-reps ( -- reps )
|
|
||||||
{
|
{
|
||||||
{ sse2? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
|
{ char-16-rep [ PMOVSXBW ] }
|
||||||
|
{ uchar-16-rep [ PMOVZXBW ] }
|
||||||
|
{ short-8-rep [ PMOVSXWD ] }
|
||||||
|
{ ushort-8-rep [ PMOVZXWD ] }
|
||||||
|
{ int-4-rep [ PMOVSXDQ ] }
|
||||||
|
{ uint-4-rep [ PMOVZXDQ ] }
|
||||||
|
{ float-4-rep [ CVTPS2PD ] }
|
||||||
|
} case ;
|
||||||
|
|
||||||
|
M: x86 %unpack-vector-head-reps ( -- reps )
|
||||||
|
{
|
||||||
|
{ sse2? { float-4-rep } }
|
||||||
|
{ sse4.1? { char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
|
||||||
} available-reps ;
|
} available-reps ;
|
||||||
|
|
||||||
|
M: x86 %unpack-vector-tail-reps ( -- reps ) { } ;
|
||||||
|
|
||||||
M: x86 %integer>float-vector ( dst src rep -- )
|
M: x86 %integer>float-vector ( dst src rep -- )
|
||||||
{
|
{
|
||||||
{ int-4-rep [ CVTDQ2PS ] }
|
{ int-4-rep [ CVTDQ2PS ] }
|
||||||
|
@ -833,14 +812,10 @@ M: x86 %float>integer-vector-reps
|
||||||
{ sse2? { float-4-rep } }
|
{ sse2? { float-4-rep } }
|
||||||
} available-reps ;
|
} available-reps ;
|
||||||
|
|
||||||
:: compare-float-v-operands ( dst src1 src2 temp rep cc -- dst' src' rep cc' )
|
|
||||||
cc { cc> cc>= cc/> cc/>= } member?
|
|
||||||
[ dst src2 src1 rep two-operand rep cc swap-cc ]
|
|
||||||
[ dst src1 src2 rep two-operand rep cc ] if ;
|
|
||||||
: (%compare-float-vector) ( dst src rep double single -- )
|
: (%compare-float-vector) ( dst src rep double single -- )
|
||||||
[ double-2-rep eq? ] 2dip if ; inline
|
[ double-2-rep eq? ] 2dip if ; inline
|
||||||
: %compare-float-vector ( dst src1 src2 temp rep cc -- )
|
: %compare-float-vector ( dst src rep cc -- )
|
||||||
compare-float-v-operands {
|
{
|
||||||
{ cc< [ [ CMPLTPD ] [ CMPLTPS ] (%compare-float-vector) ] }
|
{ cc< [ [ CMPLTPD ] [ CMPLTPS ] (%compare-float-vector) ] }
|
||||||
{ cc<= [ [ CMPLEPD ] [ CMPLEPS ] (%compare-float-vector) ] }
|
{ cc<= [ [ CMPLEPD ] [ CMPLEPS ] (%compare-float-vector) ] }
|
||||||
{ cc= [ [ CMPEQPD ] [ CMPEQPS ] (%compare-float-vector) ] }
|
{ cc= [ [ CMPEQPD ] [ CMPEQPS ] (%compare-float-vector) ] }
|
||||||
|
@ -851,16 +826,6 @@ M: x86 %float>integer-vector-reps
|
||||||
{ cc/<>= [ [ CMPUNORDPD ] [ CMPUNORDPS ] (%compare-float-vector) ] }
|
{ cc/<>= [ [ CMPUNORDPD ] [ CMPUNORDPS ] (%compare-float-vector) ] }
|
||||||
} case ;
|
} case ;
|
||||||
|
|
||||||
:: compare-int-v-operands ( dst src1 src2 temp rep cc -- not-dst/f cmp-dst src' rep cc' )
|
|
||||||
cc order-cc :> occ
|
|
||||||
occ {
|
|
||||||
{ cc= [ f dst src1 src2 rep two-operand rep cc= ] }
|
|
||||||
{ cc/= [ dst temp src1 src2 rep two-operand rep cc= ] }
|
|
||||||
{ cc<= [ dst temp src1 src2 rep two-operand rep cc> ] }
|
|
||||||
{ cc< [ f dst src2 src1 rep two-operand rep cc> ] }
|
|
||||||
{ cc> [ f dst src1 src2 rep two-operand rep cc> ] }
|
|
||||||
{ cc>= [ dst temp src2 src1 rep two-operand rep cc> ] }
|
|
||||||
} case ;
|
|
||||||
:: (%compare-int-vector) ( dst src rep int64 int32 int16 int8 -- )
|
:: (%compare-int-vector) ( dst src rep int64 int32 int16 int8 -- )
|
||||||
rep unsign-rep :> rep'
|
rep unsign-rep :> rep'
|
||||||
dst src rep' {
|
dst src rep' {
|
||||||
|
@ -869,18 +834,14 @@ M: x86 %float>integer-vector-reps
|
||||||
{ short-8-rep [ int16 call ] }
|
{ short-8-rep [ int16 call ] }
|
||||||
{ char-16-rep [ int8 call ] }
|
{ char-16-rep [ int8 call ] }
|
||||||
} case ; inline
|
} case ; inline
|
||||||
:: (%not-vector) ( dst src rep -- )
|
: %compare-int-vector ( dst src rep cc -- )
|
||||||
dst rep %fill-vector
|
{
|
||||||
dst dst src rep %xor-vector ;
|
|
||||||
:: %compare-int-vector ( dst src1 src2 temp rep cc -- )
|
|
||||||
dst src1 src2 temp rep cc compare-int-v-operands :> cc' :> rep :> src' :> cmp-dst :> not-dst
|
|
||||||
cmp-dst src' rep cc' {
|
|
||||||
{ cc= [ [ PCMPEQQ ] [ PCMPEQD ] [ PCMPEQW ] [ PCMPEQB ] (%compare-int-vector) ] }
|
{ cc= [ [ PCMPEQQ ] [ PCMPEQD ] [ PCMPEQW ] [ PCMPEQB ] (%compare-int-vector) ] }
|
||||||
{ cc> [ [ PCMPGTQ ] [ PCMPGTD ] [ PCMPGTW ] [ PCMPGTB ] (%compare-int-vector) ] }
|
{ cc> [ [ PCMPGTQ ] [ PCMPGTD ] [ PCMPGTW ] [ PCMPGTB ] (%compare-int-vector) ] }
|
||||||
} case
|
} case ;
|
||||||
not-dst [ cmp-dst rep (%not-vector) ] when* ;
|
|
||||||
|
|
||||||
M: x86 %compare-vector ( dst src1 src2 temp rep cc -- )
|
M: x86 %compare-vector ( dst src1 src2 rep cc -- )
|
||||||
|
[ [ two-operand ] keep ] dip
|
||||||
over float-vector-rep?
|
over float-vector-rep?
|
||||||
[ %compare-float-vector ]
|
[ %compare-float-vector ]
|
||||||
[ %compare-int-vector ] if ;
|
[ %compare-int-vector ] if ;
|
||||||
|
@ -891,11 +852,6 @@ M: x86 %compare-vector ( dst src1 src2 temp rep cc -- )
|
||||||
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
|
{ sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
|
||||||
{ sse4.1? { longlong-2-rep ulonglong-2-rep } }
|
{ sse4.1? { longlong-2-rep ulonglong-2-rep } }
|
||||||
} available-reps ;
|
} available-reps ;
|
||||||
: %compare-vector-unord-reps ( -- reps )
|
|
||||||
{
|
|
||||||
{ sse? { float-4-rep } }
|
|
||||||
{ sse2? { double-2-rep } }
|
|
||||||
} available-reps ;
|
|
||||||
: %compare-vector-ord-reps ( -- reps )
|
: %compare-vector-ord-reps ( -- reps )
|
||||||
{
|
{
|
||||||
{ sse? { float-4-rep } }
|
{ sse? { float-4-rep } }
|
||||||
|
@ -906,10 +862,44 @@ M: x86 %compare-vector ( dst src1 src2 temp rep cc -- )
|
||||||
M: x86 %compare-vector-reps
|
M: x86 %compare-vector-reps
|
||||||
{
|
{
|
||||||
{ [ dup { cc= cc/= } memq? ] [ drop %compare-vector-eq-reps ] }
|
{ [ dup { cc= cc/= } memq? ] [ drop %compare-vector-eq-reps ] }
|
||||||
{ [ dup { cc<>= cc/<>= } memq? ] [ drop %compare-vector-unord-reps ] }
|
|
||||||
[ drop %compare-vector-ord-reps ]
|
[ drop %compare-vector-ord-reps ]
|
||||||
} cond ;
|
} cond ;
|
||||||
|
|
||||||
|
: %compare-float-vector-ccs ( cc -- ccs not? )
|
||||||
|
{
|
||||||
|
{ cc< [ { { cc< f } } f ] }
|
||||||
|
{ cc<= [ { { cc<= f } } f ] }
|
||||||
|
{ cc> [ { { cc< t } } f ] }
|
||||||
|
{ cc>= [ { { cc<= t } } f ] }
|
||||||
|
{ cc= [ { { cc= f } } f ] }
|
||||||
|
{ cc<> [ { { cc< f } { cc< t } } f ] }
|
||||||
|
{ cc<>= [ { { cc<>= f } } f ] }
|
||||||
|
{ cc/< [ { { cc/< f } } f ] }
|
||||||
|
{ cc/<= [ { { cc/<= f } } f ] }
|
||||||
|
{ cc/> [ { { cc/< t } } f ] }
|
||||||
|
{ cc/>= [ { { cc/<= t } } f ] }
|
||||||
|
{ cc/= [ { { cc/= f } } f ] }
|
||||||
|
{ cc/<> [ { { cc/= f } { cc/<>= f } } f ] }
|
||||||
|
{ cc/<>= [ { { cc/<>= f } } f ] }
|
||||||
|
} case ;
|
||||||
|
|
||||||
|
: %compare-int-vector-ccs ( cc -- ccs not? )
|
||||||
|
order-cc {
|
||||||
|
{ cc< [ { { cc> t } } f ] }
|
||||||
|
{ cc<= [ { { cc> f } } t ] }
|
||||||
|
{ cc> [ { { cc> f } } f ] }
|
||||||
|
{ cc>= [ { { cc> t } } t ] }
|
||||||
|
{ cc= [ { { cc= f } } f ] }
|
||||||
|
{ cc/= [ { { cc= f } } t ] }
|
||||||
|
{ t [ { } t ] }
|
||||||
|
{ f [ { } f ] }
|
||||||
|
} case ;
|
||||||
|
|
||||||
|
M: x86 %compare-vector-ccs
|
||||||
|
swap float-vector-rep?
|
||||||
|
[ %compare-float-vector-ccs ]
|
||||||
|
[ %compare-int-vector-ccs ] if ;
|
||||||
|
|
||||||
:: %test-vector-mask ( dst temp mask vcc -- )
|
:: %test-vector-mask ( dst temp mask vcc -- )
|
||||||
vcc {
|
vcc {
|
||||||
{ vcc-any [ dst dst TEST dst temp \ CMOVNE %boolean ] }
|
{ vcc-any [ dst dst TEST dst temp \ CMOVNE %boolean ] }
|
||||||
|
|
|
@ -4,7 +4,7 @@ USING: alien alien.c-types alien.data assocs combinators
|
||||||
cpu.architecture compiler.cfg.comparisons fry generalizations
|
cpu.architecture compiler.cfg.comparisons fry generalizations
|
||||||
kernel libc macros math
|
kernel libc macros math
|
||||||
math.vectors.conversion.backend
|
math.vectors.conversion.backend
|
||||||
sequences effects accessors namespaces
|
sequences sets effects accessors namespaces
|
||||||
lexer parser vocabs.parser words arrays math.vectors ;
|
lexer parser vocabs.parser words arrays math.vectors ;
|
||||||
IN: math.vectors.simd.intrinsics
|
IN: math.vectors.simd.intrinsics
|
||||||
|
|
||||||
|
@ -137,6 +137,10 @@ MACRO: (simd-boa) ( rep -- quot )
|
||||||
|
|
||||||
GENERIC# supported-simd-op? 1 ( rep intrinsic -- ? )
|
GENERIC# supported-simd-op? 1 ( rep intrinsic -- ? )
|
||||||
|
|
||||||
|
: (%unpack-reps) ( -- reps )
|
||||||
|
%merge-vector-reps [ int-vector-rep? ] filter
|
||||||
|
%unpack-vector-head-reps union ;
|
||||||
|
|
||||||
M: vector-rep supported-simd-op?
|
M: vector-rep supported-simd-op?
|
||||||
{
|
{
|
||||||
{ \ (simd-v+) [ %add-vector-reps ] }
|
{ \ (simd-v+) [ %add-vector-reps ] }
|
||||||
|
@ -174,8 +178,8 @@ M: vector-rep supported-simd-op?
|
||||||
{ \ (simd-(v>integer)) [ %float>integer-vector-reps ] }
|
{ \ (simd-(v>integer)) [ %float>integer-vector-reps ] }
|
||||||
{ \ (simd-(vpack-signed)) [ %signed-pack-vector-reps ] }
|
{ \ (simd-(vpack-signed)) [ %signed-pack-vector-reps ] }
|
||||||
{ \ (simd-(vpack-unsigned)) [ %unsigned-pack-vector-reps ] }
|
{ \ (simd-(vpack-unsigned)) [ %unsigned-pack-vector-reps ] }
|
||||||
{ \ (simd-(vunpack-head)) [ %unpack-vector-reps ] }
|
{ \ (simd-(vunpack-head)) [ (%unpack-reps) ] }
|
||||||
{ \ (simd-(vunpack-tail)) [ %unpack-vector-reps ] }
|
{ \ (simd-(vunpack-tail)) [ (%unpack-reps) ] }
|
||||||
{ \ (simd-v<=) [ cc<= %compare-vector-reps ] }
|
{ \ (simd-v<=) [ cc<= %compare-vector-reps ] }
|
||||||
{ \ (simd-v<) [ cc< %compare-vector-reps ] }
|
{ \ (simd-v<) [ cc< %compare-vector-reps ] }
|
||||||
{ \ (simd-v=) [ cc= %compare-vector-reps ] }
|
{ \ (simd-v=) [ cc= %compare-vector-reps ] }
|
||||||
|
|
Loading…
Reference in New Issue