compiler.cfg.ssa.destruction: more aggressive coalescing work in progress
parent
77fa16c76f
commit
1e841e5086
|
@ -707,7 +707,10 @@ UNION: kill-vreg-insn
|
||||||
UNION: def-is-use-insn
|
UNION: def-is-use-insn
|
||||||
##integer>bignum
|
##integer>bignum
|
||||||
##bignum>integer
|
##bignum>integer
|
||||||
##unbox-any-c-ptr ;
|
##string-nth
|
||||||
|
##unbox-any-c-ptr
|
||||||
|
##unary-float-function
|
||||||
|
##binary-float-function ;
|
||||||
|
|
||||||
SYMBOL: vreg-insn
|
SYMBOL: vreg-insn
|
||||||
|
|
||||||
|
|
|
@ -12,7 +12,6 @@ compiler.cfg.copy-prop
|
||||||
compiler.cfg.dce
|
compiler.cfg.dce
|
||||||
compiler.cfg.write-barrier
|
compiler.cfg.write-barrier
|
||||||
compiler.cfg.representations
|
compiler.cfg.representations
|
||||||
compiler.cfg.two-operand
|
|
||||||
compiler.cfg.ssa.destruction
|
compiler.cfg.ssa.destruction
|
||||||
compiler.cfg.empty-blocks
|
compiler.cfg.empty-blocks
|
||||||
compiler.cfg.checker ;
|
compiler.cfg.checker ;
|
||||||
|
@ -37,7 +36,6 @@ SYMBOL: check-optimizer?
|
||||||
eliminate-dead-code
|
eliminate-dead-code
|
||||||
eliminate-write-barriers
|
eliminate-write-barriers
|
||||||
select-representations
|
select-representations
|
||||||
convert-two-operand
|
|
||||||
destruct-ssa
|
destruct-ssa
|
||||||
delete-empty-blocks
|
delete-empty-blocks
|
||||||
?check ;
|
?check ;
|
||||||
|
|
|
@ -6,6 +6,7 @@ sets vectors
|
||||||
compiler.cfg.rpo
|
compiler.cfg.rpo
|
||||||
compiler.cfg.def-use
|
compiler.cfg.def-use
|
||||||
compiler.cfg.renaming
|
compiler.cfg.renaming
|
||||||
|
compiler.cfg.registers
|
||||||
compiler.cfg.dominance
|
compiler.cfg.dominance
|
||||||
compiler.cfg.instructions
|
compiler.cfg.instructions
|
||||||
compiler.cfg.liveness.ssa
|
compiler.cfg.liveness.ssa
|
||||||
|
@ -60,15 +61,23 @@ SYMBOL: copies
|
||||||
|
|
||||||
GENERIC: prepare-insn ( insn -- )
|
GENERIC: prepare-insn ( insn -- )
|
||||||
|
|
||||||
|
: try-to-coalesce ( dst src -- ) 2array copies get push ;
|
||||||
|
|
||||||
|
M: insn prepare-insn
|
||||||
|
[ defs-vreg ] [ uses-vregs ] bi
|
||||||
|
2dup empty? not and [
|
||||||
|
first
|
||||||
|
2dup [ rep-of ] bi@ eq?
|
||||||
|
[ try-to-coalesce ] [ 2drop ] if
|
||||||
|
] [ 2drop ] if ;
|
||||||
|
|
||||||
M: ##copy prepare-insn
|
M: ##copy prepare-insn
|
||||||
[ dst>> ] [ src>> ] bi 2array copies get push ;
|
[ dst>> ] [ src>> ] bi try-to-coalesce ;
|
||||||
|
|
||||||
M: ##phi prepare-insn
|
M: ##phi prepare-insn
|
||||||
[ dst>> ] [ inputs>> values ] bi
|
[ dst>> ] [ inputs>> values ] bi
|
||||||
[ eliminate-copy ] with each ;
|
[ eliminate-copy ] with each ;
|
||||||
|
|
||||||
M: insn prepare-insn drop ;
|
|
||||||
|
|
||||||
: prepare-block ( bb -- )
|
: prepare-block ( bb -- )
|
||||||
instructions>> [ prepare-insn ] each ;
|
instructions>> [ prepare-insn ] each ;
|
||||||
|
|
||||||
|
|
|
@ -11,28 +11,25 @@ IN: compiler.cfg.ssa.interference.live-ranges
|
||||||
|
|
||||||
SYMBOLS: local-def-indices local-kill-indices ;
|
SYMBOLS: local-def-indices local-kill-indices ;
|
||||||
|
|
||||||
: record-def ( n vreg -- )
|
: record-def ( n insn -- )
|
||||||
! We allow multiple defs of a vreg as long as they're
|
! We allow multiple defs of a vreg as long as they're
|
||||||
! all in the same basic block
|
! all in the same basic block
|
||||||
dup [
|
defs-vreg dup [
|
||||||
local-def-indices get 2dup key?
|
local-def-indices get 2dup key?
|
||||||
[ 3drop ] [ set-at ] if
|
[ 3drop ] [ set-at ] if
|
||||||
] [ 2drop ] if ;
|
] [ 2drop ] if ;
|
||||||
|
|
||||||
: record-uses ( n vregs -- )
|
: record-uses ( n insn -- )
|
||||||
local-kill-indices get '[ _ set-at ] with each ;
|
! Record live intervals so that all but the first input interfere
|
||||||
|
! with the output. This lets us coalesce the output with the
|
||||||
|
! first input.
|
||||||
|
[ uses-vregs ] [ def-is-use-insn? ] bi over empty? [ 3drop ] [
|
||||||
|
[ [ first local-kill-indices get set-at ] [ rest-slice ] 2bi ] unless
|
||||||
|
[ 1 + ] dip [ local-kill-indices get set-at ] with each
|
||||||
|
] if ;
|
||||||
|
|
||||||
: visit-insn ( insn n -- )
|
: visit-insn ( insn n -- )
|
||||||
! Instructions are numbered 2 apart. If the instruction requires
|
2 * swap [ record-def ] [ record-uses ] 2bi ;
|
||||||
! that outputs are in different registers than the inputs, then
|
|
||||||
! a use will be registered for every output immediately after
|
|
||||||
! this instruction and before the next one, ensuring that outputs
|
|
||||||
! interfere with inputs.
|
|
||||||
2 *
|
|
||||||
[ swap defs-vreg record-def ]
|
|
||||||
[ swap uses-vregs record-uses ]
|
|
||||||
[ over def-is-use-insn? [ 1 + swap defs-vreg 1array record-uses ] [ 2drop ] if ]
|
|
||||||
2tri ;
|
|
||||||
|
|
||||||
SYMBOLS: def-indices kill-indices ;
|
SYMBOLS: def-indices kill-indices ;
|
||||||
|
|
||||||
|
|
|
@ -1 +0,0 @@
|
||||||
Converting three-operand instructions into two-operand form
|
|
|
@ -1,52 +0,0 @@
|
||||||
USING: kernel compiler.cfg.two-operand compiler.cfg.instructions
|
|
||||||
compiler.cfg.registers cpu.architecture namespaces tools.test ;
|
|
||||||
IN: compiler.cfg.two-operand.tests
|
|
||||||
|
|
||||||
3 vreg-counter set-global
|
|
||||||
|
|
||||||
[
|
|
||||||
V{
|
|
||||||
T{ ##copy f 1 2 int-rep }
|
|
||||||
T{ ##sub f 1 1 3 }
|
|
||||||
}
|
|
||||||
] [
|
|
||||||
H{
|
|
||||||
{ 1 int-rep }
|
|
||||||
{ 2 int-rep }
|
|
||||||
{ 3 int-rep }
|
|
||||||
} clone representations set
|
|
||||||
{
|
|
||||||
T{ ##sub f 1 2 3 }
|
|
||||||
} (convert-two-operand)
|
|
||||||
] unit-test
|
|
||||||
|
|
||||||
[
|
|
||||||
V{
|
|
||||||
T{ ##copy f 1 2 double-rep }
|
|
||||||
T{ ##sub-float f 1 1 3 }
|
|
||||||
}
|
|
||||||
] [
|
|
||||||
H{
|
|
||||||
{ 1 double-rep }
|
|
||||||
{ 2 double-rep }
|
|
||||||
{ 3 double-rep }
|
|
||||||
} clone representations set
|
|
||||||
{
|
|
||||||
T{ ##sub-float f 1 2 3 }
|
|
||||||
} (convert-two-operand)
|
|
||||||
] unit-test
|
|
||||||
|
|
||||||
[
|
|
||||||
V{
|
|
||||||
T{ ##copy f 1 2 double-rep }
|
|
||||||
T{ ##mul-float f 1 1 1 }
|
|
||||||
}
|
|
||||||
] [
|
|
||||||
H{
|
|
||||||
{ 1 double-rep }
|
|
||||||
{ 2 double-rep }
|
|
||||||
} clone representations set
|
|
||||||
{
|
|
||||||
T{ ##mul-float f 1 2 2 }
|
|
||||||
} (convert-two-operand)
|
|
||||||
] unit-test
|
|
|
@ -1,90 +0,0 @@
|
||||||
! Copyright (C) 2008, 2009 Slava Pestov.
|
|
||||||
! See http://factorcode.org/license.txt for BSD license.
|
|
||||||
USING: accessors kernel sequences make combinators
|
|
||||||
compiler.cfg.registers compiler.cfg.instructions
|
|
||||||
compiler.cfg.rpo cpu.architecture ;
|
|
||||||
IN: compiler.cfg.two-operand
|
|
||||||
|
|
||||||
! This pass runs before SSA coalescing and normalizes instructions
|
|
||||||
! to fit the x86 two-address scheme. Since the input is in SSA,
|
|
||||||
! it suffices to convert
|
|
||||||
!
|
|
||||||
! x = y op z
|
|
||||||
!
|
|
||||||
! to
|
|
||||||
!
|
|
||||||
! x = y
|
|
||||||
! x = x op z
|
|
||||||
!
|
|
||||||
! We don't bother with ##add, ##add-imm, ##sub-imm or ##mul-imm
|
|
||||||
! since x86 has LEA and IMUL instructions which are effectively
|
|
||||||
! three-operand addition and multiplication, respectively.
|
|
||||||
|
|
||||||
UNION: two-operand-insn
|
|
||||||
##sub
|
|
||||||
##mul
|
|
||||||
##and
|
|
||||||
##and-imm
|
|
||||||
##or
|
|
||||||
##or-imm
|
|
||||||
##xor
|
|
||||||
##xor-imm
|
|
||||||
##shl
|
|
||||||
##shl-imm
|
|
||||||
##shr
|
|
||||||
##shr-imm
|
|
||||||
##sar
|
|
||||||
##sar-imm
|
|
||||||
##min
|
|
||||||
##max
|
|
||||||
##fixnum-add
|
|
||||||
##fixnum-sub
|
|
||||||
##fixnum-mul
|
|
||||||
##add-float
|
|
||||||
##sub-float
|
|
||||||
##mul-float
|
|
||||||
##div-float
|
|
||||||
##min-float
|
|
||||||
##max-float
|
|
||||||
##add-vector
|
|
||||||
##saturated-add-vector
|
|
||||||
##add-sub-vector
|
|
||||||
##sub-vector
|
|
||||||
##saturated-sub-vector
|
|
||||||
##mul-vector
|
|
||||||
##saturated-mul-vector
|
|
||||||
##div-vector
|
|
||||||
##min-vector
|
|
||||||
##max-vector
|
|
||||||
##and-vector
|
|
||||||
##or-vector
|
|
||||||
##xor-vector
|
|
||||||
##shl-vector
|
|
||||||
##shr-vector ;
|
|
||||||
|
|
||||||
GENERIC: convert-two-operand* ( insn -- )
|
|
||||||
|
|
||||||
: emit-copy ( dst src -- )
|
|
||||||
dup rep-of ##copy ; inline
|
|
||||||
|
|
||||||
M: two-operand-insn convert-two-operand*
|
|
||||||
[ [ dst>> ] [ src1>> ] bi emit-copy ]
|
|
||||||
[
|
|
||||||
dup [ src1>> ] [ src2>> ] bi = [ dup dst>> >>src2 ] when
|
|
||||||
dup dst>> >>src1 ,
|
|
||||||
] bi ;
|
|
||||||
|
|
||||||
M: ##not convert-two-operand*
|
|
||||||
[ [ dst>> ] [ src>> ] bi emit-copy ]
|
|
||||||
[ dup dst>> >>src , ]
|
|
||||||
bi ;
|
|
||||||
|
|
||||||
M: insn convert-two-operand* , ;
|
|
||||||
|
|
||||||
: (convert-two-operand) ( insns -- insns' )
|
|
||||||
dup first kill-vreg-insn? [
|
|
||||||
[ [ convert-two-operand* ] each ] V{ } make
|
|
||||||
] unless ;
|
|
||||||
|
|
||||||
: convert-two-operand ( cfg -- cfg' )
|
|
||||||
two-operand? [ [ (convert-two-operand) ] local-optimization ] when ;
|
|
|
@ -135,8 +135,6 @@ M: ulonglong-2-rep scalar-rep-of drop ulonglong-scalar-rep ;
|
||||||
! Mapping from register class to machine registers
|
! Mapping from register class to machine registers
|
||||||
HOOK: machine-registers cpu ( -- assoc )
|
HOOK: machine-registers cpu ( -- assoc )
|
||||||
|
|
||||||
HOOK: two-operand? cpu ( -- ? )
|
|
||||||
|
|
||||||
HOOK: %load-immediate cpu ( reg obj -- )
|
HOOK: %load-immediate cpu ( reg obj -- )
|
||||||
HOOK: %load-reference cpu ( reg obj -- )
|
HOOK: %load-reference cpu ( reg obj -- )
|
||||||
|
|
||||||
|
|
|
@ -49,8 +49,6 @@ M: ppc machine-registers
|
||||||
CONSTANT: scratch-reg 30
|
CONSTANT: scratch-reg 30
|
||||||
CONSTANT: fp-scratch-reg 30
|
CONSTANT: fp-scratch-reg 30
|
||||||
|
|
||||||
M: ppc two-operand? f ;
|
|
||||||
|
|
||||||
M: ppc %load-immediate ( reg n -- ) swap LOAD ;
|
M: ppc %load-immediate ( reg n -- ) swap LOAD ;
|
||||||
|
|
||||||
M: ppc %load-reference ( reg obj -- )
|
M: ppc %load-reference ( reg obj -- )
|
||||||
|
|
|
@ -20,8 +20,6 @@ IN: cpu.x86
|
||||||
M: label JMP 0 JMP rc-relative label-fixup ;
|
M: label JMP 0 JMP rc-relative label-fixup ;
|
||||||
M: label JUMPcc [ 0 ] dip JUMPcc rc-relative label-fixup ;
|
M: label JUMPcc [ 0 ] dip JUMPcc rc-relative label-fixup ;
|
||||||
|
|
||||||
M: x86 two-operand? t ;
|
|
||||||
|
|
||||||
M: x86 vector-regs float-regs ;
|
M: x86 vector-regs float-regs ;
|
||||||
|
|
||||||
HOOK: stack-reg cpu ( -- reg )
|
HOOK: stack-reg cpu ( -- reg )
|
||||||
|
@ -102,26 +100,35 @@ M: x86 %slot-imm ( dst obj slot tag -- ) (%slot-imm) MOV ;
|
||||||
M: x86 %set-slot ( src obj slot -- ) [+] swap MOV ;
|
M: x86 %set-slot ( src obj slot -- ) [+] swap MOV ;
|
||||||
M: x86 %set-slot-imm ( src obj slot tag -- ) (%slot-imm) swap MOV ;
|
M: x86 %set-slot-imm ( src obj slot tag -- ) (%slot-imm) swap MOV ;
|
||||||
|
|
||||||
|
:: two-operand ( dst src1 src2 rep -- dst src )
|
||||||
|
dst src2 eq? [ "Cannot handle this case" throw ] when
|
||||||
|
dst src1 rep %copy
|
||||||
|
dst src2 ; inline
|
||||||
|
|
||||||
|
:: one-operand ( dst src rep -- dst )
|
||||||
|
dst src rep %copy
|
||||||
|
dst ; inline
|
||||||
|
|
||||||
M: x86 %add 2over eq? [ nip ADD ] [ [+] LEA ] if ;
|
M: x86 %add 2over eq? [ nip ADD ] [ [+] LEA ] if ;
|
||||||
M: x86 %add-imm 2over eq? [ nip ADD ] [ [+] LEA ] if ;
|
M: x86 %add-imm 2over eq? [ nip ADD ] [ [+] LEA ] if ;
|
||||||
M: x86 %sub nip SUB ;
|
M: x86 %sub int-rep two-operand SUB ;
|
||||||
M: x86 %sub-imm 2over eq? [ nip SUB ] [ neg [+] LEA ] if ;
|
M: x86 %sub-imm 2over eq? [ nip SUB ] [ neg [+] LEA ] if ;
|
||||||
M: x86 %mul nip swap IMUL2 ;
|
M: x86 %mul int-rep two-operand swap IMUL2 ;
|
||||||
M: x86 %mul-imm IMUL3 ;
|
M: x86 %mul-imm IMUL3 ;
|
||||||
M: x86 %and nip AND ;
|
M: x86 %and int-rep two-operand AND ;
|
||||||
M: x86 %and-imm nip AND ;
|
M: x86 %and-imm int-rep two-operand AND ;
|
||||||
M: x86 %or nip OR ;
|
M: x86 %or int-rep two-operand OR ;
|
||||||
M: x86 %or-imm nip OR ;
|
M: x86 %or-imm int-rep two-operand OR ;
|
||||||
M: x86 %xor nip XOR ;
|
M: x86 %xor int-rep two-operand XOR ;
|
||||||
M: x86 %xor-imm nip XOR ;
|
M: x86 %xor-imm int-rep two-operand XOR ;
|
||||||
M: x86 %shl-imm nip SHL ;
|
M: x86 %shl-imm int-rep two-operand SHL ;
|
||||||
M: x86 %shr-imm nip SHR ;
|
M: x86 %shr-imm int-rep two-operand SHR ;
|
||||||
M: x86 %sar-imm nip SAR ;
|
M: x86 %sar-imm int-rep two-operand SAR ;
|
||||||
|
|
||||||
M: x86 %min nip [ CMP ] [ CMOVG ] 2bi ;
|
M: x86 %min int-rep two-operand [ CMP ] [ CMOVG ] 2bi ;
|
||||||
M: x86 %max nip [ CMP ] [ CMOVL ] 2bi ;
|
M: x86 %max int-rep two-operand [ CMP ] [ CMOVL ] 2bi ;
|
||||||
|
|
||||||
M: x86 %not drop NOT ;
|
M: x86 %not int-rep one-operand NOT ;
|
||||||
M: x86 %log2 BSR ;
|
M: x86 %log2 BSR ;
|
||||||
|
|
||||||
GENERIC: copy-register* ( dst src rep -- )
|
GENERIC: copy-register* ( dst src rep -- )
|
||||||
|
@ -137,18 +144,14 @@ M: vector-rep copy-register* drop MOVDQU ;
|
||||||
M: x86 %copy ( dst src rep -- )
|
M: x86 %copy ( dst src rep -- )
|
||||||
2over eq? [ 3drop ] [ copy-register* ] if ;
|
2over eq? [ 3drop ] [ copy-register* ] if ;
|
||||||
|
|
||||||
:: overflow-template ( label dst src1 src2 insn -- )
|
|
||||||
src1 src2 insn call
|
|
||||||
label JO ; inline
|
|
||||||
|
|
||||||
M: x86 %fixnum-add ( label dst src1 src2 -- )
|
M: x86 %fixnum-add ( label dst src1 src2 -- )
|
||||||
[ ADD ] overflow-template ;
|
int-rep two-operand ADD JO ;
|
||||||
|
|
||||||
M: x86 %fixnum-sub ( label dst src1 src2 -- )
|
M: x86 %fixnum-sub ( label dst src1 src2 -- )
|
||||||
[ SUB ] overflow-template ;
|
int-rep two-operand SUB JO ;
|
||||||
|
|
||||||
M: x86 %fixnum-mul ( label dst src1 src2 -- )
|
M: x86 %fixnum-mul ( label dst src1 src2 -- )
|
||||||
[ swap IMUL2 ] overflow-template ;
|
int-rep two-operand swap IMUL2 JO ;
|
||||||
|
|
||||||
: bignum@ ( reg n -- op )
|
: bignum@ ( reg n -- op )
|
||||||
cells bignum tag-number - [+] ; inline
|
cells bignum tag-number - [+] ; inline
|
||||||
|
@ -210,12 +213,12 @@ M:: x86 %bignum>integer ( dst src temp -- )
|
||||||
"end" resolve-label
|
"end" resolve-label
|
||||||
] with-scope ;
|
] with-scope ;
|
||||||
|
|
||||||
M: x86 %add-float nip ADDSD ;
|
M: x86 %add-float double-rep two-operand ADDSD ;
|
||||||
M: x86 %sub-float nip SUBSD ;
|
M: x86 %sub-float double-rep two-operand SUBSD ;
|
||||||
M: x86 %mul-float nip MULSD ;
|
M: x86 %mul-float double-rep two-operand MULSD ;
|
||||||
M: x86 %div-float nip DIVSD ;
|
M: x86 %div-float double-rep two-operand DIVSD ;
|
||||||
M: x86 %min-float nip MINSD ;
|
M: x86 %min-float double-rep two-operand MINSD ;
|
||||||
M: x86 %max-float nip MAXSD ;
|
M: x86 %max-float double-rep two-operand MAXSD ;
|
||||||
M: x86 %sqrt SQRTSD ;
|
M: x86 %sqrt SQRTSD ;
|
||||||
|
|
||||||
M: x86 %single>double-float CVTSS2SD ;
|
M: x86 %single>double-float CVTSS2SD ;
|
||||||
|
@ -299,6 +302,7 @@ M: x86 %gather-vector-2-reps
|
||||||
} available-reps ;
|
} available-reps ;
|
||||||
|
|
||||||
M: x86 %add-vector ( dst src1 src2 rep -- )
|
M: x86 %add-vector ( dst src1 src2 rep -- )
|
||||||
|
[ two-operand ] keep
|
||||||
{
|
{
|
||||||
{ float-4-rep [ ADDPS ] }
|
{ float-4-rep [ ADDPS ] }
|
||||||
{ double-2-rep [ ADDPD ] }
|
{ double-2-rep [ ADDPD ] }
|
||||||
|
@ -310,7 +314,7 @@ M: x86 %add-vector ( dst src1 src2 rep -- )
|
||||||
{ uint-4-rep [ PADDD ] }
|
{ uint-4-rep [ PADDD ] }
|
||||||
{ longlong-2-rep [ PADDQ ] }
|
{ longlong-2-rep [ PADDQ ] }
|
||||||
{ ulonglong-2-rep [ PADDQ ] }
|
{ ulonglong-2-rep [ PADDQ ] }
|
||||||
} case drop ;
|
} case ;
|
||||||
|
|
||||||
M: x86 %add-vector-reps
|
M: x86 %add-vector-reps
|
||||||
{
|
{
|
||||||
|
@ -319,12 +323,13 @@ M: x86 %add-vector-reps
|
||||||
} available-reps ;
|
} available-reps ;
|
||||||
|
|
||||||
M: x86 %saturated-add-vector ( dst src1 src2 rep -- )
|
M: x86 %saturated-add-vector ( dst src1 src2 rep -- )
|
||||||
|
[ two-operand ] keep
|
||||||
{
|
{
|
||||||
{ char-16-rep [ PADDSB ] }
|
{ char-16-rep [ PADDSB ] }
|
||||||
{ uchar-16-rep [ PADDUSB ] }
|
{ uchar-16-rep [ PADDUSB ] }
|
||||||
{ short-8-rep [ PADDSW ] }
|
{ short-8-rep [ PADDSW ] }
|
||||||
{ ushort-8-rep [ PADDUSW ] }
|
{ ushort-8-rep [ PADDUSW ] }
|
||||||
} case drop ;
|
} case ;
|
||||||
|
|
||||||
M: x86 %saturated-add-vector-reps
|
M: x86 %saturated-add-vector-reps
|
||||||
{
|
{
|
||||||
|
@ -332,10 +337,11 @@ M: x86 %saturated-add-vector-reps
|
||||||
} available-reps ;
|
} available-reps ;
|
||||||
|
|
||||||
M: x86 %add-sub-vector ( dst src1 src2 rep -- )
|
M: x86 %add-sub-vector ( dst src1 src2 rep -- )
|
||||||
|
[ two-operand ] keep
|
||||||
{
|
{
|
||||||
{ float-4-rep [ ADDSUBPS ] }
|
{ float-4-rep [ ADDSUBPS ] }
|
||||||
{ double-2-rep [ ADDSUBPD ] }
|
{ double-2-rep [ ADDSUBPD ] }
|
||||||
} case drop ;
|
} case ;
|
||||||
|
|
||||||
M: x86 %add-sub-vector-reps
|
M: x86 %add-sub-vector-reps
|
||||||
{
|
{
|
||||||
|
@ -343,6 +349,7 @@ M: x86 %add-sub-vector-reps
|
||||||
} available-reps ;
|
} available-reps ;
|
||||||
|
|
||||||
M: x86 %sub-vector ( dst src1 src2 rep -- )
|
M: x86 %sub-vector ( dst src1 src2 rep -- )
|
||||||
|
[ two-operand ] keep
|
||||||
{
|
{
|
||||||
{ float-4-rep [ SUBPS ] }
|
{ float-4-rep [ SUBPS ] }
|
||||||
{ double-2-rep [ SUBPD ] }
|
{ double-2-rep [ SUBPD ] }
|
||||||
|
@ -354,7 +361,7 @@ M: x86 %sub-vector ( dst src1 src2 rep -- )
|
||||||
{ uint-4-rep [ PSUBD ] }
|
{ uint-4-rep [ PSUBD ] }
|
||||||
{ longlong-2-rep [ PSUBQ ] }
|
{ longlong-2-rep [ PSUBQ ] }
|
||||||
{ ulonglong-2-rep [ PSUBQ ] }
|
{ ulonglong-2-rep [ PSUBQ ] }
|
||||||
} case drop ;
|
} case ;
|
||||||
|
|
||||||
M: x86 %sub-vector-reps
|
M: x86 %sub-vector-reps
|
||||||
{
|
{
|
||||||
|
@ -363,12 +370,13 @@ M: x86 %sub-vector-reps
|
||||||
} available-reps ;
|
} available-reps ;
|
||||||
|
|
||||||
M: x86 %saturated-sub-vector ( dst src1 src2 rep -- )
|
M: x86 %saturated-sub-vector ( dst src1 src2 rep -- )
|
||||||
|
[ two-operand ] keep
|
||||||
{
|
{
|
||||||
{ char-16-rep [ PSUBSB ] }
|
{ char-16-rep [ PSUBSB ] }
|
||||||
{ uchar-16-rep [ PSUBUSB ] }
|
{ uchar-16-rep [ PSUBUSB ] }
|
||||||
{ short-8-rep [ PSUBSW ] }
|
{ short-8-rep [ PSUBSW ] }
|
||||||
{ ushort-8-rep [ PSUBUSW ] }
|
{ ushort-8-rep [ PSUBUSW ] }
|
||||||
} case drop ;
|
} case ;
|
||||||
|
|
||||||
M: x86 %saturated-sub-vector-reps
|
M: x86 %saturated-sub-vector-reps
|
||||||
{
|
{
|
||||||
|
@ -376,6 +384,7 @@ M: x86 %saturated-sub-vector-reps
|
||||||
} available-reps ;
|
} available-reps ;
|
||||||
|
|
||||||
M: x86 %mul-vector ( dst src1 src2 rep -- )
|
M: x86 %mul-vector ( dst src1 src2 rep -- )
|
||||||
|
[ two-operand ] keep
|
||||||
{
|
{
|
||||||
{ float-4-rep [ MULPS ] }
|
{ float-4-rep [ MULPS ] }
|
||||||
{ double-2-rep [ MULPD ] }
|
{ double-2-rep [ MULPD ] }
|
||||||
|
@ -383,7 +392,7 @@ M: x86 %mul-vector ( dst src1 src2 rep -- )
|
||||||
{ ushort-8-rep [ PMULLW ] }
|
{ ushort-8-rep [ PMULLW ] }
|
||||||
{ int-4-rep [ PMULLD ] }
|
{ int-4-rep [ PMULLD ] }
|
||||||
{ uint-4-rep [ PMULLD ] }
|
{ uint-4-rep [ PMULLD ] }
|
||||||
} case drop ;
|
} case ;
|
||||||
|
|
||||||
M: x86 %mul-vector-reps
|
M: x86 %mul-vector-reps
|
||||||
{
|
{
|
||||||
|
@ -397,10 +406,11 @@ M: x86 %saturated-mul-vector-reps
|
||||||
{ } ;
|
{ } ;
|
||||||
|
|
||||||
M: x86 %div-vector ( dst src1 src2 rep -- )
|
M: x86 %div-vector ( dst src1 src2 rep -- )
|
||||||
|
[ two-operand ] keep
|
||||||
{
|
{
|
||||||
{ float-4-rep [ DIVPS ] }
|
{ float-4-rep [ DIVPS ] }
|
||||||
{ double-2-rep [ DIVPD ] }
|
{ double-2-rep [ DIVPD ] }
|
||||||
} case drop ;
|
} case ;
|
||||||
|
|
||||||
M: x86 %div-vector-reps
|
M: x86 %div-vector-reps
|
||||||
{
|
{
|
||||||
|
@ -409,6 +419,7 @@ M: x86 %div-vector-reps
|
||||||
} available-reps ;
|
} available-reps ;
|
||||||
|
|
||||||
M: x86 %min-vector ( dst src1 src2 rep -- )
|
M: x86 %min-vector ( dst src1 src2 rep -- )
|
||||||
|
[ two-operand ] keep
|
||||||
{
|
{
|
||||||
{ char-16-rep [ PMINSB ] }
|
{ char-16-rep [ PMINSB ] }
|
||||||
{ uchar-16-rep [ PMINUB ] }
|
{ uchar-16-rep [ PMINUB ] }
|
||||||
|
@ -418,7 +429,7 @@ M: x86 %min-vector ( dst src1 src2 rep -- )
|
||||||
{ uint-4-rep [ PMINUD ] }
|
{ uint-4-rep [ PMINUD ] }
|
||||||
{ float-4-rep [ MINPS ] }
|
{ float-4-rep [ MINPS ] }
|
||||||
{ double-2-rep [ MINPD ] }
|
{ double-2-rep [ MINPD ] }
|
||||||
} case drop ;
|
} case ;
|
||||||
|
|
||||||
M: x86 %min-vector-reps
|
M: x86 %min-vector-reps
|
||||||
{
|
{
|
||||||
|
@ -428,6 +439,7 @@ M: x86 %min-vector-reps
|
||||||
} available-reps ;
|
} available-reps ;
|
||||||
|
|
||||||
M: x86 %max-vector ( dst src1 src2 rep -- )
|
M: x86 %max-vector ( dst src1 src2 rep -- )
|
||||||
|
[ two-operand ] keep
|
||||||
{
|
{
|
||||||
{ char-16-rep [ PMAXSB ] }
|
{ char-16-rep [ PMAXSB ] }
|
||||||
{ uchar-16-rep [ PMAXUB ] }
|
{ uchar-16-rep [ PMAXUB ] }
|
||||||
|
@ -437,7 +449,7 @@ M: x86 %max-vector ( dst src1 src2 rep -- )
|
||||||
{ uint-4-rep [ PMAXUD ] }
|
{ uint-4-rep [ PMAXUD ] }
|
||||||
{ float-4-rep [ MAXPS ] }
|
{ float-4-rep [ MAXPS ] }
|
||||||
{ double-2-rep [ MAXPD ] }
|
{ double-2-rep [ MAXPD ] }
|
||||||
} case drop ;
|
} case ;
|
||||||
|
|
||||||
M: x86 %max-vector-reps
|
M: x86 %max-vector-reps
|
||||||
{
|
{
|
||||||
|
@ -482,11 +494,12 @@ M: x86 %sqrt-vector-reps
|
||||||
} available-reps ;
|
} available-reps ;
|
||||||
|
|
||||||
M: x86 %and-vector ( dst src1 src2 rep -- )
|
M: x86 %and-vector ( dst src1 src2 rep -- )
|
||||||
|
[ two-operand ] keep
|
||||||
{
|
{
|
||||||
{ float-4-rep [ ANDPS ] }
|
{ float-4-rep [ ANDPS ] }
|
||||||
{ double-2-rep [ ANDPD ] }
|
{ double-2-rep [ ANDPD ] }
|
||||||
[ drop PAND ]
|
[ drop PAND ]
|
||||||
} case drop ;
|
} case ;
|
||||||
|
|
||||||
M: x86 %and-vector-reps
|
M: x86 %and-vector-reps
|
||||||
{
|
{
|
||||||
|
@ -495,11 +508,12 @@ M: x86 %and-vector-reps
|
||||||
} available-reps ;
|
} available-reps ;
|
||||||
|
|
||||||
M: x86 %or-vector ( dst src1 src2 rep -- )
|
M: x86 %or-vector ( dst src1 src2 rep -- )
|
||||||
|
[ two-operand ] keep
|
||||||
{
|
{
|
||||||
{ float-4-rep [ ORPS ] }
|
{ float-4-rep [ ORPS ] }
|
||||||
{ double-2-rep [ ORPD ] }
|
{ double-2-rep [ ORPD ] }
|
||||||
[ drop POR ]
|
[ drop POR ]
|
||||||
} case drop ;
|
} case ;
|
||||||
|
|
||||||
M: x86 %or-vector-reps
|
M: x86 %or-vector-reps
|
||||||
{
|
{
|
||||||
|
@ -508,11 +522,12 @@ M: x86 %or-vector-reps
|
||||||
} available-reps ;
|
} available-reps ;
|
||||||
|
|
||||||
M: x86 %xor-vector ( dst src1 src2 rep -- )
|
M: x86 %xor-vector ( dst src1 src2 rep -- )
|
||||||
|
[ two-operand ] keep
|
||||||
{
|
{
|
||||||
{ float-4-rep [ XORPS ] }
|
{ float-4-rep [ XORPS ] }
|
||||||
{ double-2-rep [ XORPD ] }
|
{ double-2-rep [ XORPD ] }
|
||||||
[ drop PXOR ]
|
[ drop PXOR ]
|
||||||
} case drop ;
|
} case ;
|
||||||
|
|
||||||
M: x86 %xor-vector-reps
|
M: x86 %xor-vector-reps
|
||||||
{
|
{
|
||||||
|
@ -521,6 +536,7 @@ M: x86 %xor-vector-reps
|
||||||
} available-reps ;
|
} available-reps ;
|
||||||
|
|
||||||
M: x86 %shl-vector ( dst src1 src2 rep -- )
|
M: x86 %shl-vector ( dst src1 src2 rep -- )
|
||||||
|
[ two-operand ] keep
|
||||||
{
|
{
|
||||||
{ short-8-rep [ PSLLW ] }
|
{ short-8-rep [ PSLLW ] }
|
||||||
{ ushort-8-rep [ PSLLW ] }
|
{ ushort-8-rep [ PSLLW ] }
|
||||||
|
@ -528,7 +544,7 @@ M: x86 %shl-vector ( dst src1 src2 rep -- )
|
||||||
{ uint-4-rep [ PSLLD ] }
|
{ uint-4-rep [ PSLLD ] }
|
||||||
{ longlong-2-rep [ PSLLQ ] }
|
{ longlong-2-rep [ PSLLQ ] }
|
||||||
{ ulonglong-2-rep [ PSLLQ ] }
|
{ ulonglong-2-rep [ PSLLQ ] }
|
||||||
} case drop ;
|
} case ;
|
||||||
|
|
||||||
M: x86 %shl-vector-reps
|
M: x86 %shl-vector-reps
|
||||||
{
|
{
|
||||||
|
@ -536,13 +552,14 @@ M: x86 %shl-vector-reps
|
||||||
} available-reps ;
|
} available-reps ;
|
||||||
|
|
||||||
M: x86 %shr-vector ( dst src1 src2 rep -- )
|
M: x86 %shr-vector ( dst src1 src2 rep -- )
|
||||||
|
[ two-operand ] keep
|
||||||
{
|
{
|
||||||
{ short-8-rep [ PSRAW ] }
|
{ short-8-rep [ PSRAW ] }
|
||||||
{ ushort-8-rep [ PSRLW ] }
|
{ ushort-8-rep [ PSRLW ] }
|
||||||
{ int-4-rep [ PSRAD ] }
|
{ int-4-rep [ PSRAD ] }
|
||||||
{ uint-4-rep [ PSRLD ] }
|
{ uint-4-rep [ PSRLD ] }
|
||||||
{ ulonglong-2-rep [ PSRLQ ] }
|
{ ulonglong-2-rep [ PSRLQ ] }
|
||||||
} case drop ;
|
} case ;
|
||||||
|
|
||||||
M: x86 %shr-vector-reps
|
M: x86 %shr-vector-reps
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in New Issue