From 1e841e50865a703d5a96517b8fbc064085093bf7 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Sun, 27 Sep 2009 17:17:26 -0500 Subject: [PATCH] compiler.cfg.ssa.destruction: more aggressive coalescing work in progress --- .../cfg/instructions/instructions.factor | 5 +- basis/compiler/cfg/optimizer/optimizer.factor | 2 - .../cfg/ssa/destruction/destruction.factor | 17 ++- .../live-ranges/live-ranges.factor | 25 ++--- basis/compiler/cfg/two-operand/summary.txt | 1 - .../cfg/two-operand/two-operand-tests.factor | 52 --------- .../cfg/two-operand/two-operand.factor | 90 --------------- basis/cpu/architecture/architecture.factor | 2 - basis/cpu/ppc/ppc.factor | 2 - basis/cpu/x86/x86.factor | 103 ++++++++++-------- 10 files changed, 88 insertions(+), 211 deletions(-) delete mode 100644 basis/compiler/cfg/two-operand/summary.txt delete mode 100644 basis/compiler/cfg/two-operand/two-operand-tests.factor delete mode 100644 basis/compiler/cfg/two-operand/two-operand.factor diff --git a/basis/compiler/cfg/instructions/instructions.factor b/basis/compiler/cfg/instructions/instructions.factor index 6f5a05c672..6f9e7d03a9 100644 --- a/basis/compiler/cfg/instructions/instructions.factor +++ b/basis/compiler/cfg/instructions/instructions.factor @@ -707,7 +707,10 @@ UNION: kill-vreg-insn UNION: def-is-use-insn ##integer>bignum ##bignum>integer -##unbox-any-c-ptr ; +##string-nth +##unbox-any-c-ptr +##unary-float-function +##binary-float-function ; SYMBOL: vreg-insn diff --git a/basis/compiler/cfg/optimizer/optimizer.factor b/basis/compiler/cfg/optimizer/optimizer.factor index 649032b469..84726a9b99 100644 --- a/basis/compiler/cfg/optimizer/optimizer.factor +++ b/basis/compiler/cfg/optimizer/optimizer.factor @@ -12,7 +12,6 @@ compiler.cfg.copy-prop compiler.cfg.dce compiler.cfg.write-barrier compiler.cfg.representations -compiler.cfg.two-operand compiler.cfg.ssa.destruction compiler.cfg.empty-blocks compiler.cfg.checker ; @@ -37,7 +36,6 @@ SYMBOL: check-optimizer? eliminate-dead-code eliminate-write-barriers select-representations - convert-two-operand destruct-ssa delete-empty-blocks ?check ; diff --git a/basis/compiler/cfg/ssa/destruction/destruction.factor b/basis/compiler/cfg/ssa/destruction/destruction.factor index 424be91e2b..76061f237b 100644 --- a/basis/compiler/cfg/ssa/destruction/destruction.factor +++ b/basis/compiler/cfg/ssa/destruction/destruction.factor @@ -6,6 +6,7 @@ sets vectors compiler.cfg.rpo compiler.cfg.def-use compiler.cfg.renaming +compiler.cfg.registers compiler.cfg.dominance compiler.cfg.instructions compiler.cfg.liveness.ssa @@ -60,16 +61,24 @@ SYMBOL: copies GENERIC: prepare-insn ( insn -- ) +: try-to-coalesce ( dst src -- ) 2array copies get push ; + +M: insn prepare-insn + [ defs-vreg ] [ uses-vregs ] bi + 2dup empty? not and [ + first + 2dup [ rep-of ] bi@ eq? + [ try-to-coalesce ] [ 2drop ] if + ] [ 2drop ] if ; + M: ##copy prepare-insn - [ dst>> ] [ src>> ] bi 2array copies get push ; + [ dst>> ] [ src>> ] bi try-to-coalesce ; M: ##phi prepare-insn [ dst>> ] [ inputs>> values ] bi [ eliminate-copy ] with each ; -M: insn prepare-insn drop ; - -: prepare-block ( bb -- ) + : prepare-block ( bb -- ) instructions>> [ prepare-insn ] each ; : prepare-coalescing ( cfg -- ) diff --git a/basis/compiler/cfg/ssa/interference/live-ranges/live-ranges.factor b/basis/compiler/cfg/ssa/interference/live-ranges/live-ranges.factor index fd1f09a900..ef24914269 100644 --- a/basis/compiler/cfg/ssa/interference/live-ranges/live-ranges.factor +++ b/basis/compiler/cfg/ssa/interference/live-ranges/live-ranges.factor @@ -11,28 +11,25 @@ IN: compiler.cfg.ssa.interference.live-ranges SYMBOLS: local-def-indices local-kill-indices ; -: record-def ( n vreg -- ) +: record-def ( n insn -- ) ! We allow multiple defs of a vreg as long as they're ! all in the same basic block - dup [ + defs-vreg dup [ local-def-indices get 2dup key? [ 3drop ] [ set-at ] if ] [ 2drop ] if ; -: record-uses ( n vregs -- ) - local-kill-indices get '[ _ set-at ] with each ; +: record-uses ( n insn -- ) + ! Record live intervals so that all but the first input interfere + ! with the output. This lets us coalesce the output with the + ! first input. + [ uses-vregs ] [ def-is-use-insn? ] bi over empty? [ 3drop ] [ + [ [ first local-kill-indices get set-at ] [ rest-slice ] 2bi ] unless + [ 1 + ] dip [ local-kill-indices get set-at ] with each + ] if ; : visit-insn ( insn n -- ) - ! Instructions are numbered 2 apart. If the instruction requires - ! that outputs are in different registers than the inputs, then - ! a use will be registered for every output immediately after - ! this instruction and before the next one, ensuring that outputs - ! interfere with inputs. - 2 * - [ swap defs-vreg record-def ] - [ swap uses-vregs record-uses ] - [ over def-is-use-insn? [ 1 + swap defs-vreg 1array record-uses ] [ 2drop ] if ] - 2tri ; + 2 * swap [ record-def ] [ record-uses ] 2bi ; SYMBOLS: def-indices kill-indices ; diff --git a/basis/compiler/cfg/two-operand/summary.txt b/basis/compiler/cfg/two-operand/summary.txt deleted file mode 100644 index 6c9154d306..0000000000 --- a/basis/compiler/cfg/two-operand/summary.txt +++ /dev/null @@ -1 +0,0 @@ -Converting three-operand instructions into two-operand form diff --git a/basis/compiler/cfg/two-operand/two-operand-tests.factor b/basis/compiler/cfg/two-operand/two-operand-tests.factor deleted file mode 100644 index 41094cfac4..0000000000 --- a/basis/compiler/cfg/two-operand/two-operand-tests.factor +++ /dev/null @@ -1,52 +0,0 @@ -USING: kernel compiler.cfg.two-operand compiler.cfg.instructions -compiler.cfg.registers cpu.architecture namespaces tools.test ; -IN: compiler.cfg.two-operand.tests - -3 vreg-counter set-global - -[ - V{ - T{ ##copy f 1 2 int-rep } - T{ ##sub f 1 1 3 } - } -] [ - H{ - { 1 int-rep } - { 2 int-rep } - { 3 int-rep } - } clone representations set - { - T{ ##sub f 1 2 3 } - } (convert-two-operand) -] unit-test - -[ - V{ - T{ ##copy f 1 2 double-rep } - T{ ##sub-float f 1 1 3 } - } -] [ - H{ - { 1 double-rep } - { 2 double-rep } - { 3 double-rep } - } clone representations set - { - T{ ##sub-float f 1 2 3 } - } (convert-two-operand) -] unit-test - -[ - V{ - T{ ##copy f 1 2 double-rep } - T{ ##mul-float f 1 1 1 } - } -] [ - H{ - { 1 double-rep } - { 2 double-rep } - } clone representations set - { - T{ ##mul-float f 1 2 2 } - } (convert-two-operand) -] unit-test diff --git a/basis/compiler/cfg/two-operand/two-operand.factor b/basis/compiler/cfg/two-operand/two-operand.factor deleted file mode 100644 index 4434e0b7b8..0000000000 --- a/basis/compiler/cfg/two-operand/two-operand.factor +++ /dev/null @@ -1,90 +0,0 @@ -! Copyright (C) 2008, 2009 Slava Pestov. -! See http://factorcode.org/license.txt for BSD license. -USING: accessors kernel sequences make combinators -compiler.cfg.registers compiler.cfg.instructions -compiler.cfg.rpo cpu.architecture ; -IN: compiler.cfg.two-operand - -! This pass runs before SSA coalescing and normalizes instructions -! to fit the x86 two-address scheme. Since the input is in SSA, -! it suffices to convert -! -! x = y op z -! -! to -! -! x = y -! x = x op z -! -! We don't bother with ##add, ##add-imm, ##sub-imm or ##mul-imm -! since x86 has LEA and IMUL instructions which are effectively -! three-operand addition and multiplication, respectively. - -UNION: two-operand-insn - ##sub - ##mul - ##and - ##and-imm - ##or - ##or-imm - ##xor - ##xor-imm - ##shl - ##shl-imm - ##shr - ##shr-imm - ##sar - ##sar-imm - ##min - ##max - ##fixnum-add - ##fixnum-sub - ##fixnum-mul - ##add-float - ##sub-float - ##mul-float - ##div-float - ##min-float - ##max-float - ##add-vector - ##saturated-add-vector - ##add-sub-vector - ##sub-vector - ##saturated-sub-vector - ##mul-vector - ##saturated-mul-vector - ##div-vector - ##min-vector - ##max-vector - ##and-vector - ##or-vector - ##xor-vector - ##shl-vector - ##shr-vector ; - -GENERIC: convert-two-operand* ( insn -- ) - -: emit-copy ( dst src -- ) - dup rep-of ##copy ; inline - -M: two-operand-insn convert-two-operand* - [ [ dst>> ] [ src1>> ] bi emit-copy ] - [ - dup [ src1>> ] [ src2>> ] bi = [ dup dst>> >>src2 ] when - dup dst>> >>src1 , - ] bi ; - -M: ##not convert-two-operand* - [ [ dst>> ] [ src>> ] bi emit-copy ] - [ dup dst>> >>src , ] - bi ; - -M: insn convert-two-operand* , ; - -: (convert-two-operand) ( insns -- insns' ) - dup first kill-vreg-insn? [ - [ [ convert-two-operand* ] each ] V{ } make - ] unless ; - -: convert-two-operand ( cfg -- cfg' ) - two-operand? [ [ (convert-two-operand) ] local-optimization ] when ; \ No newline at end of file diff --git a/basis/cpu/architecture/architecture.factor b/basis/cpu/architecture/architecture.factor index c27aacb875..0ddd477b89 100644 --- a/basis/cpu/architecture/architecture.factor +++ b/basis/cpu/architecture/architecture.factor @@ -135,8 +135,6 @@ M: ulonglong-2-rep scalar-rep-of drop ulonglong-scalar-rep ; ! Mapping from register class to machine registers HOOK: machine-registers cpu ( -- assoc ) -HOOK: two-operand? cpu ( -- ? ) - HOOK: %load-immediate cpu ( reg obj -- ) HOOK: %load-reference cpu ( reg obj -- ) diff --git a/basis/cpu/ppc/ppc.factor b/basis/cpu/ppc/ppc.factor index 64df207975..01e8513b2f 100644 --- a/basis/cpu/ppc/ppc.factor +++ b/basis/cpu/ppc/ppc.factor @@ -49,8 +49,6 @@ M: ppc machine-registers CONSTANT: scratch-reg 30 CONSTANT: fp-scratch-reg 30 -M: ppc two-operand? f ; - M: ppc %load-immediate ( reg n -- ) swap LOAD ; M: ppc %load-reference ( reg obj -- ) diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index d89e360d09..3a41b331d9 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -20,8 +20,6 @@ IN: cpu.x86 M: label JMP 0 JMP rc-relative label-fixup ; M: label JUMPcc [ 0 ] dip JUMPcc rc-relative label-fixup ; -M: x86 two-operand? t ; - M: x86 vector-regs float-regs ; HOOK: stack-reg cpu ( -- reg ) @@ -102,26 +100,35 @@ M: x86 %slot-imm ( dst obj slot tag -- ) (%slot-imm) MOV ; M: x86 %set-slot ( src obj slot -- ) [+] swap MOV ; M: x86 %set-slot-imm ( src obj slot tag -- ) (%slot-imm) swap MOV ; +:: two-operand ( dst src1 src2 rep -- dst src ) + dst src2 eq? [ "Cannot handle this case" throw ] when + dst src1 rep %copy + dst src2 ; inline + +:: one-operand ( dst src rep -- dst ) + dst src rep %copy + dst ; inline + M: x86 %add 2over eq? [ nip ADD ] [ [+] LEA ] if ; M: x86 %add-imm 2over eq? [ nip ADD ] [ [+] LEA ] if ; -M: x86 %sub nip SUB ; +M: x86 %sub int-rep two-operand SUB ; M: x86 %sub-imm 2over eq? [ nip SUB ] [ neg [+] LEA ] if ; -M: x86 %mul nip swap IMUL2 ; +M: x86 %mul int-rep two-operand swap IMUL2 ; M: x86 %mul-imm IMUL3 ; -M: x86 %and nip AND ; -M: x86 %and-imm nip AND ; -M: x86 %or nip OR ; -M: x86 %or-imm nip OR ; -M: x86 %xor nip XOR ; -M: x86 %xor-imm nip XOR ; -M: x86 %shl-imm nip SHL ; -M: x86 %shr-imm nip SHR ; -M: x86 %sar-imm nip SAR ; +M: x86 %and int-rep two-operand AND ; +M: x86 %and-imm int-rep two-operand AND ; +M: x86 %or int-rep two-operand OR ; +M: x86 %or-imm int-rep two-operand OR ; +M: x86 %xor int-rep two-operand XOR ; +M: x86 %xor-imm int-rep two-operand XOR ; +M: x86 %shl-imm int-rep two-operand SHL ; +M: x86 %shr-imm int-rep two-operand SHR ; +M: x86 %sar-imm int-rep two-operand SAR ; -M: x86 %min nip [ CMP ] [ CMOVG ] 2bi ; -M: x86 %max nip [ CMP ] [ CMOVL ] 2bi ; +M: x86 %min int-rep two-operand [ CMP ] [ CMOVG ] 2bi ; +M: x86 %max int-rep two-operand [ CMP ] [ CMOVL ] 2bi ; -M: x86 %not drop NOT ; +M: x86 %not int-rep one-operand NOT ; M: x86 %log2 BSR ; GENERIC: copy-register* ( dst src rep -- ) @@ -137,18 +144,14 @@ M: vector-rep copy-register* drop MOVDQU ; M: x86 %copy ( dst src rep -- ) 2over eq? [ 3drop ] [ copy-register* ] if ; -:: overflow-template ( label dst src1 src2 insn -- ) - src1 src2 insn call - label JO ; inline - M: x86 %fixnum-add ( label dst src1 src2 -- ) - [ ADD ] overflow-template ; + int-rep two-operand ADD JO ; M: x86 %fixnum-sub ( label dst src1 src2 -- ) - [ SUB ] overflow-template ; + int-rep two-operand SUB JO ; M: x86 %fixnum-mul ( label dst src1 src2 -- ) - [ swap IMUL2 ] overflow-template ; + int-rep two-operand swap IMUL2 JO ; : bignum@ ( reg n -- op ) cells bignum tag-number - [+] ; inline @@ -210,12 +213,12 @@ M:: x86 %bignum>integer ( dst src temp -- ) "end" resolve-label ] with-scope ; -M: x86 %add-float nip ADDSD ; -M: x86 %sub-float nip SUBSD ; -M: x86 %mul-float nip MULSD ; -M: x86 %div-float nip DIVSD ; -M: x86 %min-float nip MINSD ; -M: x86 %max-float nip MAXSD ; +M: x86 %add-float double-rep two-operand ADDSD ; +M: x86 %sub-float double-rep two-operand SUBSD ; +M: x86 %mul-float double-rep two-operand MULSD ; +M: x86 %div-float double-rep two-operand DIVSD ; +M: x86 %min-float double-rep two-operand MINSD ; +M: x86 %max-float double-rep two-operand MAXSD ; M: x86 %sqrt SQRTSD ; M: x86 %single>double-float CVTSS2SD ; @@ -299,6 +302,7 @@ M: x86 %gather-vector-2-reps } available-reps ; M: x86 %add-vector ( dst src1 src2 rep -- ) + [ two-operand ] keep { { float-4-rep [ ADDPS ] } { double-2-rep [ ADDPD ] } @@ -310,7 +314,7 @@ M: x86 %add-vector ( dst src1 src2 rep -- ) { uint-4-rep [ PADDD ] } { longlong-2-rep [ PADDQ ] } { ulonglong-2-rep [ PADDQ ] } - } case drop ; + } case ; M: x86 %add-vector-reps { @@ -319,12 +323,13 @@ M: x86 %add-vector-reps } available-reps ; M: x86 %saturated-add-vector ( dst src1 src2 rep -- ) + [ two-operand ] keep { { char-16-rep [ PADDSB ] } { uchar-16-rep [ PADDUSB ] } { short-8-rep [ PADDSW ] } { ushort-8-rep [ PADDUSW ] } - } case drop ; + } case ; M: x86 %saturated-add-vector-reps { @@ -332,10 +337,11 @@ M: x86 %saturated-add-vector-reps } available-reps ; M: x86 %add-sub-vector ( dst src1 src2 rep -- ) + [ two-operand ] keep { { float-4-rep [ ADDSUBPS ] } { double-2-rep [ ADDSUBPD ] } - } case drop ; + } case ; M: x86 %add-sub-vector-reps { @@ -343,6 +349,7 @@ M: x86 %add-sub-vector-reps } available-reps ; M: x86 %sub-vector ( dst src1 src2 rep -- ) + [ two-operand ] keep { { float-4-rep [ SUBPS ] } { double-2-rep [ SUBPD ] } @@ -354,7 +361,7 @@ M: x86 %sub-vector ( dst src1 src2 rep -- ) { uint-4-rep [ PSUBD ] } { longlong-2-rep [ PSUBQ ] } { ulonglong-2-rep [ PSUBQ ] } - } case drop ; + } case ; M: x86 %sub-vector-reps { @@ -363,12 +370,13 @@ M: x86 %sub-vector-reps } available-reps ; M: x86 %saturated-sub-vector ( dst src1 src2 rep -- ) + [ two-operand ] keep { { char-16-rep [ PSUBSB ] } { uchar-16-rep [ PSUBUSB ] } { short-8-rep [ PSUBSW ] } { ushort-8-rep [ PSUBUSW ] } - } case drop ; + } case ; M: x86 %saturated-sub-vector-reps { @@ -376,6 +384,7 @@ M: x86 %saturated-sub-vector-reps } available-reps ; M: x86 %mul-vector ( dst src1 src2 rep -- ) + [ two-operand ] keep { { float-4-rep [ MULPS ] } { double-2-rep [ MULPD ] } @@ -383,7 +392,7 @@ M: x86 %mul-vector ( dst src1 src2 rep -- ) { ushort-8-rep [ PMULLW ] } { int-4-rep [ PMULLD ] } { uint-4-rep [ PMULLD ] } - } case drop ; + } case ; M: x86 %mul-vector-reps { @@ -397,10 +406,11 @@ M: x86 %saturated-mul-vector-reps { } ; M: x86 %div-vector ( dst src1 src2 rep -- ) + [ two-operand ] keep { { float-4-rep [ DIVPS ] } { double-2-rep [ DIVPD ] } - } case drop ; + } case ; M: x86 %div-vector-reps { @@ -409,6 +419,7 @@ M: x86 %div-vector-reps } available-reps ; M: x86 %min-vector ( dst src1 src2 rep -- ) + [ two-operand ] keep { { char-16-rep [ PMINSB ] } { uchar-16-rep [ PMINUB ] } @@ -418,7 +429,7 @@ M: x86 %min-vector ( dst src1 src2 rep -- ) { uint-4-rep [ PMINUD ] } { float-4-rep [ MINPS ] } { double-2-rep [ MINPD ] } - } case drop ; + } case ; M: x86 %min-vector-reps { @@ -428,6 +439,7 @@ M: x86 %min-vector-reps } available-reps ; M: x86 %max-vector ( dst src1 src2 rep -- ) + [ two-operand ] keep { { char-16-rep [ PMAXSB ] } { uchar-16-rep [ PMAXUB ] } @@ -437,7 +449,7 @@ M: x86 %max-vector ( dst src1 src2 rep -- ) { uint-4-rep [ PMAXUD ] } { float-4-rep [ MAXPS ] } { double-2-rep [ MAXPD ] } - } case drop ; + } case ; M: x86 %max-vector-reps { @@ -482,11 +494,12 @@ M: x86 %sqrt-vector-reps } available-reps ; M: x86 %and-vector ( dst src1 src2 rep -- ) + [ two-operand ] keep { { float-4-rep [ ANDPS ] } { double-2-rep [ ANDPD ] } [ drop PAND ] - } case drop ; + } case ; M: x86 %and-vector-reps { @@ -495,11 +508,12 @@ M: x86 %and-vector-reps } available-reps ; M: x86 %or-vector ( dst src1 src2 rep -- ) + [ two-operand ] keep { { float-4-rep [ ORPS ] } { double-2-rep [ ORPD ] } [ drop POR ] - } case drop ; + } case ; M: x86 %or-vector-reps { @@ -508,11 +522,12 @@ M: x86 %or-vector-reps } available-reps ; M: x86 %xor-vector ( dst src1 src2 rep -- ) + [ two-operand ] keep { { float-4-rep [ XORPS ] } { double-2-rep [ XORPD ] } [ drop PXOR ] - } case drop ; + } case ; M: x86 %xor-vector-reps { @@ -521,6 +536,7 @@ M: x86 %xor-vector-reps } available-reps ; M: x86 %shl-vector ( dst src1 src2 rep -- ) + [ two-operand ] keep { { short-8-rep [ PSLLW ] } { ushort-8-rep [ PSLLW ] } @@ -528,7 +544,7 @@ M: x86 %shl-vector ( dst src1 src2 rep -- ) { uint-4-rep [ PSLLD ] } { longlong-2-rep [ PSLLQ ] } { ulonglong-2-rep [ PSLLQ ] } - } case drop ; + } case ; M: x86 %shl-vector-reps { @@ -536,13 +552,14 @@ M: x86 %shl-vector-reps } available-reps ; M: x86 %shr-vector ( dst src1 src2 rep -- ) + [ two-operand ] keep { { short-8-rep [ PSRAW ] } { ushort-8-rep [ PSRLW ] } { int-4-rep [ PSRAD ] } { uint-4-rep [ PSRLD ] } { ulonglong-2-rep [ PSRLQ ] } - } case drop ; + } case ; M: x86 %shr-vector-reps {