compiler.cfg.representations: add more peephole optimizations to reduce fixnum tagging and untagging overhead

db4
Slava Pestov 2010-04-24 20:05:52 -04:00
parent a141df595b
commit 456743a6ce
6 changed files with 493 additions and 123 deletions

View File

@ -79,6 +79,8 @@ PRIVATE>
: loop-nesting-at ( bb -- n ) loop-nesting get at 0 or ;
: current-loop-nesting ( -- n ) basic-block get loop-nesting-at ;
: needs-loops ( cfg -- cfg' )
needs-predecessors
dup loops-valid?>> [ detect-loops t >>loops-valid? ] unless ;

View File

@ -1,20 +1,43 @@
! Copyright (C) 2010 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: accessors combinators combinators.short-circuit kernel
layouts math namespaces cpu.architecture
layouts locals make math namespaces sequences cpu.architecture
compiler.cfg.registers
compiler.cfg.instructions
compiler.cfg.representations.rewrite ;
compiler.cfg.representations.rewrite
compiler.cfg.representations.selection ;
IN: compiler.cfg.representations.peephole
! Representation selection performs some peephole optimizations
! when inserting conversions to optimize for a few common cases
M: ##load-integer conversions-for-insn
GENERIC: optimize-insn ( insn -- )
SYMBOL: insn-index
: here ( -- )
building get length 1 - insn-index set ;
: finish ( insn -- ) , here ;
: unchanged ( insn -- )
[ no-use-conversion ] [ finish ] [ no-def-conversion ] tri ;
: last-insn ( -- insn ) insn-index get building get nth ;
M: vreg-insn conversions-for-insn
init-renaming-set
optimize-insn
last-insn perform-renaming ;
M: vreg-insn optimize-insn
[ emit-use-conversion ] [ finish ] [ emit-def-conversion ] tri ;
M: ##load-integer optimize-insn
{
{
[ dup dst>> rep-of tagged-rep? ]
[ [ dst>> ] [ val>> tag-fixnum ] bi ##load-tagged ]
[ [ dst>> ] [ val>> tag-fixnum ] bi ##load-tagged here ]
}
[ call-next-method ]
} cond ;
@ -48,19 +71,19 @@ M: ##load-integer conversions-for-insn
: (convert-to-zero/fill-vector) ( insn -- dst rep )
dst>> dup rep-of ; inline
M: ##load-reference conversions-for-insn
M: ##load-reference optimize-insn
{
{
[ dup convert-to-load-double? ]
[ (convert-to-load-double) ##load-double ]
[ (convert-to-load-double) ##load-double here ]
}
{
[ dup convert-to-zero-vector? ]
[ (convert-to-zero/fill-vector) ##zero-vector ]
[ (convert-to-zero/fill-vector) ##zero-vector here ]
}
{
[ dup convert-to-fill-vector? ]
[ (convert-to-zero/fill-vector) ##fill-vector ]
[ (convert-to-zero/fill-vector) ##fill-vector here ]
}
[ call-next-method ]
} cond ;
@ -71,21 +94,42 @@ M: ##load-reference conversions-for-insn
! Into either
! ##shl-imm by X - tag-bits, or
! ##sar-imm by tag-bits - X.
: combine-shl-imm? ( insn -- ? )
src1>> rep-of tagged-rep? ;
: combine-shl-imm-input? ( insn -- ? )
;
: combine-shl-imm ( insn -- )
: combine-shl-imm-input ( insn -- )
[ dst>> ] [ src1>> ] [ src2>> ] tri tag-bits get {
{ [ 2dup < ] [ swap - ##sar-imm ] }
{ [ 2dup > ] [ - ##shl-imm ] }
[ 2drop int-rep ##copy ]
{ [ 2dup < ] [ swap - ##sar-imm here ] }
{ [ 2dup > ] [ - ##shl-imm here ] }
[ 2drop int-rep ##copy here ]
} cond ;
M: ##shl-imm conversions-for-insn
: inert-tag/untag-imm? ( insn -- ? )
[ dst>> ] [ src1>> ] bi [ rep-of tagged-rep? ] both? ;
M: ##shl-imm optimize-insn
{
{
[ dup combine-shl-imm? ]
[ [ combine-shl-imm ] [ emit-def-conversion ] bi ]
[ dup inert-tag/untag-imm? ]
[ unchanged ]
}
{
[ dup dst>> rep-of tagged-rep? ]
[
[ emit-use-conversion ]
[ [ tag-bits get + ] change-src2 finish ]
[ no-def-conversion ]
tri
]
}
{
[ dup src1>> rep-of tagged-rep? ]
[
[ no-use-conversion ]
[ combine-shl-imm-input ]
[ emit-def-conversion ]
tri
]
}
[ call-next-method ]
} cond ;
@ -103,13 +147,90 @@ M: ##shl-imm conversions-for-insn
} 1&& ;
: combine-sar-imm ( insn -- )
[ dst>> ] [ src1>> ] [ src2>> tag-bits get + ] tri ##sar-imm ;
[ dst>> ] [ src1>> ] [ src2>> tag-bits get + ] tri ##sar-imm here ;
M: ##sar-imm conversions-for-insn
M: ##sar-imm optimize-insn
{
{
[ dup combine-sar-imm? ]
[ [ combine-sar-imm ] [ emit-def-conversion ] bi ]
[
[ no-use-conversion ]
[ combine-sar-imm ]
[ emit-def-conversion ]
tri
]
}
[ call-next-method ]
} cond ;
! Peephole optimization: for X = add, sub, and, or, xor, min, max
! we have
! tag(untag(a) X untag(b)) = a X b
!
! so if all inputs and outputs of ##X or ##X-imm are tagged,
! don't have to insert any conversions
: inert-tag/untag? ( insn -- ? )
{
[ dst>> rep-of tagged-rep? ]
[ src1>> rep-of tagged-rep? ]
[ src2>> rep-of tagged-rep? ]
} 1&& ;
M: inert-tag-untag-insn optimize-insn
{
{ [ dup inert-tag/untag? ] [ unchanged ] }
[ call-next-method ]
} cond ;
! -imm variant of above
M: inert-tag-untag-imm-insn optimize-insn
{
{ [ dup inert-tag/untag-imm? ] [ [ tag-fixnum ] change-src2 unchanged ] }
[ call-next-method ]
} cond ;
M: ##mul-imm optimize-insn
{
{ [ dup inert-tag/untag-imm? ] [ unchanged ] }
{ [ dup dst>> rep-of tagged-rep? ] [ [ tag-fixnum ] change-src2 unchanged ] }
[ call-next-method ]
} cond ;
: inert-tag/untag-unary? ( insn -- ? )
[ dst>> ] [ src>> ] bi [ rep-of tagged-rep? ] both? ;
: combine-neg-tag ( insn -- )
[ dst>> ] [ src>> ] bi tag-bits get 2^ neg ##mul-imm here ;
M: ##neg optimize-insn
{
{ [ dup inert-tag/untag-unary? ] [ unchanged ] }
{
[ dup dst>> rep-of tagged-rep? ]
[
[ emit-use-conversion ]
[ combine-neg-tag ]
[ no-def-conversion ] tri
]
}
[ call-next-method ]
} cond ;
:: emit-tagged-not ( insn -- )
tagged-rep next-vreg-rep :> temp
temp insn src>> ##not
insn dst>> temp tag-mask get ##xor-imm here ;
M: ##not optimize-insn
{
{
[ dup inert-tag/untag-unary? ]
[
[ no-use-conversion ]
[ emit-tagged-not ]
[ no-def-conversion ]
tri
]
}
[ call-next-method ]
} cond ;

View File

@ -79,15 +79,52 @@ V{
[ 1 ] [ 1 get instructions>> [ ##allot? ] count ] unit-test
! Converting a ##load-integer into a ##load-tagged
! Don't dereference the result of a peek
V{
T{ ##prologue }
T{ ##branch }
} 0 test-bb
V{
T{ ##load-integer f 1 100 }
T{ ##replace f 1 D 0 }
T{ ##peek f 1 D 0 }
T{ ##branch }
} 1 test-bb
V{
T{ ##add-float f 2 1 1 }
T{ ##replace f 2 D 0 }
T{ ##epilogue }
T{ ##return }
} 2 test-bb
V{
T{ ##add-float f 3 1 1 }
T{ ##replace f 3 D 0 }
T{ ##epilogue }
T{ ##return }
} 3 test-bb
0 1 edge
1 { 2 3 } edges
[ ] [ test-representations ] unit-test
[
V{
T{ ##peek f 1 D 0 }
T{ ##branch }
}
] [ 1 get instructions>> ] unit-test
! But its ok to untag-fixnum the result of a peek if there are
! no usages of it as a tagged-rep
V{
T{ ##prologue }
T{ ##branch }
} 0 test-bb
V{
T{ ##peek f 1 D 0 }
T{ ##branch }
} 1 test-bb
@ -96,14 +133,33 @@ V{
T{ ##return }
} 2 test-bb
V{
T{ ##add f 2 1 1 }
T{ ##replace f 2 D 0 }
T{ ##branch }
} 3 test-bb
V{
T{ ##epilogue }
T{ ##return }
} 4 test-bb
0 1 edge
1 2 edge
1 { 2 3 } edges
3 { 3 4 } edges
2 4 edge
3 \ vreg-counter set-global
[ ] [ test-representations ] unit-test
[ T{ ##load-tagged f 1 $[ 100 tag-fixnum ] } ]
[ 1 get instructions>> first ]
unit-test
[
V{
T{ ##peek f 4 D 0 }
T{ ##sar-imm f 1 4 $[ tag-bits get ] }
T{ ##branch }
}
] [ 1 get instructions>> ] unit-test
! scalar-rep => int-rep conversion
V{
@ -115,8 +171,7 @@ V{
T{ ##peek f 1 D 0 }
T{ ##peek f 2 D 0 }
T{ ##vector>scalar f 3 2 int-4-rep }
T{ ##shl f 4 1 3 }
T{ ##replace f 4 D 0 }
T{ ##replace f 3 D 0 }
T{ ##branch }
} 1 test-bb
@ -208,75 +263,252 @@ cpu x86.32? [
[ t ] [ 4 get instructions>> first ##phi? ] unit-test
] when
! Peephole optimization if input to ##shl-imm is tagged
3 \ vreg-counter set-global
: test-peephole ( insns -- insns )
0 test-bb
test-representations
0 get instructions>> ;
! Converting a ##load-integer into a ##load-tagged
V{
T{ ##peek f 1 D 0 }
T{ ##shl-imm f 2 1 3 }
T{ ##replace f 2 D 0 }
T{ ##prologue }
T{ ##branch }
} 0 test-bb
[ ] [ test-representations ] unit-test
[
V{
T{ ##load-tagged f 1 $[ 100 tag-fixnum ] }
T{ ##replace f 1 D 0 }
}
] [
V{
T{ ##load-integer f 1 100 }
T{ ##replace f 1 D 0 }
} test-peephole
] unit-test
! Peephole optimization if input to ##shl-imm is tagged
3 \ vreg-counter set-global
[
V{
T{ ##peek f 1 D 0 }
T{ ##sar-imm f 2 1 1 }
T{ ##shl-imm f 4 2 $[ tag-bits get ] }
T{ ##replace f 4 D 0 }
T{ ##add f 4 2 2 }
T{ ##shl-imm f 3 4 $[ tag-bits get ] }
T{ ##replace f 3 D 0 }
}
] [ 0 get instructions>> ] unit-test
] [
V{
T{ ##peek f 1 D 0 }
T{ ##shl-imm f 2 1 3 }
T{ ##add f 3 2 2 }
T{ ##replace f 3 D 0 }
} test-peephole
] unit-test
V{
T{ ##peek f 1 D 0 }
T{ ##shl-imm f 2 1 10 }
T{ ##replace f 2 D 0 }
} 0 test-bb
[ ] [ test-representations ] unit-test
3 \ vreg-counter set-global
[
V{
T{ ##peek f 1 D 0 }
T{ ##shl-imm f 2 1 $[ 10 tag-bits get - ] }
T{ ##shl-imm f 5 2 $[ tag-bits get ] }
T{ ##replace f 5 D 0 }
T{ ##add f 4 2 2 }
T{ ##shl-imm f 3 4 $[ tag-bits get ] }
T{ ##replace f 3 D 0 }
}
] [ 0 get instructions>> ] unit-test
V{
T{ ##peek f 1 D 0 }
T{ ##shl-imm f 2 1 $[ tag-bits get ] }
T{ ##replace f 2 D 0 }
} 0 test-bb
[ ] [ test-representations ] unit-test
] [
V{
T{ ##peek f 1 D 0 }
T{ ##shl-imm f 2 1 10 }
T{ ##add f 3 2 2 }
T{ ##replace f 3 D 0 }
} test-peephole
] unit-test
[
V{
T{ ##peek f 1 D 0 }
T{ ##copy f 2 1 int-rep }
T{ ##shl-imm f 6 2 $[ tag-bits get ] }
T{ ##replace f 6 D 0 }
T{ ##add f 5 2 2 }
T{ ##shl-imm f 3 5 $[ tag-bits get ] }
T{ ##replace f 3 D 0 }
}
] [ 0 get instructions>> ] unit-test
] [
V{
T{ ##peek f 1 D 0 }
T{ ##shl-imm f 2 1 $[ tag-bits get ] }
T{ ##add f 3 2 2 }
T{ ##replace f 3 D 0 }
} test-peephole
] unit-test
! Peephole optimization if output of ##shl-imm needs to be tagged
[
V{
T{ ##load-integer f 1 100 }
T{ ##shl-imm f 2 1 $[ 3 tag-bits get + ] }
T{ ##replace f 2 D 0 }
}
] [
V{
T{ ##load-integer f 1 100 }
T{ ##shl-imm f 2 1 3 }
T{ ##replace f 2 D 0 }
} test-peephole
] unit-test
! Peephole optimization if both input and output of ##shl-imm
! needs to be tagged
[
V{
T{ ##peek f 0 D 0 }
T{ ##shl-imm f 1 0 3 }
T{ ##replace f 1 D 0 }
}
] [
V{
T{ ##peek f 0 D 0 }
T{ ##shl-imm f 1 0 3 }
T{ ##replace f 1 D 0 }
} test-peephole
] unit-test
6 \ vreg-counter set-global
! Peephole optimization if input to ##sar-imm is tagged
V{
T{ ##peek f 1 D 0 }
T{ ##sar-imm f 2 1 3 }
T{ ##replace f 2 D 0 }
} 0 test-bb
[ ] [ test-representations ] unit-test
[
V{
T{ ##peek f 1 D 0 }
T{ ##sar-imm f 2 1 $[ 3 tag-bits get + ] }
T{ ##shl-imm f 7 2 $[ tag-bits get ] }
T{ ##replace f 7 D 0 }
T{ ##sar-imm f 7 1 $[ 3 tag-bits get + ] }
T{ ##shl-imm f 2 7 $[ tag-bits get ] }
T{ ##replace f 2 D 0 }
}
] [ 0 get instructions>> ] unit-test
] [
V{
T{ ##peek f 1 D 0 }
T{ ##sar-imm f 2 1 3 }
T{ ##replace f 2 D 0 }
} test-peephole
] unit-test
! Tag/untag elimination
[
V{
T{ ##peek f 1 D 0 }
T{ ##add-imm f 2 1 $[ 100 tag-fixnum ] }
T{ ##replace f 2 D 0 }
}
] [
V{
T{ ##peek f 1 D 0 }
T{ ##add-imm f 2 1 100 }
T{ ##replace f 2 D 0 }
} test-peephole
] unit-test
[
V{
T{ ##peek f 0 D 0 }
T{ ##peek f 1 D 1 }
T{ ##add f 2 0 1 }
T{ ##replace f 2 D 0 }
}
] [
V{
T{ ##peek f 0 D 0 }
T{ ##peek f 1 D 1 }
T{ ##add f 2 0 1 }
T{ ##replace f 2 D 0 }
} test-peephole
] unit-test
! Tag/untag elimination for ##mul-imm
[
V{
T{ ##peek f 0 D 0 }
T{ ##mul-imm f 1 0 100 }
T{ ##replace f 1 D 0 }
}
] [
V{
T{ ##peek f 0 D 0 }
T{ ##mul-imm f 1 0 100 }
T{ ##replace f 1 D 0 }
} test-peephole
] unit-test
4 \ vreg-counter set-global
[
V{
T{ ##peek f 0 D 0 }
T{ ##peek f 1 D 1 }
T{ ##sar-imm f 5 1 $[ tag-bits get ] }
T{ ##add-imm f 2 5 30 }
T{ ##mul-imm f 3 2 $[ 100 tag-fixnum ] }
T{ ##replace f 3 D 0 }
}
] [
V{
T{ ##peek f 0 D 0 }
T{ ##peek f 1 D 1 }
T{ ##add-imm f 2 1 30 }
T{ ##mul-imm f 3 2 100 }
T{ ##replace f 3 D 0 }
} test-peephole
] unit-test
! Tag/untag elimination for ##neg
[
V{
T{ ##peek f 0 D 0 }
T{ ##neg f 1 0 }
T{ ##replace f 1 D 0 }
}
] [
V{
T{ ##peek f 0 D 0 }
T{ ##neg f 1 0 }
T{ ##replace f 1 D 0 }
} test-peephole
] unit-test
4 \ vreg-counter set-global
[
V{
T{ ##peek f 5 D 0 }
T{ ##sar-imm f 0 5 $[ tag-bits get ] }
T{ ##peek f 6 D 1 }
T{ ##sar-imm f 1 6 $[ tag-bits get ] }
T{ ##mul f 2 0 1 }
T{ ##mul-imm f 3 2 -16 }
T{ ##replace f 3 D 0 }
}
] [
V{
T{ ##peek f 0 D 0 }
T{ ##peek f 1 D 1 }
T{ ##mul f 2 0 1 }
T{ ##neg f 3 2 }
T{ ##replace f 3 D 0 }
} test-peephole
] unit-test
! Tag/untag elimination for ##not
2 \ vreg-counter set-global
[
V{
T{ ##peek f 0 D 0 }
T{ ##not f 3 0 }
T{ ##xor-imm f 1 3 $[ tag-mask get ] }
T{ ##replace f 1 D 0 }
}
] [
V{
T{ ##peek f 0 D 0 }
T{ ##not f 1 0 }
T{ ##replace f 1 D 0 }
} test-peephole
] unit-test

View File

@ -19,6 +19,7 @@ IN: compiler.cfg.representations
{
[ compute-possibilities ]
[ compute-restrictions ]
[ compute-representations ]
[ compute-phi-representations ]
[ insert-conversions ]

View File

@ -60,9 +60,15 @@ SYMBOLS: renaming-set needs-renaming? ;
: emit-use-conversion ( insn -- )
[ [ (emit-use-conversion) ] (compute-renaming-set) ] each-use-rep ;
: no-use-conversion ( insn -- )
[ drop no-renaming ] each-use-rep ;
: emit-def-conversion ( insn -- )
[ [ (emit-def-conversion) ] (compute-renaming-set) ] each-def-rep ;
: no-def-conversion ( insn -- )
[ drop no-renaming ] each-def-rep ;
: converted-value ( vreg -- vreg' )
renaming-set get pop first2 [ assert= ] dip ;
@ -75,21 +81,10 @@ RENAMING: convert [ converted-value ] [ converted-value ] [ ]
renaming-set get length 0 assert=
] [ drop ] if ;
: with-conversions ( insn -- quot )
init-renaming-set [ perform-renaming ] bi ; inline
GENERIC: conversions-for-insn ( insn -- )
M: ##phi conversions-for-insn , ;
M: vreg-insn conversions-for-insn
[
[ emit-use-conversion ]
[ , ]
[ emit-def-conversion ]
tri
] with-conversions ;
M: insn conversions-for-insn , ;
: conversions-for-block ( bb -- )

View File

@ -17,23 +17,21 @@ SYMBOL: possibilities
H{ } clone [ '[ swap _ adjoin-at ] with-vreg-reps ] keep
[ members ] assoc-map possibilities set ;
! Compute vregs which must remain tagged for their lifetime.
SYMBOL: always-boxed
:: (compute-always-boxed) ( vreg rep assoc -- )
! Compute vregs for which dereferencing cannot be hoisted past
! conditionals, because they might be immediate.
:: check-restriction ( vreg rep -- )
rep tagged-rep eq? [
tagged-rep vreg assoc set-at
vreg possibilities get
[ { tagged-rep int-rep } intersect ] change-at
] when ;
: compute-always-boxed ( cfg -- assoc )
H{ } clone [
'[
[
dup ##load-reference?
[ drop ] [ [ _ (compute-always-boxed) ] each-def-rep ] if
] each-non-phi
] each-basic-block
] keep ;
: compute-restrictions ( cfg -- )
[
[
dup ##load-reference?
[ drop ] [ [ check-restriction ] each-def-rep ] if
] each-non-phi
] each-basic-block ;
! For every vreg, compute the cost of keeping it in every possible
! representation.
@ -42,36 +40,61 @@ SYMBOL: always-boxed
SYMBOL: costs
: init-costs ( -- )
possibilities get [ drop H{ } clone ] assoc-map costs set ;
! Initialize cost as 0 for each possibility.
possibilities get [ [ 0 ] H{ } map>assoc ] assoc-map costs set ;
: record-possibility ( rep vreg -- )
costs get at [ 0 or ] change-at ;
: 10^ ( n -- x ) 10 <repetition> product ;
: increase-cost ( rep vreg -- )
: increase-cost ( rep vreg factor -- )
! Increase cost of keeping vreg in rep, making a choice of rep less
! likely.
costs get at [ 0 or basic-block get loop-nesting-at 1 + + ] change-at ;
! likely. If the rep is not in the cost alist, it means this
! representation is prohibited.
[ costs get at 2dup key? ] dip
'[ [ current-loop-nesting 10^ _ * + ] change-at ] [ 2drop ] if ;
: maybe-increase-cost ( possible vreg preferred -- )
pick eq? [ record-possibility ] [ increase-cost ] if ;
:: increase-costs ( vreg preferred factor -- )
vreg possible [
dup preferred eq? [ drop ] [ vreg factor increase-cost ] if
] each ; inline
: representation-cost ( vreg preferred -- )
! 'preferred' is a representation that the instruction can accept with no cost.
! So, for each representation that's not preferred, increase the cost of keeping
! the vreg in that representation.
[ drop possible ]
[ '[ _ _ maybe-increase-cost ] ]
2bi each ;
UNION: inert-tag-untag-insn
##add
##sub
##and
##or
##xor
##min
##max ;
UNION: inert-tag-untag-imm-insn
##add-imm
##sub-imm
##and-imm
##or-imm
##xor-imm ;
GENERIC: has-peephole-opts? ( insn -- ? )
M: insn has-peephole-opts? drop f ;
M: ##load-integer has-peephole-opts? drop t ;
M: ##load-reference has-peephole-opts? drop t ;
M: inert-tag-untag-insn has-peephole-opts? drop t ;
M: inert-tag-untag-imm-insn has-peephole-opts? drop t ;
M: ##mul-imm has-peephole-opts? drop t ;
M: ##shl-imm has-peephole-opts? drop t ;
M: ##shr-imm has-peephole-opts? drop t ;
M: ##sar-imm has-peephole-opts? drop t ;
M: ##neg has-peephole-opts? drop t ;
M: ##not has-peephole-opts? drop t ;
GENERIC: compute-insn-costs ( insn -- )
! There's no cost to converting a constant's representation
M: ##load-integer compute-insn-costs drop ;
M: ##load-reference compute-insn-costs drop ;
M: insn compute-insn-costs drop ;
M: insn compute-insn-costs [ representation-cost ] each-rep ;
M: vreg-insn compute-insn-costs
dup has-peephole-opts? 2 5 ? '[ _ increase-costs ] each-rep ;
: compute-costs ( cfg -- costs )
: compute-costs ( cfg -- )
init-costs
[
[ basic-block set ]
@ -80,8 +103,7 @@ M: insn compute-insn-costs [ representation-cost ] each-rep ;
compute-insn-costs
] each-non-phi
] bi
] each-basic-block
costs get ;
] each-basic-block ;
! For every vreg, compute preferred representation, that minimizes costs.
: minimize-costs ( costs -- representations )
@ -89,10 +111,7 @@ M: insn compute-insn-costs [ representation-cost ] each-rep ;
[ >alist alist-min first ] assoc-map ;
: compute-representations ( cfg -- )
[ compute-costs minimize-costs ]
[ compute-always-boxed ]
bi assoc-union
representations set ;
compute-costs costs get minimize-costs representations set ;
! PHI nodes require special treatment
! If the output of a phi instruction is only used as the input to another