From 2ce926594d129a62f215025e7712908c6ef57524 Mon Sep 17 00:00:00 2001
From: Slava Pestov <slava@slava-pestovs-macbook-pro.local>
Date: Fri, 23 Apr 2010 04:17:41 -0400
Subject: [PATCH] compiler.cfg.value-numbering: new optimizations;
 reassociation for shifts and redistribution for shifts/multiplies over
 additions/subtractions

---
 .../cfg/value-numbering/math/math.factor      |  73 ++++++-
 .../value-numbering-tests.factor              | 183 ++++++++++++++++++
 basis/cpu/architecture/architecture.factor    |   8 +-
 3 files changed, 257 insertions(+), 7 deletions(-)

diff --git a/basis/compiler/cfg/value-numbering/math/math.factor b/basis/compiler/cfg/value-numbering/math/math.factor
index bbc2d5a169..1ea2135db4 100644
--- a/basis/compiler/cfg/value-numbering/math/math.factor
+++ b/basis/compiler/cfg/value-numbering/math/math.factor
@@ -1,7 +1,8 @@
 ! Copyright (C) 2010 Slava Pestov.
 ! See http://factorcode.org/license.txt for BSD license.
 USING: accessors combinators cpu.architecture fry kernel layouts
-math sequences compiler.cfg.instructions
+locals make math sequences compiler.cfg.instructions
+compiler.cfg.registers
 compiler.cfg.value-numbering.expressions
 compiler.cfg.value-numbering.folding
 compiler.cfg.value-numbering.graph
@@ -9,10 +10,12 @@ compiler.cfg.value-numbering.rewrite
 compiler.cfg.value-numbering.simplify ;
 IN: compiler.cfg.value-numbering.math
 
+: f-expr? ( expr -- ? ) T{ reference-expr f f } = ;
+
 M: ##tagged>integer rewrite
     [ dst>> ] [ src>> vreg>expr ] bi {
         { [ dup integer-expr? ] [ value>> tag-fixnum \ ##load-integer new-insn ] }
-        { [ dup reference-expr? ] [ value>> [ drop f ] [ \ f type-number \ ##load-integer new-insn ] if ] }
+        { [ dup f-expr? ] [ \ f type-number \ ##load-integer new-insn ] }
         [ 2drop f ]
     } cond ;
 
@@ -22,13 +25,22 @@ M: ##neg rewrite
 M: ##not rewrite
     dup unary-constant-fold? [ unary-constant-fold ] [ drop f ] if ;
 
-: reassociate ( insn -- dst src1 src2 )
+! Reassociation converts
+! ## *-imm 2 1 X
+! ## *-imm 3 2 Y
+! into
+! ## *-imm 3 1 (X $ Y)
+! If * is associative, then $ is the same operation as *.
+! In the case of shifts, $ is addition.
+: (reassociate) ( insn -- dst src1 src2' src2'' )
     {
         [ dst>> ]
         [ src1>> vreg>expr [ src1>> vn>vreg ] [ src2>> vn>integer ] bi ]
         [ src2>> ]
-        [ ]
-    } cleave binary-constant-fold* ;
+    } cleave ; inline
+
+: reassociate ( insn -- dst src1 src2 )
+    [ (reassociate) ] keep binary-constant-fold* ;
 
 : ?new-insn ( dst src1 src2 ? class -- insn/f )
     '[ _ new-insn ] [ 3drop f ] if ; inline
@@ -39,6 +51,9 @@ M: ##not rewrite
 : reassociate-bitwise ( insn new-insn -- insn/f )
     [ reassociate dup immediate-bitwise? ] dip ?new-insn ; inline
 
+: reassociate-shift ( insn new-insn -- insn/f )
+    [ (reassociate) + dup immediate-shift-count? ] dip ?new-insn ; inline
+
 M: ##add-imm rewrite
     {
         { [ dup binary-constant-fold? ] [ binary-constant-fold ] }
@@ -56,24 +71,57 @@ M: ##sub-imm rewrite
         [ sub-imm>add-imm ]
     } cond ;
 
+! Convert ##mul-imm -1 => ##neg
 : mul-to-neg? ( insn -- ? )
     src2>> -1 = ;
 
 : mul-to-neg ( insn -- insn' )
     [ dst>> ] [ src1>> ] bi \ ##neg new-insn ;
 
+! Convert ##mul-imm 2^X => ##shl-imm X
 : mul-to-shl? ( insn -- ? )
     src2>> power-of-2? ;
 
 : mul-to-shl ( insn -- insn' )
     [ [ dst>> ] [ src1>> ] bi ] [ src2>> log2 ] bi \ ##shl-imm new-insn ;
 
+! Distribution converts
+! ##+-imm 2 1 X
+! ##*-imm 3 2 Y
+! Into
+! ##*-imm 4 1 Y
+! ##+-imm 3 4 X*Y
+! Where * is mul or shl, + is add or sub
+! Have to make sure that X*Y fits in an immediate
+:: (distribute) ( insn expr imm temp add-op mul-op -- new-insns/f )
+    imm immediate-arithmetic? [
+        [
+            temp expr src1>> vn>vreg insn src2>> mul-op execute
+            insn dst>> temp imm add-op execute
+        ] { } make
+    ] [ f ] if ;
+
+: distribute-over-add? ( insn -- ? )
+    src1>> vreg>expr add-imm-expr? ;
+
+: distribute-over-sub? ( insn -- ? )
+    src1>> vreg>expr sub-imm-expr? ;
+
+: distribute ( insn add-op mul-op -- new-insns/f )
+    [
+        dup src1>> vreg>expr
+        2dup src2>> vn>integer swap [ src2>> ] keep binary-constant-fold*
+        next-vreg
+    ] 2dip (distribute) ;
+
 M: ##mul-imm rewrite
     {
         { [ dup binary-constant-fold? ] [ binary-constant-fold ] }
         { [ dup mul-to-neg? ] [ mul-to-neg ] }
         { [ dup mul-to-shl? ] [ mul-to-shl ] }
         { [ dup src1>> vreg>expr mul-imm-expr? ] [ \ ##mul-imm reassociate-arithmetic ] }
+        { [ dup distribute-over-add? ] [ \ ##add-imm \ ##mul-imm distribute ] }
+        { [ dup distribute-over-sub? ] [ \ ##sub-imm \ ##mul-imm distribute ] }
         [ drop f ]
     } cond ;
 
@@ -101,21 +149,31 @@ M: ##xor-imm rewrite
 M: ##shl-imm rewrite
     {
         { [ dup binary-constant-fold? ] [ binary-constant-fold ] }
+        { [ dup src1>> vreg>expr shl-imm-expr? ] [ \ ##shl-imm reassociate-shift ] }
+        { [ dup distribute-over-add? ] [ \ ##add-imm \ ##shl-imm distribute ] }
+        { [ dup distribute-over-sub? ] [ \ ##sub-imm \ ##shl-imm distribute ] }
         [ drop f ]
     } cond ;
 
 M: ##shr-imm rewrite
     {
         { [ dup binary-constant-fold? ] [ binary-constant-fold ] }
+        { [ dup src1>> vreg>expr shr-imm-expr? ] [ \ ##shr-imm reassociate-shift ] }
         [ drop f ]
     } cond ;
 
 M: ##sar-imm rewrite
     {
         { [ dup binary-constant-fold? ] [ binary-constant-fold ] }
+        { [ dup src1>> vreg>expr sar-imm-expr? ] [ \ ##sar-imm reassociate-shift ] }
         [ drop f ]
     } cond ;
 
+! Convert
+! ##load-integer 2 X
+! ##* 3 1 2
+! Where * is an operation with an -imm equivalent into
+! ##*-imm 3 1 X
 : insn>imm-insn ( insn op swap? -- new-insn )
     swap [
         [ [ dst>> ] [ src1>> ] [ src2>> ] tri ] dip
@@ -129,12 +187,17 @@ M: ##add rewrite
         [ drop f ]
     } cond ;
 
+! ##sub 2 1 1 => ##load-integer 2 0
 : subtraction-identity? ( insn -- ? )
     [ src1>> ] [ src2>> ] bi [ vreg>vn ] bi@ eq? ;
 
 : rewrite-subtraction-identity ( insn -- insn' )
     dst>> 0 \ ##load-integer new-insn ;
 
+! ##load-integer 1 0
+! ##sub 3 1 2
+! =>
+! ##neg 3 2
 : sub-to-neg? ( ##sub -- ? )
     src1>> vn>expr expr-zero? ;
 
diff --git a/basis/compiler/cfg/value-numbering/value-numbering-tests.factor b/basis/compiler/cfg/value-numbering/value-numbering-tests.factor
index f18f00aa76..bdf8b330af 100644
--- a/basis/compiler/cfg/value-numbering/value-numbering-tests.factor
+++ b/basis/compiler/cfg/value-numbering/value-numbering-tests.factor
@@ -1125,6 +1125,189 @@ cpu x86.32? [
     } value-numbering-step
 ] unit-test
 
+[
+    {
+        T{ ##peek f 0 D 0 }
+        T{ ##shl-imm f 1 0 10 }
+        T{ ##shl-imm f 2 0 21 }
+        T{ ##replace f 2 D 0 }
+    }
+] [
+    {
+        T{ ##peek f 0 D 0 }
+        T{ ##shl-imm f 1 0 10 }
+        T{ ##shl-imm f 2 1 11 }
+        T{ ##replace f 2 D 0 }
+    } value-numbering-step
+] unit-test
+
+[
+    {
+        T{ ##peek f 0 D 0 }
+        T{ ##shl-imm f 1 0 10 }
+        T{ ##shl-imm f 2 1 $[ cell-bits 1 - ] }
+        T{ ##replace f 2 D 0 }
+    }
+] [
+    {
+        T{ ##peek f 0 D 0 }
+        T{ ##shl-imm f 1 0 10 }
+        T{ ##shl-imm f 2 1 $[ cell-bits 1 - ] }
+        T{ ##replace f 2 D 0 }
+    } value-numbering-step
+] unit-test
+
+[
+    {
+        T{ ##peek f 0 D 0 }
+        T{ ##sar-imm f 1 0 10 }
+        T{ ##sar-imm f 2 0 21 }
+        T{ ##replace f 2 D 0 }
+    }
+] [
+    {
+        T{ ##peek f 0 D 0 }
+        T{ ##sar-imm f 1 0 10 }
+        T{ ##sar-imm f 2 1 11 }
+        T{ ##replace f 2 D 0 }
+    } value-numbering-step
+] unit-test
+
+[
+    {
+        T{ ##peek f 0 D 0 }
+        T{ ##sar-imm f 1 0 10 }
+        T{ ##sar-imm f 2 1 $[ cell-bits 1 - ] }
+        T{ ##replace f 2 D 0 }
+    }
+] [
+    {
+        T{ ##peek f 0 D 0 }
+        T{ ##sar-imm f 1 0 10 }
+        T{ ##sar-imm f 2 1 $[ cell-bits 1 - ] }
+        T{ ##replace f 2 D 0 }
+    } value-numbering-step
+] unit-test
+
+[
+    {
+        T{ ##peek f 0 D 0 }
+        T{ ##shr-imm f 1 0 10 }
+        T{ ##shr-imm f 2 0 21 }
+        T{ ##replace f 2 D 0 }
+    }
+] [
+    {
+        T{ ##peek f 0 D 0 }
+        T{ ##shr-imm f 1 0 10 }
+        T{ ##shr-imm f 2 1 11 }
+        T{ ##replace f 2 D 0 }
+    } value-numbering-step
+] unit-test
+
+[
+    {
+        T{ ##peek f 0 D 0 }
+        T{ ##shr-imm f 1 0 10 }
+        T{ ##shr-imm f 2 1 $[ cell-bits 1 - ] }
+        T{ ##replace f 2 D 0 }
+    }
+] [
+    {
+        T{ ##peek f 0 D 0 }
+        T{ ##shr-imm f 1 0 10 }
+        T{ ##shr-imm f 2 1 $[ cell-bits 1 - ] }
+        T{ ##replace f 2 D 0 }
+    } value-numbering-step
+] unit-test
+
+[
+    {
+        T{ ##peek f 0 D 0 }
+        T{ ##shr-imm f 1 0 10 }
+        T{ ##sar-imm f 2 1 11 }
+        T{ ##replace f 2 D 0 }
+    }
+] [
+    {
+        T{ ##peek f 0 D 0 }
+        T{ ##shr-imm f 1 0 10 }
+        T{ ##sar-imm f 2 1 11 }
+        T{ ##replace f 2 D 0 }
+    } value-numbering-step
+] unit-test
+
+! Distributive law
+2 \ vreg-counter set-global
+
+[
+    {
+        T{ ##peek f 0 D 0 }
+        T{ ##add-imm f 1 0 10 }
+        T{ ##shl-imm f 3 0 2 }
+        T{ ##add-imm f 2 3 40 }
+        T{ ##replace f 2 D 0 }
+    }
+] [
+    {
+        T{ ##peek f 0 D 0 }
+        T{ ##add-imm f 1 0 10 }
+        T{ ##shl-imm f 2 1 2 }
+        T{ ##replace f 2 D 0 }
+    } value-numbering-step
+] unit-test
+
+[
+    {
+        T{ ##peek f 0 D 0 }
+        T{ ##add-imm f 1 0 10 }
+        T{ ##mul-imm f 4 0 3 }
+        T{ ##add-imm f 2 4 30 }
+        T{ ##replace f 2 D 0 }
+    }
+] [
+    {
+        T{ ##peek f 0 D 0 }
+        T{ ##add-imm f 1 0 10 }
+        T{ ##mul-imm f 2 1 3 }
+        T{ ##replace f 2 D 0 }
+    } value-numbering-step
+] unit-test
+
+[
+    {
+        T{ ##peek f 0 D 0 }
+        T{ ##add-imm f 1 0 -10 }
+        T{ ##shl-imm f 5 0 2 }
+        T{ ##add-imm f 2 5 -40 }
+        T{ ##replace f 2 D 0 }
+    }
+] [
+    {
+        T{ ##peek f 0 D 0 }
+        T{ ##sub-imm f 1 0 10 }
+        T{ ##shl-imm f 2 1 2 }
+        T{ ##replace f 2 D 0 }
+    } value-numbering-step
+] unit-test
+
+[
+    {
+        T{ ##peek f 0 D 0 }
+        T{ ##add-imm f 1 0 -10 }
+        T{ ##mul-imm f 6 0 3 }
+        T{ ##add-imm f 2 6 -30 }
+        T{ ##replace f 2 D 0 }
+    }
+] [
+    {
+        T{ ##peek f 0 D 0 }
+        T{ ##sub-imm f 1 0 10 }
+        T{ ##mul-imm f 2 1 3 }
+        T{ ##replace f 2 D 0 }
+    } value-numbering-step
+] unit-test
+
 ! Simplification
 [
     {
diff --git a/basis/cpu/architecture/architecture.factor b/basis/cpu/architecture/architecture.factor
index 57a04d4c65..a2e5050edf 100644
--- a/basis/cpu/architecture/architecture.factor
+++ b/basis/cpu/architecture/architecture.factor
@@ -1,8 +1,9 @@
 ! Copyright (C) 2006, 2010 Slava Pestov.
 ! See http://factorcode.org/license.txt for BSD license.
 USING: accessors arrays assocs generic kernel kernel.private
-math memory namespaces make sequences layouts system hashtables
-classes alien byte-arrays combinators words sets fry ;
+math math.order memory namespaces make sequences layouts system
+hashtables classes alien byte-arrays combinators words sets fry
+;
 IN: cpu.architecture
 
 ! Representations -- these are like low-level types
@@ -523,6 +524,9 @@ M: object immediate-comparand? ( n -- ? )
         [ drop f ]
     } cond ;
 
+: immediate-shift-count? ( n -- ? )
+    0 cell-bits 1 - between? ;
+
 ! What c-type describes the implicit struct return pointer for
 ! large structs?
 HOOK: struct-return-pointer-type cpu ( -- c-type )