From e95cd256ec093a0768a730fb70114c8c1d97d143 Mon Sep 17 00:00:00 2001
From: Slava Pestov <slava@slava-pestovs-macbook-pro.local>
Date: Fri, 23 Apr 2010 04:18:31 -0400
Subject: [PATCH] compiler.cfg.representations: peephole optimizations are used
 to simplify the case where the input to a ##shl-imm or ##sar-imm needs to be
 untagged

---
 .../cfg/representations/peephole/authors.txt  |   1 +
 .../representations/peephole/peephole.factor  | 115 ++++++++++++++++++
 .../representations-tests.factor              |  78 +++++++++++-
 .../representations/representations.factor    |  14 ++-
 .../representations/rewrite/rewrite.factor    |  85 +++----------
 5 files changed, 220 insertions(+), 73 deletions(-)
 create mode 100644 basis/compiler/cfg/representations/peephole/authors.txt
 create mode 100644 basis/compiler/cfg/representations/peephole/peephole.factor

diff --git a/basis/compiler/cfg/representations/peephole/authors.txt b/basis/compiler/cfg/representations/peephole/authors.txt
new file mode 100644
index 0000000000..1901f27a24
--- /dev/null
+++ b/basis/compiler/cfg/representations/peephole/authors.txt
@@ -0,0 +1 @@
+Slava Pestov
diff --git a/basis/compiler/cfg/representations/peephole/peephole.factor b/basis/compiler/cfg/representations/peephole/peephole.factor
new file mode 100644
index 0000000000..94f9dd8aeb
--- /dev/null
+++ b/basis/compiler/cfg/representations/peephole/peephole.factor
@@ -0,0 +1,115 @@
+! Copyright (C) 2010 Slava Pestov.
+! See http://factorcode.org/license.txt for BSD license.
+USING: accessors combinators combinators.short-circuit kernel
+layouts math namespaces cpu.architecture
+compiler.cfg.registers
+compiler.cfg.instructions
+compiler.cfg.representations.rewrite ;
+IN: compiler.cfg.representations.peephole
+
+! Representation selection performs some peephole optimizations
+! when inserting conversions to optimize for a few common cases
+
+M: ##load-integer conversions-for-insn
+    {
+        {
+            [ dup dst>> rep-of tagged-rep? ]
+            [ [ dst>> ] [ val>> tag-fixnum ] bi ##load-tagged ]
+        }
+        [ call-next-method ]
+    } cond ;
+
+! When a float is unboxed, we replace the ##load-reference with a ##load-double
+! if the architecture supports it
+: convert-to-load-double? ( insn -- ? )
+    {
+        [ drop load-double? ]
+        [ dst>> rep-of double-rep? ]
+        [ obj>> float? ]
+    } 1&& ;
+
+! When a literal zeroes/ones vector is unboxed, we replace the ##load-reference
+! with a ##zero-vector or ##fill-vector instruction since this is more efficient.
+: convert-to-zero-vector? ( insn -- ? )
+    {
+        [ dst>> rep-of vector-rep? ]
+        [ obj>> B{ 0 0 0 0  0 0 0 0  0 0 0 0  0 0 0 0 } = ]
+    } 1&& ;
+
+: convert-to-fill-vector? ( insn -- ? )
+    {
+        [ dst>> rep-of vector-rep? ]
+        [ obj>> B{ 255 255 255 255  255 255 255 255  255 255 255 255  255 255 255 255 } = ]
+    } 1&& ;
+
+: (convert-to-load-double) ( insn -- dst val )
+    [ dst>> ] [ obj>> ] bi ; inline
+
+: (convert-to-zero/fill-vector) ( insn -- dst rep )
+    dst>> dup rep-of ; inline
+
+M: ##load-reference conversions-for-insn
+    {
+        {
+            [ dup convert-to-load-double? ]
+            [ (convert-to-load-double) ##load-double ]
+        }
+        {
+            [ dup convert-to-zero-vector? ]
+            [ (convert-to-zero/fill-vector) ##zero-vector ]
+        }
+        {
+            [ dup convert-to-fill-vector? ]
+            [ (convert-to-zero/fill-vector) ##fill-vector ]
+        }
+        [ call-next-method ]
+    } cond ;
+
+! Optimize this:
+! ##sar-imm temp src tag-bits
+! ##shl-imm dst temp X
+! Into either
+! ##shl-imm by X - tag-bits, or
+! ##sar-imm by tag-bits - X.
+: combine-shl-imm? ( insn -- ? )
+    src1>> rep-of tagged-rep? ;
+
+: combine-shl-imm ( insn -- )
+    [ dst>> ] [ src1>> ] [ src2>> ] tri tag-bits get {
+        { [ 2dup < ] [ swap - ##sar-imm ] }
+        { [ 2dup > ] [ - ##shl-imm ] }
+        [ 2drop int-rep ##copy ]
+    } cond ;
+
+M: ##shl-imm conversions-for-insn
+    {
+        {
+            [ dup combine-shl-imm? ]
+            [ [ combine-shl-imm ] [ emit-def-conversion ] bi ]
+        }
+        [ call-next-method ]
+    } cond ;
+
+! Optimize this:
+! ##sar-imm temp src tag-bits
+! ##sar-imm dst temp X
+! Into
+! ##sar-imm by X + tag-bits
+! assuming X + tag-bits is a valid shift count.
+: combine-sar-imm? ( insn -- ? )
+    {
+        [ src1>> rep-of tagged-rep? ]
+        [ src2>> tag-bits get + immediate-shift-count? ]
+    } 1&& ;
+
+: combine-sar-imm ( insn -- )
+    [ dst>> ] [ src1>> ] [ src2>> tag-bits get + ] tri ##sar-imm ;
+
+M: ##sar-imm conversions-for-insn
+    {
+        {
+            [ dup combine-sar-imm? ]
+            [ [ combine-sar-imm ] [ emit-def-conversion ] bi ]
+        }
+        [ call-next-method ]
+    } cond ;
diff --git a/basis/compiler/cfg/representations/representations-tests.factor b/basis/compiler/cfg/representations/representations-tests.factor
index 35e56f5489..fb03dfa2ea 100644
--- a/basis/compiler/cfg/representations/representations-tests.factor
+++ b/basis/compiler/cfg/representations/representations-tests.factor
@@ -1,7 +1,8 @@
 USING: accessors compiler.cfg compiler.cfg.debugger
 compiler.cfg.instructions compiler.cfg.registers
 compiler.cfg.representations.preferred cpu.architecture kernel
-namespaces tools.test sequences arrays system literals layouts ;
+namespaces tools.test sequences arrays system literals layouts
+math ;
 IN: compiler.cfg.representations
 
 [ { double-rep double-rep } ] [
@@ -177,4 +178,77 @@ cpu x86.32? [
     [ t ] [ 3 get instructions>> first ##load-double? ] unit-test
 
     [ t ] [ 4 get instructions>> first ##phi? ] unit-test
-] when
\ No newline at end of file
+] when
+
+! Peephole optimization if input to ##shl-imm is tagged
+
+3 \ vreg-counter set-global
+
+V{
+    T{ ##peek f 1 D 0 }
+    T{ ##shl-imm f 2 1 3 }
+    T{ ##replace f 2 D 0 }
+} 0 test-bb
+
+[ ] [ test-representations ] unit-test
+
+[
+    V{
+        T{ ##peek f 1 D 0 }
+        T{ ##sar-imm f 2 1 1 }
+        T{ ##shl-imm f 4 2 $[ tag-bits get ] }
+        T{ ##replace f 4 D 0 }
+    }
+] [ 0 get instructions>> ] unit-test
+
+V{
+    T{ ##peek f 1 D 0 }
+    T{ ##shl-imm f 2 1 10 }
+    T{ ##replace f 2 D 0 }
+} 0 test-bb
+
+[ ] [ test-representations ] unit-test
+
+[
+    V{
+        T{ ##peek f 1 D 0 }
+        T{ ##shl-imm f 2 1 $[ 10 tag-bits get - ] }
+        T{ ##shl-imm f 5 2 $[ tag-bits get ] }
+        T{ ##replace f 5 D 0 }
+    }
+] [ 0 get instructions>> ] unit-test
+
+V{
+    T{ ##peek f 1 D 0 }
+    T{ ##shl-imm f 2 1 $[ tag-bits get ] }
+    T{ ##replace f 2 D 0 }
+} 0 test-bb
+
+[ ] [ test-representations ] unit-test
+
+[
+    V{
+        T{ ##peek f 1 D 0 }
+        T{ ##copy f 2 1 int-rep }
+        T{ ##shl-imm f 6 2 $[ tag-bits get ] }
+        T{ ##replace f 6 D 0 }
+    }
+] [ 0 get instructions>> ] unit-test
+
+! Peephole optimization if input to ##sar-imm is tagged
+V{
+    T{ ##peek f 1 D 0 }
+    T{ ##sar-imm f 2 1 3 }
+    T{ ##replace f 2 D 0 }
+} 0 test-bb
+
+[ ] [ test-representations ] unit-test
+
+[
+    V{
+        T{ ##peek f 1 D 0 }
+        T{ ##sar-imm f 2 1 $[ 3 tag-bits get + ] }
+        T{ ##shl-imm f 7 2 $[ tag-bits get ] }
+        T{ ##replace f 7 D 0 }
+    }
+] [ 0 get instructions>> ] unit-test
\ No newline at end of file
diff --git a/basis/compiler/cfg/representations/representations.factor b/basis/compiler/cfg/representations/representations.factor
index d4c500291e..22184ca284 100644
--- a/basis/compiler/cfg/representations/representations.factor
+++ b/basis/compiler/cfg/representations/representations.factor
@@ -1,12 +1,18 @@
 ! Copyright (C) 2009, 2010 Slava Pestov
 ! See http://factorcode.org/license.txt for BSD license.
-USING: accessors combinators compiler.cfg
-compiler.cfg.loop-detection compiler.cfg.registers
+USING: accessors combinators namespaces
+compiler.cfg
+compiler.cfg.registers
+compiler.cfg.loop-detection
 compiler.cfg.representations.rewrite
-compiler.cfg.representations.selection namespaces ;
+compiler.cfg.representations.peephole
+compiler.cfg.representations.selection ;
 IN: compiler.cfg.representations
 
-! Virtual register representation selection.
+! Virtual register representation selection. This is where
+! decisions about integer tagging and float and vector boxing
+! are made. The appropriate conversion operations inserted
+! after a cost analysis.
 
 : select-representations ( cfg -- cfg' )
     needs-loops
diff --git a/basis/compiler/cfg/representations/rewrite/rewrite.factor b/basis/compiler/cfg/representations/rewrite/rewrite.factor
index d5afe1faa2..5b15e95c15 100644
--- a/basis/compiler/cfg/representations/rewrite/rewrite.factor
+++ b/basis/compiler/cfg/representations/rewrite/rewrite.factor
@@ -19,14 +19,14 @@ IN: compiler.cfg.representations.rewrite
 ! Mapping from vreg,rep pairs to vregs
 SYMBOL: alternatives
 
-:: emit-def-conversion ( dst preferred required -- new-dst' )
+:: (emit-def-conversion) ( dst preferred required -- new-dst' )
     ! If an instruction defines a register with representation 'required',
     ! but the register has preferred representation 'preferred', then
     ! we rename the instruction's definition to a new register, which
     ! becomes the input of a conversion instruction.
     dst required next-vreg-rep [ preferred required emit-conversion ] keep ;
 
-:: emit-use-conversion ( src preferred required -- new-src' )
+:: (emit-use-conversion) ( src preferred required -- new-src' )
     ! If an instruction uses a register with representation 'required',
     ! but the register has preferred representation 'preferred', then
     ! we rename the instruction's input to a new register, which
@@ -43,7 +43,7 @@ SYMBOLS: renaming-set needs-renaming? ;
 
 : init-renaming-set ( -- )
     needs-renaming? off
-    V{ } clone renaming-set set ;
+    renaming-set get delete-all ;
 
 : no-renaming ( vreg -- )
     dup 2array renaming-set get push ;
@@ -57,14 +57,11 @@ SYMBOLS: renaming-set needs-renaming? ;
     [ vreg no-renaming ]
     [ vreg vreg preferred required quot call record-renaming ] if ; inline
 
-: compute-renaming-set ( insn -- )
-    ! temp vregs don't need conversions since they're always in their
-    ! preferred representation
-    init-renaming-set
-    [ [ [ emit-use-conversion ] (compute-renaming-set) ] each-use-rep ]
-    [ , ]
-    [ [ [ emit-def-conversion ] (compute-renaming-set) ] each-def-rep ]
-    tri ;
+: emit-use-conversion ( insn -- )
+    [ [ (emit-use-conversion) ] (compute-renaming-set) ] each-use-rep ;
+
+: emit-def-conversion ( insn -- )
+    [ [ (emit-def-conversion) ] (compute-renaming-set) ] each-def-rep ;
 
 : converted-value ( vreg -- vreg' )
     renaming-set get pop first2 [ assert= ] dip ;
@@ -78,67 +75,20 @@ RENAMING: convert [ converted-value ] [ converted-value ] [ ]
         renaming-set get length 0 assert=
     ] [ drop ] if ;
 
+: with-conversions ( insn -- quot )
+    init-renaming-set [ perform-renaming ] bi ; inline
+
 GENERIC: conversions-for-insn ( insn -- )
 
 M: ##phi conversions-for-insn , ;
 
-M: ##load-integer conversions-for-insn
-    {
-        {
-            [ dup dst>> rep-of tagged-rep? ]
-            [ [ dst>> ] [ val>> tag-fixnum ] bi ##load-tagged ]
-        }
-        [ call-next-method ]
-    } cond ;
-
-! When a float is unboxed, we replace the ##load-reference with a ##load-double
-! if the architecture supports it
-: convert-to-load-double? ( insn -- ? )
-    {
-        [ drop load-double? ]
-        [ dst>> rep-of double-rep? ]
-        [ obj>> float? ]
-    } 1&& ;
-
-! When a literal zeroes/ones vector is unboxed, we replace the ##load-reference
-! with a ##zero-vector or ##fill-vector instruction since this is more efficient.
-: convert-to-zero-vector? ( insn -- ? )
-    {
-        [ dst>> rep-of vector-rep? ]
-        [ obj>> B{ 0 0 0 0  0 0 0 0  0 0 0 0  0 0 0 0 } = ]
-    } 1&& ;
-
-: convert-to-fill-vector? ( insn -- ? )
-    {
-        [ dst>> rep-of vector-rep? ]
-        [ obj>> B{ 255 255 255 255  255 255 255 255  255 255 255 255  255 255 255 255 } = ]
-    } 1&& ;
-
-: (convert-to-load-double) ( insn -- dst val )
-    [ dst>> ] [ obj>> ] bi ; inline
-
-: (convert-to-zero/fill-vector) ( insn -- dst rep )
-    dst>> dup rep-of ; inline
-
-M: ##load-reference conversions-for-insn
-    {
-        {
-            [ dup convert-to-load-double? ]
-            [ (convert-to-load-double) ##load-double ]
-        }
-        {
-            [ dup convert-to-zero-vector? ]
-            [ (convert-to-zero/fill-vector) ##zero-vector ]
-        }
-        {
-            [ dup convert-to-fill-vector? ]
-            [ (convert-to-zero/fill-vector) ##fill-vector ]
-        }
-        [ call-next-method ]
-    } cond ;
-
 M: vreg-insn conversions-for-insn
-    [ compute-renaming-set ] [ perform-renaming ] bi ;
+    [
+        [ emit-use-conversion ]
+        [ , ]
+        [ emit-def-conversion ]
+        tri
+    ] with-conversions ;
 
 M: insn conversions-for-insn , ;
 
@@ -153,4 +103,5 @@ M: insn conversions-for-insn , ;
     ] if ;
 
 : insert-conversions ( cfg -- )
+    V{ } clone renaming-set set
     [ conversions-for-block ] each-basic-block ;