From 9788323d25b76d3749eef04dfede127f811f397b Mon Sep 17 00:00:00 2001
From: Slava Pestov <slava@slava-pestovs-macbook-pro.local>
Date: Fri, 30 Apr 2010 21:33:42 -0400
Subject: [PATCH] compiler: add ##load-vector instruction to avoid wasting a
 temporary register on x86-32

---
 .../cfg/instructions/instructions.factor      |  6 +++-
 .../representations/peephole/peephole.factor  | 31 +++++++++++--------
 basis/compiler/codegen/codegen.factor         |  1 +
 basis/compiler/codegen/fixup/fixup.factor     |  3 ++
 basis/compiler/constants/constants.factor     |  1 +
 basis/cpu/architecture/architecture.factor    |  8 +++--
 basis/cpu/x86/32/32.factor                    |  5 ++-
 vm/code_blocks.cpp                            |  5 ++-
 vm/compaction.cpp                             |  3 ++
 vm/image.cpp                                  |  3 ++
 vm/instruction_operands.cpp                   | 15 +++++++++
 vm/instruction_operands.hpp                   |  8 ++++-
 vm/layouts.hpp                                |  2 ++
 vm/slot_visitor.hpp                           |  3 ++
 14 files changed, 74 insertions(+), 20 deletions(-)

diff --git a/basis/compiler/cfg/instructions/instructions.factor b/basis/compiler/cfg/instructions/instructions.factor
index 5286fd861b..e7a8338d91 100644
--- a/basis/compiler/cfg/instructions/instructions.factor
+++ b/basis/compiler/cfg/instructions/instructions.factor
@@ -29,7 +29,7 @@ INSN: ##load-reference
 def: dst/tagged-rep
 literal: obj ;
 
-! These two are inserted by representation selection
+! These three are inserted by representation selection
 INSN: ##load-tagged
 def: dst/tagged-rep
 literal: val ;
@@ -38,6 +38,10 @@ INSN: ##load-double
 def: dst/double-rep
 literal: val ;
 
+INSN: ##load-vector
+def: dst
+literal: val rep ;
+
 ! Stack operations
 INSN: ##peek
 def: dst/tagged-rep
diff --git a/basis/compiler/cfg/representations/peephole/peephole.factor b/basis/compiler/cfg/representations/peephole/peephole.factor
index 23e2e9c50c..4080e8077c 100644
--- a/basis/compiler/cfg/representations/peephole/peephole.factor
+++ b/basis/compiler/cfg/representations/peephole/peephole.factor
@@ -1,8 +1,8 @@
 ! Copyright (C) 2010 Slava Pestov.
 ! See http://factorcode.org/license.txt for BSD license.
-USING: accessors combinators combinators.short-circuit kernel
-layouts locals make math namespaces sequences cpu.architecture
-compiler.cfg.registers
+USING: accessors byte-arrays combinators
+combinators.short-circuit kernel layouts locals make math
+namespaces sequences cpu.architecture compiler.cfg.registers
 compiler.cfg.instructions
 compiler.cfg.representations.rewrite
 compiler.cfg.representations.selection ;
@@ -46,11 +46,18 @@ M: ##load-integer optimize-insn
 ! if the architecture supports it
 : convert-to-load-double? ( insn -- ? )
     {
-        [ drop load-double? ]
+        [ drop object-immediates? ]
         [ dst>> rep-of double-rep? ]
         [ obj>> float? ]
     } 1&& ;
 
+: convert-to-load-vector? ( insn -- ? )
+    {
+        [ drop object-immediates? ]
+        [ dst>> rep-of vector-rep? ]
+        [ obj>> byte-array? ]
+    } 1&& ;
+
 ! When a literal zeroes/ones vector is unboxed, we replace the ##load-reference
 ! with a ##zero-vector or ##fill-vector instruction since this is more efficient.
 : convert-to-zero-vector? ( insn -- ? )
@@ -65,25 +72,23 @@ M: ##load-integer optimize-insn
         [ obj>> B{ 255 255 255 255  255 255 255 255  255 255 255 255  255 255 255 255 } = ]
     } 1&& ;
 
-: (convert-to-load-double) ( insn -- dst val )
-    [ dst>> ] [ obj>> ] bi ; inline
-
-: (convert-to-zero/fill-vector) ( insn -- dst rep )
-    dst>> dup rep-of ; inline
-
 M: ##load-reference optimize-insn
     {
         {
             [ dup convert-to-load-double? ]
-            [ (convert-to-load-double) ##load-double here ]
+            [ [ dst>> ] [ obj>> ] bi ##load-double here ]
         }
         {
             [ dup convert-to-zero-vector? ]
-            [ (convert-to-zero/fill-vector) ##zero-vector here ]
+            [ dst>> dup rep-of ##zero-vector here ]
         }
         {
             [ dup convert-to-fill-vector? ]
-            [ (convert-to-zero/fill-vector) ##fill-vector here ]
+            [ dst>> dup rep-of ##fill-vector here ]
+        }
+        {
+            [ dup convert-to-load-vector? ]
+            [ [ dst>> ] [ obj>> ] [ dst>> rep-of ] tri ##load-vector here ]
         }
         [ call-next-method ]
     } cond ;
diff --git a/basis/compiler/codegen/codegen.factor b/basis/compiler/codegen/codegen.factor
index 2d0cfa8289..3d0a6ee8af 100755
--- a/basis/compiler/codegen/codegen.factor
+++ b/basis/compiler/codegen/codegen.factor
@@ -78,6 +78,7 @@ CODEGEN: ##load-integer %load-immediate
 CODEGEN: ##load-tagged %load-immediate
 CODEGEN: ##load-reference %load-reference
 CODEGEN: ##load-double %load-double
+CODEGEN: ##load-vector %load-vector
 CODEGEN: ##peek %peek
 CODEGEN: ##replace %replace
 CODEGEN: ##inc-d %inc-d
diff --git a/basis/compiler/codegen/fixup/fixup.factor b/basis/compiler/codegen/fixup/fixup.factor
index fa8dfc2149..ebc9c7b476 100644
--- a/basis/compiler/codegen/fixup/fixup.factor
+++ b/basis/compiler/codegen/fixup/fixup.factor
@@ -76,6 +76,9 @@ MEMO: cached-string>symbol ( symbol -- obj ) string>symbol ;
 : rel-float ( literal class -- )
     [ add-literal ] dip rt-float rel-fixup ;
 
+: rel-byte-array ( literal class -- )
+    [ add-literal ] dip rt-byte-array rel-fixup ;
+
 : rel-this ( class -- )
     rt-this rel-fixup ;
 
diff --git a/basis/compiler/constants/constants.factor b/basis/compiler/constants/constants.factor
index 2fdf814521..ca9dc5136d 100644
--- a/basis/compiler/constants/constants.factor
+++ b/basis/compiler/constants/constants.factor
@@ -68,6 +68,7 @@ CONSTANT: rt-cards-offset 10
 CONSTANT: rt-decks-offset 11
 CONSTANT: rt-exception-handler 12
 CONSTANT: rt-float 13
+CONSTANT: rt-byte-array 14
 
 : rc-absolute? ( n -- ? )
     ${
diff --git a/basis/cpu/architecture/architecture.factor b/basis/cpu/architecture/architecture.factor
index 09745dea6b..1418ab90b6 100644
--- a/basis/cpu/architecture/architecture.factor
+++ b/basis/cpu/architecture/architecture.factor
@@ -225,6 +225,7 @@ HOOK: complex-addressing? cpu ( -- ? )
 HOOK: %load-immediate cpu ( reg val -- )
 HOOK: %load-reference cpu ( reg obj -- )
 HOOK: %load-double cpu ( reg val -- )
+HOOK: %load-vector cpu ( reg val rep -- )
 
 HOOK: %peek cpu ( vreg loc -- )
 HOOK: %replace cpu ( vreg loc -- )
@@ -500,10 +501,11 @@ M: reg-class param-reg param-regs nth ;
 
 M: stack-params param-reg 2drop ;
 
-! Does this architecture support %load-double?
-HOOK: load-double? cpu ( -- ? )
+! Does this architecture support %load-double, %load-vector and
+! objects in %compare-imm?
+HOOK: object-immediates? cpu ( -- ? )
 
-M: object load-double? f ;
+M: object object-immediates? f ;
 
 ! Can this value be an immediate operand for %add-imm, %sub-imm,
 ! or %mul-imm?
diff --git a/basis/cpu/x86/32/32.factor b/basis/cpu/x86/32/32.factor
index 79ff4b1e35..4df4fbd602 100755
--- a/basis/cpu/x86/32/32.factor
+++ b/basis/cpu/x86/32/32.factor
@@ -27,11 +27,14 @@ M: x86.32 temp-reg ECX ;
 M: x86.32 immediate-comparand? ( n -- ? )
     [ call-next-method ] [ word? ] bi or ;
 
-M: x86.32 load-double? ( -- ? ) t ;
+M: x86.32 object-immediates? ( -- ? ) t ;
 
 M: x86.32 %load-double ( dst val -- )
     [ 0 [] MOVSD ] dip rc-absolute rel-float ;
 
+M:: x86.32 %load-vector ( dst val rep -- )
+    dst 0 [] rep copy-memory* val rc-absolute rel-byte-array ;
+
 M: x86.32 %mov-vm-ptr ( reg -- )
     0 MOV 0 rc-absolute-cell rel-vm ;
 
diff --git a/vm/code_blocks.cpp b/vm/code_blocks.cpp
index 2e7b8d4f09..351ce5a95e 100755
--- a/vm/code_blocks.cpp
+++ b/vm/code_blocks.cpp
@@ -231,7 +231,7 @@ void factor_vm::store_external_address(instruction_operand op)
 		break;
 #endif
 	default:
-		critical_error("Bad rel type",op.rel_type());
+		critical_error("Bad rel type in store_external_address()",op.rel_type());
 		break;
 	}
 }
@@ -268,6 +268,9 @@ struct initial_code_block_visitor {
 		case RT_FLOAT:
 			op.store_float(next_literal());
 			break;
+		case RT_BYTE_ARRAY:
+			op.store_byte_array(next_literal());
+			break;
 		case RT_ENTRY_POINT:
 			op.store_value(parent->compute_entry_point_address(next_literal()));
 			break;
diff --git a/vm/compaction.cpp b/vm/compaction.cpp
index 34398e3d88..f596c3040f 100644
--- a/vm/compaction.cpp
+++ b/vm/compaction.cpp
@@ -114,6 +114,9 @@ struct code_block_compaction_relocation_visitor {
 		case RT_FLOAT:
 			op.store_float(slot_forwarder.visit_pointer(op.load_float(old_offset)));
 			break;
+		case RT_BYTE_ARRAY:
+			op.store_byte_array(slot_forwarder.visit_pointer(op.load_byte_array(old_offset)));
+			break;
 		case RT_ENTRY_POINT:
 		case RT_ENTRY_POINT_PIC:
 		case RT_ENTRY_POINT_PIC_TAIL:
diff --git a/vm/image.cpp b/vm/image.cpp
index 4dfdc4242e..b64d828947 100755
--- a/vm/image.cpp
+++ b/vm/image.cpp
@@ -188,6 +188,9 @@ struct code_block_fixup_relocation_visitor {
 		case RT_FLOAT:
 			op.store_float(data_visitor.visit_pointer(op.load_float(old_offset)));
 			break;
+		case RT_BYTE_ARRAY:
+			op.store_byte_array(data_visitor.visit_pointer(op.load_byte_array(old_offset)));
+			break;
 		case RT_ENTRY_POINT:
 		case RT_ENTRY_POINT_PIC:
 		case RT_ENTRY_POINT_PIC_TAIL:
diff --git a/vm/instruction_operands.cpp b/vm/instruction_operands.cpp
index af7d363aef..65a2f93b2d 100644
--- a/vm/instruction_operands.cpp
+++ b/vm/instruction_operands.cpp
@@ -72,6 +72,16 @@ cell instruction_operand::load_float(cell pointer)
 	return (cell)load_value(pointer) - boxed_float_offset;
 }
 
+cell instruction_operand::load_byte_array()
+{
+	return (cell)load_value() - byte_array_offset;
+}
+
+cell instruction_operand::load_byte_array(cell pointer)
+{
+	return (cell)load_value(pointer) - byte_array_offset;
+}
+
 code_block *instruction_operand::load_code_block(cell relative_to)
 {
 	return ((code_block *)load_value(relative_to) - 1);
@@ -150,6 +160,11 @@ void instruction_operand::store_float(cell value)
 	store_value((fixnum)value + boxed_float_offset);
 }
 
+void instruction_operand::store_byte_array(cell value)
+{
+	store_value((fixnum)value + byte_array_offset);
+}
+
 void instruction_operand::store_code_block(code_block *compiled)
 {
 	store_value((cell)compiled->entry_point());
diff --git a/vm/instruction_operands.hpp b/vm/instruction_operands.hpp
index 5c120c2ec7..7adb517f18 100644
--- a/vm/instruction_operands.hpp
+++ b/vm/instruction_operands.hpp
@@ -32,6 +32,8 @@ enum relocation_type {
 	RT_EXCEPTION_HANDLER,
 	/* pointer to a float's payload */
 	RT_FLOAT,
+	/* pointer to a byte array's payload */
+	RT_BYTE_ARRAY,
 
 };
 
@@ -116,9 +118,10 @@ struct relocation_entry {
 		case RT_DECKS_OFFSET:
 		case RT_EXCEPTION_HANDLER:
 		case RT_FLOAT:
+		case RT_BYTE_ARRAY:
 			return 0;
 		default:
-			critical_error("Bad rel type",rel_type());
+			critical_error("Bad rel type in number_of_parameters()",rel_type());
 			return -1; /* Can't happen */
 		}
 	}
@@ -158,6 +161,8 @@ struct instruction_operand {
 	fixnum load_value();
 	cell load_float(cell relative_to);
 	cell load_float();
+	cell load_byte_array(cell relative_to);
+	cell load_byte_array();
 	code_block *load_code_block(cell relative_to);
 	code_block *load_code_block();
 
@@ -165,6 +170,7 @@ struct instruction_operand {
 	void store_value_masked(fixnum value, cell mask, cell shift);
 	void store_value(fixnum value);
 	void store_float(cell value);
+	void store_byte_array(cell value);
 	void store_code_block(code_block *compiled);
 };
 
diff --git a/vm/layouts.hpp b/vm/layouts.hpp
index 5275c79612..35f04e7cc5 100644
--- a/vm/layouts.hpp
+++ b/vm/layouts.hpp
@@ -178,6 +178,8 @@ struct bignum : public object {
 	cell *data() const { return (cell *)(this + 1); }
 };
 
+const cell byte_array_offset = 16 - BYTE_ARRAY_TYPE; /* store_byte_array() adds this to, and load_byte_array() subtracts it from, a byte array reference to convert to/from the payload pointer kept in RT_BYTE_ARRAY operands; the 16 is presumably the size of the header preceding the payload -- TODO confirm */
+
 struct byte_array : public object {
 	static const cell type_number = BYTE_ARRAY_TYPE;
 	static const cell element_size = 1;
diff --git a/vm/slot_visitor.hpp b/vm/slot_visitor.hpp
index cb2db1c705..d0507da93f 100644
--- a/vm/slot_visitor.hpp
+++ b/vm/slot_visitor.hpp
@@ -200,6 +200,9 @@ struct literal_references_visitor {
 		case RT_FLOAT:
 			op.store_float(visitor->visit_pointer(op.load_float()));
 			break;
+		case RT_BYTE_ARRAY:
+			op.store_byte_array(visitor->visit_pointer(op.load_byte_array()));
+			break;
 		default:
 			break;
 		}