From e78121f38c05f69bb3049ea4d424eae6ab7a7d3f Mon Sep 17 00:00:00 2001
From: Slava Pestov <slava@slava-pestovs-macbook-pro.local>
Date: Wed, 26 Nov 2008 04:22:39 -0600
Subject: [PATCH] Rewrite remaining fixnum primitives in assembly (work in
 progress)

---
 vm/cpu-x86.32.S |  4 +++
 vm/cpu-x86.64.S |  4 +++
 vm/cpu-x86.S    | 36 ++++++++++++++++++++++++
 vm/debug.c      |  2 +-
 vm/math.c       | 73 +++++++++++++++++++------------------------------
 5 files changed, 73 insertions(+), 46 deletions(-)
diff --git a/vm/cpu-x86.32.S b/vm/cpu-x86.32.S
index 6ddbd52da2..36db5d6c80 100755
--- a/vm/cpu-x86.32.S
+++ b/vm/cpu-x86.32.S
@@ -12,6 +12,10 @@ and the callstack top is passed in EDX */
 
 #define NV_TEMP_REG %ebx
 
+#define ARITH_TEMP_1 %ebp /* scratch register used by the fixnum primitives in cpu-x86.S */
+#define ARITH_TEMP_2 %ebx /* second scratch; NOTE(review): same register as NV_TEMP_REG above -- confirm the two uses never overlap */
+#define DIV_RESULT %eax /* accumulator: implicit operand and low result of the one-operand imul in primitive_fixnum_multiply */
+
 #define CELL_SIZE 4
 #define STACK_PADDING 12
 
diff --git a/vm/cpu-x86.64.S b/vm/cpu-x86.64.S
index c981095d62..7b5b5f3167 100644
--- a/vm/cpu-x86.64.S
+++ b/vm/cpu-x86.64.S
@@ -9,6 +9,10 @@
 
 #define NV_TEMP_REG %rbp
 
+#define ARITH_TEMP_1 %r8 /* scratch register used by the fixnum primitives in cpu-x86.S */
+#define ARITH_TEMP_2 %r9 /* second scratch; holds the untagged operand in primitive_fixnum_multiply */
+#define DIV_RESULT %rax /* accumulator: implicit operand and low result of the one-operand imul in primitive_fixnum_multiply */
+
 #ifdef WINDOWS
 
 	#define ARG0 %rcx
diff --git a/vm/cpu-x86.S b/vm/cpu-x86.S
index 4d6737baeb..7a0d738fe0 100755
--- a/vm/cpu-x86.S
+++ b/vm/cpu-x86.S
@@ -1,3 +1,39 @@
+DEF(void,primitive_fixnum_add,(void)): /* fixnum+: add the two topmost data stack cells, leaving a single result cell */
+    mov (DS_REG),ARG0 /* ARG0 = topmost cell, still tagged */
+    mov -CELL_SIZE(DS_REG),ARG1 /* ARG1 = cell below it, still tagged */
+    sub $CELL_SIZE,DS_REG /* drop one cell; the result overwrites the remaining one */
+    mov ARG1,ARITH_TEMP_1 /* work in a scratch register so ARG0/ARG1 stay intact for the overflow handler */
+    add ARG0,ARITH_TEMP_1
+    jo MANGLE(overflow_fixnum_add) /* signed overflow: tail-jump to the C handler in vm/math.c with the operands in ARG0/ARG1 */
+    mov ARITH_TEMP_1,(DS_REG) /* no overflow: store the sum as the new top cell */
+    ret
+
+DEF(void,primitive_fixnum_subtract,(void)): /* fixnum-: subtract the topmost data stack cell from the one below it, leaving a single result cell */
+    mov (DS_REG),ARG1 /* ARG1 = topmost cell (subtrahend), still tagged */
+    mov -CELL_SIZE(DS_REG),ARG0 /* ARG0 = cell below it (minuend), still tagged */
+    sub $CELL_SIZE,DS_REG /* drop one cell; the result overwrites the remaining one */
+    mov ARG0,ARITH_TEMP_1 /* work in a scratch register so ARG0/ARG1 stay intact for the overflow handler */
+    sub ARG1,ARITH_TEMP_1 /* AT&T operand order: ARITH_TEMP_1 = ARG0 - ARG1 */
+    jo MANGLE(overflow_fixnum_subtract) /* signed overflow: tail-jump to the C handler in vm/math.c, which computes x - y from ARG0/ARG1 */
+    mov ARITH_TEMP_1,(DS_REG) /* no overflow: store the difference as the new top cell */
+    ret
+
+DEF(void,primitive_fixnum_multiply,(void)): /* fixnum*: multiply the two topmost data stack cells, leaving a single result cell */
+    mov (DS_REG),ARITH_TEMP_1 /* topmost cell, kept in a scratch register for the overflow path */
+    mov ARITH_TEMP_1,DIV_RESULT /* one-operand imul takes its implicit operand from the accumulator */
+    mov -CELL_SIZE(DS_REG),ARITH_TEMP_2 /* cell below the top */
+    sar $3,ARITH_TEMP_2 /* untag one operand only; presumably 3 == TAG_BITS -- TODO confirm */
+    sub $CELL_SIZE,DS_REG /* drop one cell; the result overwrites the remaining one */
+    imul ARITH_TEMP_2 /* accumulator *= ARITH_TEMP_2; the high half of the product goes to edx/rdx */
+    jo multiply_overflow /* OF set: the product does not fit in a single cell */
+    mov DIV_RESULT,(DS_REG) /* no overflow: store the product as the new top cell */
+    ret
+multiply_overflow: /* slow path: hand both operands, untagged, to the C handler */
+    sar $3,ARITH_TEMP_1 /* untag the operand that was multiplied while still tagged */
+    mov ARITH_TEMP_1,ARG0
+    mov ARITH_TEMP_2,ARG1 /* already untagged before the imul */
+    jmp MANGLE(overflow_fixnum_multiply) /* tail-jump; the handler in vm/math.c redoes the multiply on bignums */
+
 DEF(F_FASTCALL void,c_to_factor,(CELL quot)):
 	PUSH_NONVOLATILE
 	mov ARG0,NV_TEMP_REG
diff --git a/vm/debug.c b/vm/debug.c
index db8e60c781..909cc8f710 100755
--- a/vm/debug.c
+++ b/vm/debug.c
@@ -129,7 +129,7 @@ void print_nested_obj(CELL obj, F_FIXNUM nesting)
 		print_string(" ]");
 		break;
 	default:
-		print_string("#<type "); print_cell(type_of(obj)); print_string(" @ "); print_cell_hex(obj);
+		print_string("#<type "); print_cell(type_of(obj)); print_string(" @ "); print_cell_hex(obj); print_string(">");
 		break;
 	}
 }
diff --git a/vm/math.c b/vm/math.c
index 07493a947f..7e2274f30f 100644
--- a/vm/math.c
+++ b/vm/math.c
@@ -1,7 +1,6 @@
 #include "master.h"
 
 /* Fixnums */
-
 F_FIXNUM to_fixnum(CELL tagged)
 {
 	switch(TAG(tagged))
@@ -31,50 +30,35 @@ void primitive_float_to_fixnum(void)
 	drepl(tag_fixnum(float_to_fixnum(dpeek())));
 }
 
-#define POP_FIXNUMS(x,y) \
-	F_FIXNUM y = untag_fixnum_fast(dpop()); \
-	F_FIXNUM x = untag_fixnum_fast(dpeek());
-
-void primitive_fixnum_add(void)
+/* The fixnum+, fixnum- and fixnum* primitives are defined in cpu-x86.S. On
+overflow, they call these functions. */
+F_FASTCALL void overflow_fixnum_add(F_FIXNUM x, F_FIXNUM y) /* overflow slow path of primitive_fixnum_add in cpu-x86.S; x and y arrive still tagged */
 {
-	POP_FIXNUMS(x,y)
-	drepl(allot_integer(x + y));
+	drepl(tag_bignum(fixnum_to_bignum( /* the assembly jumps here only on overflow, so the sum is always boxed as a bignum */
+		untag_fixnum_fast(x) + untag_fixnum_fast(y))));
 }
 
-void primitive_fixnum_subtract(void)
+F_FASTCALL void overflow_fixnum_subtract(F_FIXNUM x, F_FIXNUM y) /* overflow slow path of primitive_fixnum_subtract in cpu-x86.S; x and y arrive still tagged */
 {
-	POP_FIXNUMS(x,y)
-	drepl(allot_integer(x - y));
+	drepl(tag_bignum(fixnum_to_bignum( /* the assembly jumps here only on overflow, so the difference is always boxed as a bignum */
+		untag_fixnum_fast(x) - untag_fixnum_fast(y))));
 }
 
-/* Multiply two integers, and trap overflow.
-Thanks to David Blaikie (The_Vulture from freenode #java) for the hint. */
-void primitive_fixnum_multiply(void)
+F_FASTCALL void overflow_fixnum_multiply(F_FIXNUM x, F_FIXNUM y) /* overflow slow path of primitive_fixnum_multiply in cpu-x86.S; unlike the add/subtract handlers, x and y arrive already untagged */
 {
-	POP_FIXNUMS(x,y)
-
-	if(x == 0 || y == 0)
-		drepl(tag_fixnum(0));
-	else
-	{
-		F_FIXNUM prod = x * y;
-		/* if this is not equal, we have overflow */
-		if(prod / x == y)
-			drepl(allot_integer(prod));
-		else
-		{
-			F_ARRAY *bx = fixnum_to_bignum(x);
-			REGISTER_BIGNUM(bx);
-			F_ARRAY *by = fixnum_to_bignum(y);
-			UNREGISTER_BIGNUM(bx);
-			drepl(tag_bignum(bignum_multiply(bx,by)));
-		}
-	}
+	F_ARRAY *bx = fixnum_to_bignum(x);
+	REGISTER_BIGNUM(bx); /* NOTE(review): presumably keeps bx valid while the next conversion allocates -- confirm */
+	F_ARRAY *by = fixnum_to_bignum(y);
+	UNREGISTER_BIGNUM(bx);
+	drepl(tag_bignum(bignum_multiply(bx,by))); /* the product is computed on bignums and passed, tagged, to drepl */
 }
 
+/* Division can only overflow when we are dividing the most negative fixnum
+by -1. */
 void primitive_fixnum_divint(void)
 {
-	POP_FIXNUMS(x,y)
+	F_FIXNUM y = untag_fixnum_fast(dpop()); \
+	F_FIXNUM x = untag_fixnum_fast(dpeek());
 	F_FIXNUM result = x / y;
 	if(result == -FIXNUM_MIN)
 		drepl(allot_integer(-FIXNUM_MIN));
@@ -99,31 +83,30 @@ void primitive_fixnum_divmod(void)
 }
 
 /*
- * Note the hairy overflow check.
  * If we're shifting right by n bits, we won't overflow as long as none of the
  * high WORD_SIZE-TAG_BITS-n bits are set.
  */
+#define SIGN_MASK(x) ((x) >> (CELLS * 8 - 1)) /* all ones when x is negative, zero otherwise; relies on >> being arithmetic for signed values */
+#define BRANCHLESS_MAX(x,y) ((x) - (((x) - (y)) & SIGN_MASK((x) - (y)))) /* every argument is parenthesized: callers pass expressions such as -WORD_SIZE + 1 */
+#define BRANCHLESS_ABS(x) (((x) ^ SIGN_MASK(x)) - SIGN_MASK(x)) /* two's-complement absolute value without a branch */
+
 void primitive_fixnum_shift(void)
 {
-	POP_FIXNUMS(x,y)
+	F_FIXNUM y = untag_fixnum_fast(dpop()); \
+	F_FIXNUM x = untag_fixnum_fast(dpeek());
 
-	if(x == 0 || y == 0)
-	{
-		drepl(tag_fixnum(x));
+	if(x == 0)
 		return;
-	}
 	else if(y < 0)
 	{
-		if(y <= -WORD_SIZE)
-			drepl(x < 0 ? tag_fixnum(-1) : tag_fixnum(0));
-		else
-			drepl(tag_fixnum(x >> -y));
+		y = BRANCHLESS_MAX(y,-WORD_SIZE + 1);
+		drepl(tag_fixnum(x >> -y));
 		return;
 	}
 	else if(y < WORD_SIZE - TAG_BITS)
 	{
 		F_FIXNUM mask = -((F_FIXNUM)1 << (WORD_SIZE - 1 - TAG_BITS - y));
-		if((x > 0 && (x & mask) == 0) || (x & mask) == mask)
+		if(!(BRANCHLESS_ABS(x) & mask))
 		{
 			drepl(tag_fixnum(x << y));
 			return;