From 63edd20a55840c78cf2807d8afeb5d0ada0ef206 Mon Sep 17 00:00:00 2001
From: Slava Pestov <slava@slava-pestovs-macbook-pro.local>
Date: Sat, 19 Dec 2009 10:59:56 +1300
Subject: [PATCH 1/8] vm: eliminating register variables work in progress.
 Works on x86-32 with non-optimizing compiler

---
 basis/cpu/x86/32/bootstrap.factor |  57 +++++--
 vm/alien.cpp                      | 138 ++++++++--------
 vm/alien.hpp                      |   4 +-
 vm/arrays.cpp                     |   8 +-
 vm/bignum.cpp                     |   6 +-
 vm/booleans.cpp                   |  15 +-
 vm/booleans.hpp                   |   3 +-
 vm/byte_arrays.cpp                |   8 +-
 vm/callbacks.cpp                  |   4 +-
 vm/callstack.cpp                  |  34 ++--
 vm/callstack.hpp                  |   2 -
 vm/code_heap.cpp                  |   4 +-
 vm/contexts.cpp                   |  92 +++--------
 vm/contexts.hpp                   |  86 +++++-----
 vm/cpu-x86.32.S                   |  76 ++++++---
 vm/cpu-x86.32.hpp                 |   5 +-
 vm/cpu-x86.S                      |  15 +-
 vm/cpu-x86.hpp                    |   6 +-
 vm/data_heap.cpp                  |   4 +-
 vm/debug.cpp                      |  10 +-
 vm/dispatch.cpp                   |  18 +--
 vm/errors.cpp                     |  14 +-
 vm/factor.cpp                     |   4 +-
 vm/gc.cpp                         |   6 +-
 vm/image.cpp                      |   4 +-
 vm/inline_cache.cpp               |  10 +-
 vm/io.cpp                         |  47 +++---
 vm/math.cpp                       | 261 +++++++++++++++---------------
 vm/math.hpp                       |  34 ++--
 vm/objects.cpp                    |  28 ++--
 vm/os-macosx.mm                   |   4 +-
 vm/os-unix.cpp                    |   4 +-
 vm/os-windows.cpp                 |   4 +-
 vm/primitives.hpp                 |  27 +---
 vm/profiler.cpp                   |   2 +-
 vm/quotations.cpp                 |  21 ++-
 vm/quotations.hpp                 |   2 +-
 vm/run.cpp                        |   8 +-
 vm/strings.cpp                    |  26 +--
 vm/tuples.cpp                     |  12 +-
 vm/vm.hpp                         |  37 ++---
 vm/words.cpp                      |  26 +--
 42 files changed, 569 insertions(+), 607 deletions(-)

diff --git a/basis/cpu/x86/32/bootstrap.factor b/basis/cpu/x86/32/bootstrap.factor
index 97a3ae2c1f..ed5d1950bb 100644
--- a/basis/cpu/x86/32/bootstrap.factor
+++ b/basis/cpu/x86/32/bootstrap.factor
@@ -34,18 +34,37 @@ IN: bootstrap.x86
     ESP stack-frame-size 3 bootstrap-cells - SUB
 ] jit-prolog jit-define
 
+: jit-load-vm ( -- )
+    EBP 0 MOV 0 rc-absolute-cell jit-vm ;
+
 : jit-save-context ( -- )
-    EAX 0 [] MOV rc-absolute-cell rt-context jit-rel
-    ! save stack pointer
-    ECX ESP -4 [+] LEA
-    EAX [] ECX MOV ;
+    ! VM pointer must be in EBP already
+    ECX EBP [] MOV
+    ! save ctx->callstack_top
+    EAX ESP -4 [+] LEA
+    ECX [] EAX MOV
+    ! save ctx->datastack
+    ECX 8 [+] ds-reg MOV
+    ! save ctx->retainstack
+    ECX 12 [+] rs-reg MOV ;
+
+: jit-restore-context ( -- )
+    ! VM pointer must be in EBP already
+    ECX EBP [] MOV
+    ! restore ctx->datastack
+    ds-reg ECX 8 [+] MOV
+    ! restore ctx->retainstack
+    rs-reg ECX 12 [+] MOV ;
 
 [
+    jit-load-vm
+    ! save ds, rs registers
     jit-save-context
-    ! pass vm ptr to primitive
-    EAX 0 MOV rc-absolute-cell rt-vm jit-rel
     ! call the primitive
+    ESP [] EBP MOV
     0 CALL rc-relative rt-primitive jit-rel
+    ! restore ds, rs registers
+    jit-restore-context
 ] jit-primitive jit-define
 
 ! Inline cache miss entry points
@@ -55,10 +74,12 @@ IN: bootstrap.x86
 ! These are always in tail position with an existing stack
 ! frame, and the stack. The frame setup takes this into account.
 : jit-inline-cache-miss ( -- )
+    jit-load-vm
     jit-save-context
-    ESP 4 [+] 0 MOV 0 rc-absolute-cell jit-vm
+    ESP 4 [+] EBP MOV
     ESP [] EBX MOV
-    0 CALL "inline_cache_miss" f rc-relative jit-dlsym ;
+    0 CALL "inline_cache_miss" f rc-relative jit-dlsym
+    jit-restore-context ;
 
 [ jit-load-return-address jit-inline-cache-miss ]
 [ EAX CALL ]
@@ -72,16 +93,17 @@ IN: bootstrap.x86
 
 ! Overflowing fixnum arithmetic
 : jit-overflow ( insn func -- )
-    jit-save-context
-    EAX ds-reg -4 [+] MOV
-    EDX ds-reg [] MOV
     ds-reg 4 SUB
+    jit-load-vm
+    jit-save-context
+    EAX ds-reg [] MOV
+    EDX ds-reg 4 [+] MOV
     ECX EAX MOV
     [ [ ECX EDX ] dip call( dst src -- ) ] dip
     ds-reg [] ECX MOV
     [ JNO ]
     [
-        ECX 0 MOV 0 rc-absolute-cell jit-vm
+        ECX EBP MOV
         [ 0 CALL ] dip f rc-relative jit-dlsym
     ]
     jit-conditional ;
@@ -91,12 +113,13 @@ IN: bootstrap.x86
 [ [ SUB ] "overflow_fixnum_subtract" jit-overflow ] \ fixnum- define-sub-primitive
 
 [
-    jit-save-context
-    ECX ds-reg -4 [+] MOV
-    EBX ds-reg [] MOV
-    EBX tag-bits get SAR
     ds-reg 4 SUB
+    jit-load-vm
+    jit-save-context
+    ECX ds-reg [] MOV
     EAX ECX MOV
+    EBX ds-reg 4 [+] MOV
+    EBX tag-bits get SAR
     EBX IMUL
     ds-reg [] EAX MOV
     [ JNO ]
@@ -104,7 +127,7 @@ IN: bootstrap.x86
         EAX ECX MOV
         EAX tag-bits get SAR
         EDX EBX MOV
-        ECX 0 MOV 0 rc-absolute-cell jit-vm
+        ECX EBP MOV
         0 CALL "overflow_fixnum_multiply" f rc-relative jit-dlsym
     ]
     jit-conditional
diff --git a/vm/alien.cpp b/vm/alien.cpp
index d07b6e353b..5fd6cad4af 100755
--- a/vm/alien.cpp
+++ b/vm/alien.cpp
@@ -27,9 +27,17 @@ char *factor_vm::pinned_alien_offset(cell obj)
 	}
 }
 
+VM_C_API char *pinned_alien_offset(cell obj, factor_vm *parent)
+{
+	return parent->pinned_alien_offset(obj);
+}
+
 /* make an alien */
 cell factor_vm::allot_alien(cell delegate_, cell displacement)
 {
+	if(delegate_ == false_object && displacement == 0)
+		return false_object;
+
 	data_root<object> delegate(delegate_,this);
 	data_root<alien> new_alien(allot<alien>(sizeof(alien)),this);
 
@@ -49,27 +57,32 @@ cell factor_vm::allot_alien(cell delegate_, cell displacement)
 	return new_alien.value();
 }
 
+cell factor_vm::allot_alien(void *address)
+{
+	return allot_alien(false_object,(cell)address);
+}
+
+VM_C_API cell allot_alien(void *address, factor_vm *vm)
+{
+	return vm->allot_alien(address);
+}
+
 /* make an alien pointing at an offset of another alien */
 void factor_vm::primitive_displaced_alien()
 {
-	cell alien = dpop();
-	cell displacement = to_cell(dpop());
+	cell alien = ctx->pop();
+	cell displacement = to_cell(ctx->pop());
 
-	if(!to_boolean(alien) && displacement == 0)
-		dpush(false_object);
-	else
+	switch(tagged<object>(alien).type())
 	{
-		switch(tagged<object>(alien).type())
-		{
-		case BYTE_ARRAY_TYPE:
-		case ALIEN_TYPE:
-		case F_TYPE:
-			dpush(allot_alien(alien,displacement));
-			break;
-		default:
-			type_error(ALIEN_TYPE,alien);
-			break;
-		}
+	case BYTE_ARRAY_TYPE:
+	case ALIEN_TYPE:
+	case F_TYPE:
+		ctx->push(allot_alien(alien,displacement));
+		break;
+	default:
+		type_error(ALIEN_TYPE,alien);
+		break;
 	}
 }
 
@@ -77,59 +90,59 @@ void factor_vm::primitive_displaced_alien()
 if the object is a byte array, as a sanity check. */
 void factor_vm::primitive_alien_address()
 {
-	box_unsigned_cell((cell)pinned_alien_offset(dpop()));
+	ctx->push(allot_cell((cell)pinned_alien_offset(ctx->pop())));
 }
 
 /* pop ( alien n ) from datastack, return alien's address plus n */
 void *factor_vm::alien_pointer()
 {
-	fixnum offset = to_fixnum(dpop());
-	return unbox_alien() + offset;
+	fixnum offset = to_fixnum(ctx->pop());
+	return alien_offset(ctx->pop()) + offset;
 }
 
 /* define words to read/write values at an alien address */
-#define DEFINE_ALIEN_ACCESSOR(name,type,boxer,to) \
+#define DEFINE_ALIEN_ACCESSOR(name,type,from,to) \
 	PRIMITIVE(alien_##name) \
 	{ \
-		parent->boxer(*(type*)(parent->alien_pointer())); \
+		parent->ctx->push(from(*(type*)(parent->alien_pointer()),parent)); \
 	} \
 	PRIMITIVE(set_alien_##name) \
 	{ \
 		type *ptr = (type *)parent->alien_pointer(); \
-		type value = parent->to(dpop()); \
+		type value = to(parent->ctx->pop(),parent); \
 		*ptr = value; \
 	}
 
-DEFINE_ALIEN_ACCESSOR(signed_cell,fixnum,box_signed_cell,to_fixnum)
-DEFINE_ALIEN_ACCESSOR(unsigned_cell,cell,box_unsigned_cell,to_cell)
-DEFINE_ALIEN_ACCESSOR(signed_8,s64,box_signed_8,to_signed_8)
-DEFINE_ALIEN_ACCESSOR(unsigned_8,u64,box_unsigned_8,to_unsigned_8)
-DEFINE_ALIEN_ACCESSOR(signed_4,s32,box_signed_4,to_fixnum)
-DEFINE_ALIEN_ACCESSOR(unsigned_4,u32,box_unsigned_4,to_cell)
-DEFINE_ALIEN_ACCESSOR(signed_2,s16,box_signed_2,to_fixnum)
-DEFINE_ALIEN_ACCESSOR(unsigned_2,u16,box_unsigned_2,to_cell)
-DEFINE_ALIEN_ACCESSOR(signed_1,s8,box_signed_1,to_fixnum)
-DEFINE_ALIEN_ACCESSOR(unsigned_1,u8,box_unsigned_1,to_cell)
-DEFINE_ALIEN_ACCESSOR(float,float,box_float,to_float)
-DEFINE_ALIEN_ACCESSOR(double,double,box_double,to_double)
-DEFINE_ALIEN_ACCESSOR(cell,void *,box_alien,pinned_alien_offset)
+DEFINE_ALIEN_ACCESSOR(signed_cell,fixnum,from_signed_cell,to_fixnum)
+DEFINE_ALIEN_ACCESSOR(unsigned_cell,cell,from_unsigned_cell,to_cell)
+DEFINE_ALIEN_ACCESSOR(signed_8,s64,from_signed_8,to_signed_8)
+DEFINE_ALIEN_ACCESSOR(unsigned_8,u64,from_unsigned_8,to_unsigned_8)
+DEFINE_ALIEN_ACCESSOR(signed_4,s32,from_signed_4,to_fixnum)
+DEFINE_ALIEN_ACCESSOR(unsigned_4,u32,from_unsigned_4,to_cell)
+DEFINE_ALIEN_ACCESSOR(signed_2,s16,from_signed_2,to_fixnum)
+DEFINE_ALIEN_ACCESSOR(unsigned_2,u16,from_unsigned_2,to_cell)
+DEFINE_ALIEN_ACCESSOR(signed_1,s8,from_signed_1,to_fixnum)
+DEFINE_ALIEN_ACCESSOR(unsigned_1,u8,from_unsigned_1,to_cell)
+DEFINE_ALIEN_ACCESSOR(float,float,from_float,to_float)
+DEFINE_ALIEN_ACCESSOR(double,double,from_double,to_double)
+DEFINE_ALIEN_ACCESSOR(cell,void *,allot_alien,pinned_alien_offset)
 
 /* open a native library and push a handle */
 void factor_vm::primitive_dlopen()
 {
-	data_root<byte_array> path(dpop(),this);
+	data_root<byte_array> path(ctx->pop(),this);
 	path.untag_check(this);
 	data_root<dll> library(allot<dll>(sizeof(dll)),this);
 	library->path = path.value();
 	ffi_dlopen(library.untagged());
-	dpush(library.value());
+	ctx->push(library.value());
 }
 
 /* look up a symbol in a native library */
 void factor_vm::primitive_dlsym()
 {
-	data_root<object> library(dpop(),this);
-	data_root<byte_array> name(dpop(),this);
+	data_root<object> library(ctx->pop(),this);
+	data_root<byte_array> name(ctx->pop(),this);
 	name.untag_check(this);
 
 	symbol_char *sym = name->data<symbol_char>();
@@ -139,29 +152,29 @@ void factor_vm::primitive_dlsym()
 		dll *d = untag_check<dll>(library.value());
 
 		if(d->dll == NULL)
-			dpush(false_object);
+			ctx->push(false_object);
 		else
-			box_alien(ffi_dlsym(d,sym));
+			ctx->push(allot_alien(ffi_dlsym(d,sym)));
 	}
 	else
-		box_alien(ffi_dlsym(NULL,sym));
+		ctx->push(allot_alien(ffi_dlsym(NULL,sym)));
 }
 
 /* close a native library handle */
 void factor_vm::primitive_dlclose()
 {
-	dll *d = untag_check<dll>(dpop());
+	dll *d = untag_check<dll>(ctx->pop());
 	if(d->dll != NULL)
 		ffi_dlclose(d);
 }
 
 void factor_vm::primitive_dll_validp()
 {
-	cell library = dpop();
+	cell library = ctx->pop();
 	if(to_boolean(library))
-		dpush(tag_boolean(untag_check<dll>(library)->dll != NULL));
+		ctx->push(tag_boolean(untag_check<dll>(library)->dll != NULL));
 	else
-		dpush(true_object);
+		ctx->push(true_object);
 }
 
 /* gets the address of an object representing a C pointer */
@@ -186,32 +199,7 @@ VM_C_API char *alien_offset(cell obj, factor_vm *parent)
 	return parent->alien_offset(obj);
 }
 
-/* pop an object representing a C pointer */
-char *factor_vm::unbox_alien()
-{
-	return alien_offset(dpop());
-}
-
-VM_C_API char *unbox_alien(factor_vm *parent)
-{
-	return parent->unbox_alien();
-}
-
-/* make an alien and push */
-void factor_vm::box_alien(void *ptr)
-{
-	if(ptr == NULL)
-		dpush(false_object);
-	else
-		dpush(allot_alien(false_object,(cell)ptr));
-}
-
-VM_C_API void box_alien(void *ptr, factor_vm *parent)
-{
-	return parent->box_alien(ptr);
-}
-
-/* for FFI calls passing structs by value */
+/* For FFI calls passing structs by value. Cannot allocate */
 void factor_vm::to_value_struct(cell src, void *dest, cell size)
 {
 	memcpy(dest,alien_offset(src),size);
@@ -222,12 +210,12 @@ VM_C_API void to_value_struct(cell src, void *dest, cell size, factor_vm *parent
 	return parent->to_value_struct(src,dest,size);
 }
 
-/* for FFI callbacks receiving structs by value */
+/* For FFI callbacks receiving structs by value */
 void factor_vm::box_value_struct(void *src, cell size)
 {
 	byte_array *bytes = allot_byte_array(size);
 	memcpy(bytes->data<void>(),src,size);
-	dpush(tag<byte_array>(bytes));
+	ctx->push(tag<byte_array>(bytes));
 }
 
 VM_C_API void box_value_struct(void *src, cell size,factor_vm *parent)
@@ -267,7 +255,7 @@ VM_C_API void box_medium_struct(cell x1, cell x2, cell x3, cell x4, cell size, f
 
 void factor_vm::primitive_vm_ptr()
 {
-	box_alien(this);
+	ctx->push(allot_alien(this));
 }
 
 }
diff --git a/vm/alien.hpp b/vm/alien.hpp
index 129cc2560a..906e204ebe 100755
--- a/vm/alien.hpp
+++ b/vm/alien.hpp
@@ -2,8 +2,8 @@ namespace factor
 {
 
 VM_C_API char *alien_offset(cell object, factor_vm *vm);
-VM_C_API char *unbox_alien(factor_vm *vm);
-VM_C_API void box_alien(void *ptr, factor_vm *vm);
+VM_C_API char *pinned_alien_offset(cell object, factor_vm *vm);
+VM_C_API cell allot_alien(void *address, factor_vm *vm);
 VM_C_API void to_value_struct(cell src, void *dest, cell size, factor_vm *vm);
 VM_C_API void box_value_struct(void *src, cell size,factor_vm *vm);
 VM_C_API void box_small_struct(cell x, cell y, cell size,factor_vm *vm);
diff --git a/vm/arrays.cpp b/vm/arrays.cpp
index 4c97ef59a0..cdfee274c7 100644
--- a/vm/arrays.cpp
+++ b/vm/arrays.cpp
@@ -13,11 +13,11 @@ array *factor_vm::allot_array(cell capacity, cell fill_)
 
 void factor_vm::primitive_array()
 {
-	data_root<object> fill(dpop(),this);
+	data_root<object> fill(ctx->pop(),this);
 	cell capacity = unbox_array_size();
 	array *new_array = allot_uninitialized_array<array>(capacity);
 	memset_cell(new_array->data(),fill.value(),capacity * sizeof(cell));
-	dpush(tag<array>(new_array));
+	ctx->push(tag<array>(new_array));
 }
 
 cell factor_vm::allot_array_1(cell obj_)
@@ -54,10 +54,10 @@ cell factor_vm::allot_array_4(cell v1_, cell v2_, cell v3_, cell v4_)
 
 void factor_vm::primitive_resize_array()
 {
-	data_root<array> a(dpop(),this);
+	data_root<array> a(ctx->pop(),this);
 	a.untag_check(this);
 	cell capacity = unbox_array_size();
-	dpush(tag<array>(reallot_array(a.untagged(),capacity)));
+	ctx->push(tag<array>(reallot_array(a.untagged(),capacity)));
 }
 
 void growable_array::add(cell elt_)
diff --git a/vm/bignum.cpp b/vm/bignum.cpp
index 5a391e7625..c3d47d45f3 100755
--- a/vm/bignum.cpp
+++ b/vm/bignum.cpp
@@ -329,6 +329,7 @@ bignum *factor_vm::bignum_remainder(bignum * numerator, bignum * denominator)
 		}
 }
 
+/* allocates memory */
 #define FOO_TO_BIGNUM(name,type,utype)					\
 bignum * factor_vm::name##_to_bignum(type n)				\
 {									\
@@ -358,13 +359,13 @@ bignum * factor_vm::name##_to_bignum(type n)				\
 		return (result);					\
 	}								\
 }
-  
-/* all below allocate memory */
+
 FOO_TO_BIGNUM(cell,cell,cell)
 FOO_TO_BIGNUM(fixnum,fixnum,cell)
 FOO_TO_BIGNUM(long_long,s64,u64)
 FOO_TO_BIGNUM(ulong_long,u64,u64)
 
+/* cannot allocate memory */
 #define BIGNUM_TO_FOO(name,type,utype)					\
 	type factor_vm::bignum_to_##name(bignum * bignum)		\
 	{								\
@@ -380,7 +381,6 @@ FOO_TO_BIGNUM(ulong_long,u64,u64)
 		}							\
 	}
 
-/* all of the below allocate memory */
 BIGNUM_TO_FOO(cell,cell,cell);
 BIGNUM_TO_FOO(fixnum,fixnum,cell);
 BIGNUM_TO_FOO(long_long,s64,u64)
diff --git a/vm/booleans.cpp b/vm/booleans.cpp
index 34955cec8c..dedb385f3d 100644
--- a/vm/booleans.cpp
+++ b/vm/booleans.cpp
@@ -3,19 +3,14 @@
 namespace factor
 {
 
-void factor_vm::box_boolean(bool value)
-{
-	dpush(tag_boolean(value));
-}
-
-VM_C_API void box_boolean(bool value, factor_vm *parent)
-{
-	return parent->box_boolean(value);
-}
-
 VM_C_API bool to_boolean(cell value, factor_vm *parent)
 {
 	return to_boolean(value);
 }
 
+VM_C_API cell from_boolean(bool value, factor_vm *parent)
+{
+	return parent->tag_boolean(value);
+}
+
 }
diff --git a/vm/booleans.hpp b/vm/booleans.hpp
index 5654f445ad..a11103c5c6 100644
--- a/vm/booleans.hpp
+++ b/vm/booleans.hpp
@@ -1,9 +1,10 @@
 namespace factor
 {
 
-VM_C_API void box_boolean(bool value, factor_vm *vm);
 VM_C_API bool to_boolean(cell value, factor_vm *vm);
+VM_C_API cell from_boolean(bool value, factor_vm *vm);
 
+/* Cannot allocate */
 inline static bool to_boolean(cell value)
 {
 	return value != false_object;
diff --git a/vm/byte_arrays.cpp b/vm/byte_arrays.cpp
index 7cfe6c2ff0..1986b5d35c 100644
--- a/vm/byte_arrays.cpp
+++ b/vm/byte_arrays.cpp
@@ -13,21 +13,21 @@ byte_array *factor_vm::allot_byte_array(cell size)
 void factor_vm::primitive_byte_array()
 {
 	cell size = unbox_array_size();
-	dpush(tag<byte_array>(allot_byte_array(size)));
+	ctx->push(tag<byte_array>(allot_byte_array(size)));
 }
 
 void factor_vm::primitive_uninitialized_byte_array()
 {
 	cell size = unbox_array_size();
-	dpush(tag<byte_array>(allot_uninitialized_array<byte_array>(size)));
+	ctx->push(tag<byte_array>(allot_uninitialized_array<byte_array>(size)));
 }
 
 void factor_vm::primitive_resize_byte_array()
 {
-	data_root<byte_array> array(dpop(),this);
+	data_root<byte_array> array(ctx->pop(),this);
 	array.untag_check(this);
 	cell capacity = unbox_array_size();
-	dpush(tag<byte_array>(reallot_array(array.untagged(),capacity)));
+	ctx->push(tag<byte_array>(reallot_array(array.untagged(),capacity)));
 }
 
 void growable_byte_array::append_bytes(void *elts, cell len)
diff --git a/vm/callbacks.cpp b/vm/callbacks.cpp
index 7ab44c2dc7..e5cdb805d3 100644
--- a/vm/callbacks.cpp
+++ b/vm/callbacks.cpp
@@ -81,9 +81,9 @@ void callback_heap::update()
 
 void factor_vm::primitive_callback()
 {
-	tagged<word> w(dpop());
+	tagged<word> w(ctx->pop());
 	w.untag_check(this);
-	box_alien(callbacks->add(w.value())->xt());
+	ctx->push(allot_alien(callbacks->add(w.value())->xt()));
 }
 
 }
diff --git a/vm/callstack.cpp b/vm/callstack.cpp
index cbd18b3c1a..714a4585c3 100755
--- a/vm/callstack.cpp
+++ b/vm/callstack.cpp
@@ -57,14 +57,15 @@ void factor_vm::primitive_callstack()
 
 	callstack *stack = allot_callstack(size);
 	memcpy(stack->top(),top,size);
-	dpush(tag<callstack>(stack));
+	ctx->push(tag<callstack>(stack));
 }
 
 void factor_vm::primitive_set_callstack()
 {
-	callstack *stack = untag_check<callstack>(dpop());
+	callstack *stack = untag_check<callstack>(ctx->pop());
 
-	set_callstack(ctx->callstack_bottom,
+	set_callstack(this,
+		ctx->callstack_bottom,
 		stack->top(),
 		untag_fixnum(stack->length),
 		memcpy);
@@ -157,13 +158,13 @@ struct stack_frame_accumulator {
 
 void factor_vm::primitive_callstack_to_array()
 {
-	data_root<callstack> callstack(dpop(),this);
+	data_root<callstack> callstack(ctx->pop(),this);
 
 	stack_frame_accumulator accum(this);
 	iterate_callstack_object(callstack.untagged(),accum);
 	accum.frames.trim();
 
-	dpush(accum.frames.elements.value());
+	ctx->push(accum.frames.elements.value());
 }
 
 stack_frame *factor_vm::innermost_stack_frame(callstack *stack)
@@ -182,20 +183,20 @@ stack_frame *factor_vm::innermost_stack_frame(callstack *stack)
 Used by the single stepper. */
 void factor_vm::primitive_innermost_stack_frame_executing()
 {
-	stack_frame *frame = innermost_stack_frame(untag_check<callstack>(dpop()));
-	dpush(frame_executing_quot(frame));
+	stack_frame *frame = innermost_stack_frame(untag_check<callstack>(ctx->pop()));
+	ctx->push(frame_executing_quot(frame));
 }
 
 void factor_vm::primitive_innermost_stack_frame_scan()
 {
-	stack_frame *frame = innermost_stack_frame(untag_check<callstack>(dpop()));
-	dpush(frame_scan(frame));
+	stack_frame *frame = innermost_stack_frame(untag_check<callstack>(ctx->pop()));
+	ctx->push(frame_scan(frame));
 }
 
 void factor_vm::primitive_set_innermost_stack_frame_quot()
 {
-	data_root<callstack> callstack(dpop(),this);
-	data_root<quotation> quot(dpop(),this);
+	data_root<callstack> callstack(ctx->pop(),this);
+	data_root<quotation> quot(ctx->pop(),this);
 
 	callstack.untag_check(this);
 	quot.untag_check(this);
@@ -208,15 +209,4 @@ void factor_vm::primitive_set_innermost_stack_frame_quot()
 	FRAME_RETURN_ADDRESS(inner,this) = (char *)quot->xt + offset;
 }
 
-/* called before entry into Factor code. */
-void factor_vm::save_callstack_bottom(stack_frame *callstack_bottom)
-{
-	ctx->callstack_bottom = callstack_bottom;
-}
-
-VM_ASM_API void save_callstack_bottom(stack_frame *callstack_bottom, factor_vm *parent)
-{
-	return parent->save_callstack_bottom(callstack_bottom);
-}
-
 }
diff --git a/vm/callstack.hpp b/vm/callstack.hpp
index dcbf78632e..9f8867447c 100755
--- a/vm/callstack.hpp
+++ b/vm/callstack.hpp
@@ -6,8 +6,6 @@ inline static cell callstack_size(cell size)
 	return sizeof(callstack) + size;
 }
 
-VM_ASM_API void save_callstack_bottom(stack_frame *callstack_bottom, factor_vm *parent);
-
 /* This is a little tricky. The iterator may allocate memory, so we
 keep the callstack in a GC root and use relative offsets */
 template<typename Iterator> void factor_vm::iterate_callstack_object(callstack *stack_, Iterator &iterator)
diff --git a/vm/code_heap.cpp b/vm/code_heap.cpp
index 9e64ff6552..ac1c0157d2 100755
--- a/vm/code_heap.cpp
+++ b/vm/code_heap.cpp
@@ -96,7 +96,7 @@ void factor_vm::update_code_heap_words()
 
 void factor_vm::primitive_modify_code_heap()
 {
-	data_root<array> alist(dpop(),this);
+	data_root<array> alist(ctx->pop(),this);
 
 	cell count = array_capacity(alist.untagged());
 
@@ -163,7 +163,7 @@ code_heap_room factor_vm::code_room()
 void factor_vm::primitive_code_room()
 {
 	code_heap_room room = code_room();
-	dpush(tag<byte_array>(byte_array_from_value(&room)));
+	ctx->push(tag<byte_array>(byte_array_from_value(&room)));
 }
 
 struct stack_trace_stripper {
diff --git a/vm/contexts.cpp b/vm/contexts.cpp
index f5fac1119e..4ea378a6f9 100644
--- a/vm/contexts.cpp
+++ b/vm/contexts.cpp
@@ -8,42 +8,15 @@ context::context(cell ds_size, cell rs_size) :
 	callstack_bottom(NULL),
 	datastack(0),
 	retainstack(0),
-	datastack_save(0),
-	retainstack_save(0),
 	magic_frame(NULL),
 	datastack_region(new segment(ds_size,false)),
 	retainstack_region(new segment(rs_size,false)),
 	catchstack_save(0),
 	current_callback_save(0),
-	next(NULL) {}
-
-void factor_vm::reset_datastack()
+	next(NULL)
 {
-	ds = ds_bot - sizeof(cell);
-}
-
-void factor_vm::reset_retainstack()
-{
-	rs = rs_bot - sizeof(cell);
-}
-
-static const cell stack_reserved = (64 * sizeof(cell));
-
-void factor_vm::fix_stacks()
-{
-	if(ds + sizeof(cell) < ds_bot || ds + stack_reserved >= ds_top) reset_datastack();
-	if(rs + sizeof(cell) < rs_bot || rs + stack_reserved >= rs_top) reset_retainstack();
-}
-
-/* called before entry into foreign C code. Note that ds and rs might
-be stored in registers, so callbacks must save and restore the correct values */
-void factor_vm::save_stacks()
-{
-	if(ctx)
-	{
-		ctx->datastack = ds;
-		ctx->retainstack = rs;
-	}
+	reset_datastack();
+	reset_retainstack();
 }
 
 context *factor_vm::alloc_context()
@@ -75,19 +48,6 @@ void factor_vm::nest_stacks(stack_frame *magic_frame)
 	new_ctx->callstack_bottom = (stack_frame *)-1;
 	new_ctx->callstack_top = (stack_frame *)-1;
 
-	/* note that these register values are not necessarily valid stack
-	pointers. they are merely saved non-volatile registers, and are
-	restored in unnest_stacks(). consider this scenario:
-	- factor code calls C function
-	- C function saves ds/cs registers (since they're non-volatile)
-	- C function clobbers them
-	- C function calls Factor callback
-	- Factor callback returns
-	- C function restores registers
-	- C function returns to Factor code */
-	new_ctx->datastack_save = ds;
-	new_ctx->retainstack_save = rs;
-
 	new_ctx->magic_frame = magic_frame;
 
 	/* save per-callback special_objects */
@@ -96,9 +56,6 @@ void factor_vm::nest_stacks(stack_frame *magic_frame)
 
 	new_ctx->next = ctx;
 	ctx = new_ctx;
-
-	reset_datastack();
-	reset_retainstack();
 }
 
 void nest_stacks(stack_frame *magic_frame, factor_vm *parent)
@@ -109,9 +66,6 @@ void nest_stacks(stack_frame *magic_frame, factor_vm *parent)
 /* called when leaving a compiled callback */
 void factor_vm::unnest_stacks()
 {
-	ds = ctx->datastack_save;
-	rs = ctx->retainstack_save;
-
 	/* restore per-callback special_objects */
 	special_objects[OBJ_CURRENT_CALLBACK] = ctx->current_callback_save;
 	special_objects[OBJ_CATCHSTACK] = ctx->catchstack_save;
@@ -145,20 +99,20 @@ bool factor_vm::stack_to_array(cell bottom, cell top)
 	{
 		array *a = allot_uninitialized_array<array>(depth / sizeof(cell));
 		memcpy(a + 1,(void*)bottom,depth);
-		dpush(tag<array>(a));
+		ctx->push(tag<array>(a));
 		return true;
 	}
 }
 
 void factor_vm::primitive_datastack()
 {
-	if(!stack_to_array(ds_bot,ds))
+	if(!stack_to_array(ctx->datastack_region->start,ctx->datastack))
 		general_error(ERROR_DS_UNDERFLOW,false_object,false_object,NULL);
 }
 
 void factor_vm::primitive_retainstack()
 {
-	if(!stack_to_array(rs_bot,rs))
+	if(!stack_to_array(ctx->retainstack_region->start,ctx->retainstack))
 		general_error(ERROR_RS_UNDERFLOW,false_object,false_object,NULL);
 }
 
@@ -172,46 +126,48 @@ cell factor_vm::array_to_stack(array *array, cell bottom)
 
 void factor_vm::primitive_set_datastack()
 {
-	ds = array_to_stack(untag_check<array>(dpop()),ds_bot);
+	ctx->datastack = array_to_stack(untag_check<array>(ctx->pop()),ctx->datastack_region->start);
 }
 
 void factor_vm::primitive_set_retainstack()
 {
-	rs = array_to_stack(untag_check<array>(dpop()),rs_bot);
+	ctx->retainstack = array_to_stack(untag_check<array>(ctx->pop()),ctx->retainstack_region->start);
 }
 
 /* Used to implement call( */
 void factor_vm::primitive_check_datastack()
 {
-	fixnum out = to_fixnum(dpop());
-	fixnum in = to_fixnum(dpop());
+	fixnum out = to_fixnum(ctx->pop());
+	fixnum in = to_fixnum(ctx->pop());
 	fixnum height = out - in;
-	array *saved_datastack = untag_check<array>(dpop());
+	array *saved_datastack = untag_check<array>(ctx->pop());
 	fixnum saved_height = array_capacity(saved_datastack);
-	fixnum current_height = (ds - ds_bot + sizeof(cell)) / sizeof(cell);
+	fixnum current_height = (ctx->datastack - ctx->datastack_region->start + sizeof(cell)) / sizeof(cell);
 	if(current_height - height != saved_height)
-		dpush(false_object);
+		ctx->push(false_object);
 	else
 	{
-		fixnum i;
-		for(i = 0; i < saved_height - in; i++)
+		cell *ds_bot = (cell *)ctx->datastack_region->start;
+		for(fixnum i = 0; i < saved_height - in; i++)
 		{
-			if(((cell *)ds_bot)[i] != array_nth(saved_datastack,i))
+			if(ds_bot[i] != array_nth(saved_datastack,i))
 			{
-				dpush(false_object);
+				ctx->push(false_object);
 				return;
 			}
 		}
-		dpush(true_object);
+		ctx->push(true_object);
 	}
 }
 
 void factor_vm::primitive_load_locals()
 {
-	fixnum count = untag_fixnum(dpop());
-	memcpy((cell *)(rs + sizeof(cell)),(cell *)(ds - sizeof(cell) * (count - 1)),sizeof(cell) * count);
-	ds -= sizeof(cell) * count;
-	rs += sizeof(cell) * count;
+	fixnum count = untag_fixnum(ctx->pop());
+	memcpy((cell *)(ctx->retainstack + sizeof(cell)),
+		(cell *)(ctx->datastack - sizeof(cell) * (count - 1)),
+		sizeof(cell) * count);
+	ctx->datastack -= sizeof(cell) * count;
+	ctx->retainstack += sizeof(cell) * count;
 }
 
 }
diff --git a/vm/contexts.hpp b/vm/contexts.hpp
index 5b9ac3b615..14a853bf37 100644
--- a/vm/contexts.hpp
+++ b/vm/contexts.hpp
@@ -1,11 +1,7 @@
 namespace factor
 {
 
-/* Assembly code makes assumptions about the layout of this struct:
-   - callstack_top field is 0
-   - callstack_bottom field is 1
-   - datastack field is 2
-   - retainstack field is 3 */
+/* Assembly code makes assumptions about the layout of this struct */
 struct context {
 	/* C stack pointer on entry */
 	stack_frame *callstack_top;
@@ -17,12 +13,6 @@ struct context {
 	/* current retain stack top pointer */
 	cell retainstack;
 
-	/* saved contents of ds register on entry to callback */
-	cell datastack_save;
-
-	/* saved contents of rs register on entry to callback */
-	cell retainstack_save;
-
 	/* callback-bottom stack frame, or NULL for top-level context.
 	When nest_stacks() is called, callstack layout with callbacks
 	is as follows:
@@ -48,36 +38,54 @@ struct context {
 	context *next;
 
 	context(cell ds_size, cell rs_size);
+
+	cell peek()
+	{
+		return *(cell *)datastack;
+	}
+
+	void replace(cell tagged)
+	{
+		*(cell *)datastack = tagged;
+	}
+
+	cell pop()
+	{
+		cell value = peek();
+		datastack -= sizeof(cell);
+		return value;
+	}
+
+	void push(cell tagged)
+	{
+		datastack += sizeof(cell);
+		replace(tagged);
+	}
+
+	void reset_datastack()
+	{
+		datastack = datastack_region->start - sizeof(cell);
+	}
+
+	void reset_retainstack()
+	{
+		retainstack = retainstack_region->start - sizeof(cell);
+	}
+
+	static const cell stack_reserved = (64 * sizeof(cell));
+
+	void fix_stacks()
+	{
+		if(datastack + sizeof(cell) < datastack_region->start
+			|| datastack + stack_reserved >= datastack_region->end)
+			reset_datastack();
+
+		if(retainstack + sizeof(cell) < retainstack_region->start
+			|| retainstack + stack_reserved >= retainstack_region->end)
+			reset_retainstack();
+	}
 };
 
-#define ds_bot (ctx->datastack_region->start)
-#define ds_top (ctx->datastack_region->end)
-#define rs_bot (ctx->retainstack_region->start)
-#define rs_top (ctx->retainstack_region->end)
-
-inline cell dpeek()
-{
-	return *(cell *)ds;
-}
-
-inline void drepl(cell tagged)
-{
-	*(cell *)ds = tagged;
-}
-
-inline cell dpop()
-{
-	cell value = dpeek();
-	ds -= sizeof(cell);
-	return value;
-}
-
-inline void dpush(cell tagged)
-{
-	ds += sizeof(cell);
-	drepl(tagged);
-}
-
 VM_C_API void nest_stacks(stack_frame *magic_frame, factor_vm *vm);
 VM_C_API void unnest_stacks(factor_vm *vm);
 
diff --git a/vm/cpu-x86.32.S b/vm/cpu-x86.32.S
index b9e60b06a9..6c1e0cfc93 100644
--- a/vm/cpu-x86.32.S
+++ b/vm/cpu-x86.32.S
@@ -5,6 +5,7 @@
 #define ARG2 %ecx
 #define STACK_REG %esp
 #define DS_REG %esi
+#define RS_REG %edi
 #define RETURN_REG %eax
 
 #define NV0 %ebx
@@ -15,22 +16,24 @@
 
 #define PUSH_NONVOLATILE \
 	push %ebx ; \
-	push %ebp
+	push %ebp ; \
+	push %esi ; \
+	push %edi
 
 #define POP_NONVOLATILE \
+	pop %edi ; \
+	pop %esi ; \
 	pop %ebp ; \
 	pop %ebx
 
 #define QUOT_XT_OFFSET 12
 
-/* We pass a function pointer to memcpy to work around a Mac OS X
-ABI limitation which would otherwise require us to do a bizzaro PC-relative
-trampoline to retrieve the function address */
-DEF(void,set_callstack,(F_STACK_FRAME *to, F_STACK_FRAME *from, CELL length, void *memcpy)):
-	mov 4(%esp),%ebp                   /* to */
-	mov 8(%esp),%edx                   /* from */
-	mov 12(%esp),%ecx                  /* length */
-	mov 16(%esp),%eax                  /* memcpy */
+DEF(void,set_callstack,(void *vm, stack_frame *to, stack_frame *from, cell length, void *memcpy)):
+	mov 4(%esp),%ebx                   /* vm */
+	mov 8(%esp),%ebp                   /* to */
+	mov 12(%esp),%edx                  /* from */
+	mov 16(%esp),%ecx                  /* length */
+	mov 20(%esp),%eax                  /* memcpy */
 	sub %ecx,%ebp                      /* compute new stack pointer */
 	mov %ebp,%esp
 	push %ecx                          /* pass length */
@@ -38,29 +41,50 @@ DEF(void,set_callstack,(F_STACK_FRAME *to, F_STACK_FRAME *from, CELL length, voi
 	push %ebp                          /* pass dst */
 	call *%eax                         /* call memcpy */
 	add $12,%esp                       /* pop args from the stack */
+	mov (%ebx),%ebx                    /* load context */
+	mov 8(%ebx),DS_REG                 /* load datastack */
+	mov 12(%ebx),RS_REG                /* load retainstack */
 	ret                                /* return _with new stack_ */
 
-DEF(F_FASTCALL void,throw_impl,(CELL quot, F_STACK_FRAME *rewind_to, void *vm)):
-	mov ARG2,NV0  /* remember vm ptr in case quot_xt = lazy_jit_compile */		
+DEF(void,throw_impl,(cell quot, void *new_stack, void *vm)):
 	/* clear x87 stack, but preserve rounding mode and exception flags */
-	sub $2,STACK_REG
-	fnstcw (STACK_REG)
+	sub $2,%esp
+	fnstcw (%esp)
 	fninit
-	fldcw (STACK_REG)
-	/* rewind_to */
-	mov ARG1,STACK_REG
-	mov NV0,ARG1
-	jmp *QUOT_XT_OFFSET(ARG0)
+	fldcw (%esp)
+	add $2,%esp
+	/* load quotation and vm parameters */
+	mov 4(%esp),%eax
+	mov 12(%esp),%edx
+	/* load new stack pointer */
+	mov 8(%esp),%esp
+	/* load context */
+	mov (%edx),%ebx
+	/* load datastack */
+	mov 8(%ebx),DS_REG
+	/* load retainstack */
+	mov 12(%ebx),RS_REG
+	/* call the error handler */
+	jmp *QUOT_XT_OFFSET(%eax)
 
-DEF(F_FASTCALL void,lazy_jit_compile,(CELL quot, void *vm)):
-	mov ARG1,ARG2
-	mov STACK_REG,ARG1           /* Save stack pointer */
-	sub $STACK_PADDING,STACK_REG
+DEF(VM_ASM_API void,lazy_jit_compile,(cell quot, void *vm)):
+	/* load context */
+	mov (ARG1),%ebx
+	/* save callstack */
+	lea -4(%esp),%ebp
+	mov %ebp,(%ebx)
+	/* save datastack */
+	mov DS_REG,8(%ebx)
+	/* save retainstack */
+	mov RS_REG,12(%ebx)
+	/* compile quotation */
+	sub $4,%esp
+	push ARG1
+	push ARG0
 	call MANGLE(lazy_jit_compile_impl)
-	mov RETURN_REG,ARG0          /* No-op on 32-bit */
-	add $STACK_PADDING,STACK_REG
-    jmp *QUOT_XT_OFFSET(ARG0)    /* Call the quotation */
-
+	add $12,%esp
+	/* call quotation */
+	jmp *QUOT_XT_OFFSET(%eax)
 
 DEF(long long,read_timestamp_counter,(void)):
 	rdtsc
diff --git a/vm/cpu-x86.32.hpp b/vm/cpu-x86.32.hpp
index e740771470..24b1761472 100644
--- a/vm/cpu-x86.32.hpp
+++ b/vm/cpu-x86.32.hpp
@@ -2,9 +2,6 @@ namespace factor
 {
 
 #define FACTOR_CPU_STRING "x86.32"
-
-register cell ds asm("esi");
-register cell rs asm("edi");
-
 #define VM_ASM_API VM_C_API __attribute__ ((regparm (3)))
+
 }
diff --git a/vm/cpu-x86.S b/vm/cpu-x86.S
index 87e45c75db..c148f7e5eb 100644
--- a/vm/cpu-x86.S
+++ b/vm/cpu-x86.S
@@ -12,9 +12,18 @@ DEF(F_FASTCALL void,c_to_factor,(CELL quot, void *vm)):
 	/* Create register shadow area for Win64 */
 	sub $32,STACK_REG
 
-	/* Save stack pointer */
-	lea -CELL_SIZE(STACK_REG),ARG0
-	call MANGLE(save_callstack_bottom)
+	/* Load context */
+    mov (NV1),ARG0
+
+    /* Save ctx->callstack_bottom */
+	lea -CELL_SIZE(STACK_REG),ARG1
+    mov ARG1,CELL_SIZE(ARG0)
+
+    /* Load ctx->datastack */
+    mov (CELL_SIZE * 2)(ARG0),DS_REG
+
+    /* Load ctx->retainstack */
+    mov (CELL_SIZE * 3)(ARG0),RS_REG
 
 	/* Call quot-xt */
 	mov NV0,ARG0
diff --git a/vm/cpu-x86.hpp b/vm/cpu-x86.hpp
index 4eed59dd78..6c63edf212 100644
--- a/vm/cpu-x86.hpp
+++ b/vm/cpu-x86.hpp
@@ -75,10 +75,12 @@ inline static unsigned int fpu_status(unsigned int status)
 
 /* Defined in assembly */
 VM_ASM_API void c_to_factor(cell quot, void *vm);
-VM_ASM_API void throw_impl(cell quot, stack_frame *rewind_to, void *vm);
+VM_C_API void throw_impl(cell quot, void *new_stack, void *vm);
 VM_ASM_API void lazy_jit_compile(cell quot, void *vm);
 
-VM_C_API void set_callstack(stack_frame *to,
+VM_C_API void set_callstack(
+	void *vm,
+	stack_frame *to,
 	stack_frame *from,
 	cell length,
 	void *(*memcpy)(void*,const void*, size_t));
diff --git a/vm/data_heap.cpp b/vm/data_heap.cpp
index 8e732830e0..f5946d648b 100755
--- a/vm/data_heap.cpp
+++ b/vm/data_heap.cpp
@@ -230,7 +230,7 @@ data_heap_room factor_vm::data_room()
 void factor_vm::primitive_data_room()
 {
 	data_heap_room room = data_room();
-	dpush(tag<byte_array>(byte_array_from_value(&room)));
+	ctx->push(tag<byte_array>(byte_array_from_value(&room)));
 }
 
 struct object_accumulator {
@@ -265,7 +265,7 @@ cell factor_vm::instances(cell type)
 void factor_vm::primitive_all_instances()
 {
 	primitive_full_gc();
-	dpush(instances(TYPE_COUNT));
+	ctx->push(instances(TYPE_COUNT));
 }
 
 }
diff --git a/vm/debug.cpp b/vm/debug.cpp
index a19e8fe34e..dc87c5f301 100755
--- a/vm/debug.cpp
+++ b/vm/debug.cpp
@@ -145,13 +145,13 @@ void factor_vm::print_objects(cell *start, cell *end)
 void factor_vm::print_datastack()
 {
 	std::cout << "==== DATA STACK:\n";
-	print_objects((cell *)ds_bot,(cell *)ds);
+	print_objects((cell *)ctx->datastack_region->start,(cell *)ctx->datastack);
 }
 
 void factor_vm::print_retainstack()
 {
 	std::cout << "==== RETAIN STACK:\n";
-	print_objects((cell *)rs_bot,(cell *)rs);
+	print_objects((cell *)ctx->retainstack_region->start,(cell *)ctx->retainstack);
 }
 
 struct stack_frame_printer {
@@ -421,9 +421,9 @@ void factor_vm::factorbug()
 		else if(strcmp(cmd,"t") == 0)
 			full_output = !full_output;
 		else if(strcmp(cmd,"s") == 0)
-			dump_memory(ds_bot,ds);
+			dump_memory(ctx->datastack_region->start,ctx->datastack);
 		else if(strcmp(cmd,"r") == 0)
-			dump_memory(rs_bot,rs);
+			dump_memory(ctx->retainstack_region->start,ctx->retainstack);
 		else if(strcmp(cmd,".s") == 0)
 			print_datastack();
 		else if(strcmp(cmd,".r") == 0)
@@ -459,7 +459,7 @@ void factor_vm::factorbug()
 		else if(strcmp(cmd,"push") == 0)
 		{
 			cell addr = read_cell_hex();
-			dpush(addr);
+			ctx->push(addr);
 		}
 		else if(strcmp(cmd,"code") == 0)
 			dump_code_heap();
diff --git a/vm/dispatch.cpp b/vm/dispatch.cpp
index 9ee9957819..b0f9159da7 100755
--- a/vm/dispatch.cpp
+++ b/vm/dispatch.cpp
@@ -88,9 +88,9 @@ cell factor_vm::lookup_method(cell obj, cell methods)
 
 void factor_vm::primitive_lookup_method()
 {
-	cell methods = dpop();
-	cell obj = dpop();
-	dpush(lookup_method(obj,methods));
+	cell methods = ctx->pop();
+	cell obj = ctx->pop();
+	ctx->push(lookup_method(obj,methods));
 }
 
 cell factor_vm::object_class(cell obj)
@@ -120,17 +120,17 @@ void factor_vm::primitive_mega_cache_miss()
 {
 	dispatch_stats.megamorphic_cache_misses++;
 
-	cell cache = dpop();
-	fixnum index = untag_fixnum(dpop());
-	cell methods = dpop();
+	cell cache = ctx->pop();
+	fixnum index = untag_fixnum(ctx->pop());
+	cell methods = ctx->pop();
 
-	cell object = ((cell *)ds)[-index];
+	cell object = ((cell *)ctx->datastack)[-index];
 	cell klass = object_class(object);
 	cell method = lookup_method(object,methods);
 
 	update_method_cache(cache,klass,method);
 
-	dpush(method);
+	ctx->push(method);
 }
 
 void factor_vm::primitive_reset_dispatch_stats()
@@ -140,7 +140,7 @@ void factor_vm::primitive_reset_dispatch_stats()
 
 void factor_vm::primitive_dispatch_stats()
 {
-	dpush(tag<byte_array>(byte_array_from_value(&dispatch_stats)));
+	ctx->push(tag<byte_array>(byte_array_from_value(&dispatch_stats)));
 }
 
 void quotation_jit::emit_mega_cache_lookup(cell methods_, fixnum index, cell cache_)
diff --git a/vm/errors.cpp b/vm/errors.cpp
index 7d7b1f0080..2292c27693 100755
--- a/vm/errors.cpp
+++ b/vm/errors.cpp
@@ -43,9 +43,9 @@ void factor_vm::throw_error(cell error, stack_frame *callstack_top)
 
 		/* If we had an underflow or overflow, stack pointers might be
 		out of bounds */
-		fix_stacks();
+		ctx->fix_stacks();
 
-		dpush(error);
+		ctx->push(error);
 
 		/* Errors thrown from C code pass NULL for this parameter.
 		Errors thrown from Factor code, or signal handlers, pass the
@@ -99,13 +99,13 @@ bool factor_vm::in_page(cell fault, cell area, cell area_size, int offset)
 
 void factor_vm::memory_protection_error(cell addr, stack_frame *native_stack)
 {
-	if(in_page(addr, ds_bot, 0, -1))
+	if(in_page(addr, ctx->datastack_region->start, 0, -1))
 		general_error(ERROR_DS_UNDERFLOW,false_object,false_object,native_stack);
-	else if(in_page(addr, ds_bot, ds_size, 0))
+	else if(in_page(addr, ctx->datastack_region->start, ds_size, 0))
 		general_error(ERROR_DS_OVERFLOW,false_object,false_object,native_stack);
-	else if(in_page(addr, rs_bot, 0, -1))
+	else if(in_page(addr, ctx->retainstack_region->start, 0, -1))
 		general_error(ERROR_RS_UNDERFLOW,false_object,false_object,native_stack);
-	else if(in_page(addr, rs_bot, rs_size, 0))
+	else if(in_page(addr, ctx->retainstack_region->start, rs_size, 0))
 		general_error(ERROR_RS_OVERFLOW,false_object,false_object,native_stack);
 	else if(in_page(addr, nursery.end, 0, 0))
 		critical_error("allot_object() missed GC check",0);
@@ -130,7 +130,7 @@ void factor_vm::fp_trap_error(unsigned int fpu_status, stack_frame *signal_calls
 
 void factor_vm::primitive_call_clear()
 {
-	throw_impl(dpop(),ctx->callstack_bottom,this);
+	throw_impl(ctx->pop(),ctx->callstack_bottom,this);
 }
 
 /* For testing purposes */
diff --git a/vm/factor.cpp b/vm/factor.cpp
index 39088500ad..04961ff505 100755
--- a/vm/factor.cpp
+++ b/vm/factor.cpp
@@ -152,11 +152,9 @@ void factor_vm::init_factor(vm_parameters *p)
 void factor_vm::pass_args_to_factor(int argc, vm_char **argv)
 {
 	growable_array args(this);
-	int i;
 
-	for(i = 1; i < argc; i++){
+	for(fixnum i = 1; i < argc; i++)
 		args.add(allot_alien(false_object,(cell)argv[i]));
-	}
 
 	args.trim();
 	special_objects[OBJ_ARGS] = args.elements.value();
diff --git a/vm/gc.cpp b/vm/gc.cpp
index f8a9fd45b8..96eab18c47 100755
--- a/vm/gc.cpp
+++ b/vm/gc.cpp
@@ -131,8 +131,6 @@ void factor_vm::gc(gc_op op, cell requested_bytes, bool trace_contexts_p)
 	assert(!gc_off);
 	assert(!current_gc);
 
-	save_stacks();
-
 	current_gc = new gc_state(op,this);
 
 	/* Keep trying to GC higher and higher generations until we don't run out
@@ -277,12 +275,12 @@ void factor_vm::primitive_disable_gc_events()
 		}
 
 		result.trim();
-		dpush(result.elements.value());
+		ctx->push(result.elements.value());
 
 		delete this->gc_events;
 	}
 	else
-		dpush(false_object);
+		ctx->push(false_object);
 }
 
 }
diff --git a/vm/image.cpp b/vm/image.cpp
index 1afd3d0b1b..cda07c9b0e 100755
--- a/vm/image.cpp
+++ b/vm/image.cpp
@@ -314,7 +314,7 @@ void factor_vm::primitive_save_image()
 	/* do a full GC to push everything into tenured space */
 	primitive_compact_gc();
 
-	data_root<byte_array> path(dpop(),this);
+	data_root<byte_array> path(ctx->pop(),this);
 	path.untag_check(this);
 	save_image((vm_char *)(path.untagged() + 1));
 }
@@ -324,7 +324,7 @@ void factor_vm::primitive_save_image_and_exit()
 	/* We unbox this before doing anything else. This is the only point
 	where we might throw an error, so we have to throw an error here since
 	later steps destroy the current image. */
-	data_root<byte_array> path(dpop(),this);
+	data_root<byte_array> path(ctx->pop(),this);
 	path.untag_check(this);
 
 	/* strip out special_objects data which is set on startup anyway */
diff --git a/vm/inline_cache.cpp b/vm/inline_cache.cpp
index 6923a650e7..092b210cc8 100755
--- a/vm/inline_cache.cpp
+++ b/vm/inline_cache.cpp
@@ -198,11 +198,11 @@ void *factor_vm::inline_cache_miss(cell return_address_)
 		<< std::endl;
 #endif
 
-	data_root<array> cache_entries(dpop(),this);
-	fixnum index = untag_fixnum(dpop());
-	data_root<array> methods(dpop(),this);
-	data_root<word> generic_word(dpop(),this);
-	data_root<object> object(((cell *)ds)[-index],this);
+	data_root<array> cache_entries(ctx->pop(),this);
+	fixnum index = untag_fixnum(ctx->pop());
+	data_root<array> methods(ctx->pop(),this);
+	data_root<word> generic_word(ctx->pop(),this);
+	data_root<object> object(((cell *)ctx->datastack)[-index],this);
 
 	cell pic_size = inline_cache_size(cache_entries.value());
 
diff --git a/vm/io.cpp b/vm/io.cpp
index a8f9cb6897..a45e1d10ab 100755
--- a/vm/io.cpp
+++ b/vm/io.cpp
@@ -33,8 +33,8 @@ void factor_vm::io_error()
 
 void factor_vm::primitive_fopen()
 {
-	data_root<byte_array> mode(dpop(),this);
-	data_root<byte_array> path(dpop(),this);
+	data_root<byte_array> mode(ctx->pop(),this);
+	data_root<byte_array> path(ctx->pop(),this);
 	mode.untag_check(this);
 	path.untag_check(this);
 
@@ -46,15 +46,20 @@ void factor_vm::primitive_fopen()
 			io_error();
 		else
 		{
-			box_alien(file);
+			ctx->push(allot_alien(file));
 			break;
 		}
 	}
 }
 
+FILE *factor_vm::pop_file_handle()
+{
+	return (FILE *)alien_offset(ctx->pop());
+}
+
 void factor_vm::primitive_fgetc()
 {
-	FILE *file = (FILE *)unbox_alien();
+	FILE *file = pop_file_handle();
 
 	for(;;)
 	{
@@ -63,7 +68,7 @@ void factor_vm::primitive_fgetc()
 		{
 			if(feof(file))
 			{
-				dpush(false_object);
+				ctx->push(false_object);
 				break;
 			}
 			else
@@ -71,7 +76,7 @@ void factor_vm::primitive_fgetc()
 		}
 		else
 		{
-			dpush(tag_fixnum(c));
+			ctx->push(tag_fixnum(c));
 			break;
 		}
 	}
@@ -79,12 +84,12 @@ void factor_vm::primitive_fgetc()
 
 void factor_vm::primitive_fread()
 {
-	FILE *file = (FILE *)unbox_alien();
+	FILE *file = pop_file_handle();
 	fixnum size = unbox_array_size();
 
 	if(size == 0)
 	{
-		dpush(tag<string>(allot_string(0,0)));
+		ctx->push(tag<string>(allot_string(0,0)));
 		return;
 	}
 
@@ -97,7 +102,7 @@ void factor_vm::primitive_fread()
 		{
 			if(feof(file))
 			{
-				dpush(false_object);
+				ctx->push(false_object);
 				break;
 			}
 			else
@@ -111,7 +116,7 @@ void factor_vm::primitive_fread()
 				memcpy(new_buf + 1, buf.untagged() + 1,c);
 				buf = new_buf;
 			}
-			dpush(buf.value());
+			ctx->push(buf.value());
 			break;
 		}
 	}
@@ -119,8 +124,8 @@ void factor_vm::primitive_fread()
 
 void factor_vm::primitive_fputc()
 {
-	FILE *file = (FILE *)unbox_alien();
-	fixnum ch = to_fixnum(dpop());
+	FILE *file = pop_file_handle();
+	fixnum ch = to_fixnum(ctx->pop());
 
 	for(;;)
 	{
@@ -137,8 +142,8 @@ void factor_vm::primitive_fputc()
 
 void factor_vm::primitive_fwrite()
 {
-	FILE *file = (FILE *)unbox_alien();
-	byte_array *text = untag_check<byte_array>(dpop());
+	FILE *file = pop_file_handle();
+	byte_array *text = untag_check<byte_array>(ctx->pop());
 	cell length = array_capacity(text);
 	char *string = (char *)(text + 1);
 
@@ -166,20 +171,20 @@ void factor_vm::primitive_fwrite()
 
 void factor_vm::primitive_ftell()
 {
-	FILE *file = (FILE *)unbox_alien();
+	FILE *file = pop_file_handle();
 	off_t offset;
 
 	if((offset = FTELL(file)) == -1)
 		io_error();
 
-	box_signed_8(offset);
+	ctx->push(from_signed_8(offset));
 }
 
 void factor_vm::primitive_fseek()
 {
-	int whence = to_fixnum(dpop());
-	FILE *file = (FILE *)unbox_alien();
-	off_t offset = to_signed_8(dpop());
+	int whence = to_fixnum(ctx->pop());
+	FILE *file = pop_file_handle();
+	off_t offset = to_signed_8(ctx->pop());
 
 	switch(whence)
 	{
@@ -202,7 +207,7 @@ void factor_vm::primitive_fseek()
 
 void factor_vm::primitive_fflush()
 {
-	FILE *file = (FILE *)unbox_alien();
+	FILE *file = pop_file_handle();
 	for(;;)
 	{
 		if(fflush(file) == EOF)
@@ -214,7 +219,7 @@ void factor_vm::primitive_fflush()
 
 void factor_vm::primitive_fclose()
 {
-	FILE *file = (FILE *)unbox_alien();
+	FILE *file = pop_file_handle();
 	for(;;)
 	{
 		if(fclose(file) == EOF)
diff --git a/vm/math.cpp b/vm/math.cpp
index 4266edc09c..4deb3b3f08 100755
--- a/vm/math.cpp
+++ b/vm/math.cpp
@@ -5,40 +5,40 @@ namespace factor
 
 void factor_vm::primitive_bignum_to_fixnum()
 {
-	drepl(tag_fixnum(bignum_to_fixnum(untag<bignum>(dpeek()))));
+	ctx->replace(tag_fixnum(bignum_to_fixnum(untag<bignum>(ctx->peek()))));
 }
 
 void factor_vm::primitive_float_to_fixnum()
 {
-	drepl(tag_fixnum(float_to_fixnum(dpeek())));
+	ctx->replace(tag_fixnum(float_to_fixnum(ctx->peek())));
 }
 
 /* Division can only overflow when we are dividing the most negative fixnum
 by -1. */
 void factor_vm::primitive_fixnum_divint()
 {
-	fixnum y = untag_fixnum(dpop()); \
-	fixnum x = untag_fixnum(dpeek());
+	fixnum y = untag_fixnum(ctx->pop()); \
+	fixnum x = untag_fixnum(ctx->peek());
 	fixnum result = x / y;
 	if(result == -fixnum_min)
-		drepl(allot_integer(-fixnum_min));
+		ctx->replace(allot_integer(-fixnum_min));
 	else
-		drepl(tag_fixnum(result));
+		ctx->replace(tag_fixnum(result));
 }
 
 void factor_vm::primitive_fixnum_divmod()
 {
-	cell y = ((cell *)ds)[0];
-	cell x = ((cell *)ds)[-1];
+	cell y = ((cell *)ctx->datastack)[0];
+	cell x = ((cell *)ctx->datastack)[-1];
 	if(y == tag_fixnum(-1) && x == tag_fixnum(fixnum_min))
 	{
-		((cell *)ds)[-1] = allot_integer(-fixnum_min);
-		((cell *)ds)[0] = tag_fixnum(0);
+		((cell *)ctx->datastack)[-1] = allot_integer(-fixnum_min);
+		((cell *)ctx->datastack)[0] = tag_fixnum(0);
 	}
 	else
 	{
-		((cell *)ds)[-1] = tag_fixnum(untag_fixnum(x) / untag_fixnum(y));
-		((cell *)ds)[0] = (fixnum)x % (fixnum)y;
+		((cell *)ctx->datastack)[-1] = tag_fixnum(untag_fixnum(x) / untag_fixnum(y));
+		((cell *)ctx->datastack)[0] = (fixnum)x % (fixnum)y;
 	}
 }
 
@@ -63,15 +63,15 @@ inline fixnum factor_vm::branchless_abs(fixnum x)
 
 void factor_vm::primitive_fixnum_shift()
 {
-	fixnum y = untag_fixnum(dpop());
-	fixnum x = untag_fixnum(dpeek());
+	fixnum y = untag_fixnum(ctx->pop());
+	fixnum x = untag_fixnum(ctx->peek());
 
 	if(x == 0)
 		return;
 	else if(y < 0)
 	{
 		y = branchless_max(y,-WORD_SIZE + 1);
-		drepl(tag_fixnum(x >> -y));
+		ctx->replace(tag_fixnum(x >> -y));
 		return;
 	}
 	else if(y < WORD_SIZE - TAG_BITS)
@@ -79,57 +79,57 @@ void factor_vm::primitive_fixnum_shift()
 		fixnum mask = -((fixnum)1 << (WORD_SIZE - 1 - TAG_BITS - y));
 		if(!(branchless_abs(x) & mask))
 		{
-			drepl(tag_fixnum(x << y));
+			ctx->replace(tag_fixnum(x << y));
 			return;
 		}
 	}
 
-	drepl(tag<bignum>(bignum_arithmetic_shift(
+	ctx->replace(tag<bignum>(bignum_arithmetic_shift(
 		fixnum_to_bignum(x),y)));
 }
 
 void factor_vm::primitive_fixnum_to_bignum()
 {
-	drepl(tag<bignum>(fixnum_to_bignum(untag_fixnum(dpeek()))));
+	ctx->replace(tag<bignum>(fixnum_to_bignum(untag_fixnum(ctx->peek()))));
 }
 
 void factor_vm::primitive_float_to_bignum()
 {
-	drepl(tag<bignum>(float_to_bignum(dpeek())));
+	ctx->replace(tag<bignum>(float_to_bignum(ctx->peek())));
 }
 
 #define POP_BIGNUMS(x,y) \
-	bignum * y = untag<bignum>(dpop()); \
-	bignum * x = untag<bignum>(dpop());
+	bignum * y = untag<bignum>(ctx->pop()); \
+	bignum * x = untag<bignum>(ctx->pop());
 
 void factor_vm::primitive_bignum_eq()
 {
 	POP_BIGNUMS(x,y);
-	box_boolean(bignum_equal_p(x,y));
+	ctx->push(tag_boolean(bignum_equal_p(x,y)));
 }
 
 void factor_vm::primitive_bignum_add()
 {
 	POP_BIGNUMS(x,y);
-	dpush(tag<bignum>(bignum_add(x,y)));
+	ctx->push(tag<bignum>(bignum_add(x,y)));
 }
 
 void factor_vm::primitive_bignum_subtract()
 {
 	POP_BIGNUMS(x,y);
-	dpush(tag<bignum>(bignum_subtract(x,y)));
+	ctx->push(tag<bignum>(bignum_subtract(x,y)));
 }
 
 void factor_vm::primitive_bignum_multiply()
 {
 	POP_BIGNUMS(x,y);
-	dpush(tag<bignum>(bignum_multiply(x,y)));
+	ctx->push(tag<bignum>(bignum_multiply(x,y)));
 }
 
 void factor_vm::primitive_bignum_divint()
 {
 	POP_BIGNUMS(x,y);
-	dpush(tag<bignum>(bignum_quotient(x,y)));
+	ctx->push(tag<bignum>(bignum_quotient(x,y)));
 }
 
 void factor_vm::primitive_bignum_divmod()
@@ -137,85 +137,85 @@ void factor_vm::primitive_bignum_divmod()
 	bignum *q, *r;
 	POP_BIGNUMS(x,y);
 	bignum_divide(x,y,&q,&r);
-	dpush(tag<bignum>(q));
-	dpush(tag<bignum>(r));
+	ctx->push(tag<bignum>(q));
+	ctx->push(tag<bignum>(r));
 }
 
 void factor_vm::primitive_bignum_mod()
 {
 	POP_BIGNUMS(x,y);
-	dpush(tag<bignum>(bignum_remainder(x,y)));
+	ctx->push(tag<bignum>(bignum_remainder(x,y)));
 }
 
 void factor_vm::primitive_bignum_and()
 {
 	POP_BIGNUMS(x,y);
-	dpush(tag<bignum>(bignum_bitwise_and(x,y)));
+	ctx->push(tag<bignum>(bignum_bitwise_and(x,y)));
 }
 
 void factor_vm::primitive_bignum_or()
 {
 	POP_BIGNUMS(x,y);
-	dpush(tag<bignum>(bignum_bitwise_ior(x,y)));
+	ctx->push(tag<bignum>(bignum_bitwise_ior(x,y)));
 }
 
 void factor_vm::primitive_bignum_xor()
 {
 	POP_BIGNUMS(x,y);
-	dpush(tag<bignum>(bignum_bitwise_xor(x,y)));
+	ctx->push(tag<bignum>(bignum_bitwise_xor(x,y)));
 }
 
 void factor_vm::primitive_bignum_shift()
 {
-	fixnum y = untag_fixnum(dpop());
-        bignum* x = untag<bignum>(dpop());
-	dpush(tag<bignum>(bignum_arithmetic_shift(x,y)));
+	fixnum y = untag_fixnum(ctx->pop());
+        bignum* x = untag<bignum>(ctx->pop());
+	ctx->push(tag<bignum>(bignum_arithmetic_shift(x,y)));
 }
 
 void factor_vm::primitive_bignum_less()
 {
 	POP_BIGNUMS(x,y);
-	box_boolean(bignum_compare(x,y) == bignum_comparison_less);
+	ctx->push(tag_boolean(bignum_compare(x,y) == bignum_comparison_less));
 }
 
 void factor_vm::primitive_bignum_lesseq()
 {
 	POP_BIGNUMS(x,y);
-	box_boolean(bignum_compare(x,y) != bignum_comparison_greater);
+	ctx->push(tag_boolean(bignum_compare(x,y) != bignum_comparison_greater));
 }
 
 void factor_vm::primitive_bignum_greater()
 {
 	POP_BIGNUMS(x,y);
-	box_boolean(bignum_compare(x,y) == bignum_comparison_greater);
+	ctx->push(tag_boolean(bignum_compare(x,y) == bignum_comparison_greater));
 }
 
 void factor_vm::primitive_bignum_greatereq()
 {
 	POP_BIGNUMS(x,y);
-	box_boolean(bignum_compare(x,y) != bignum_comparison_less);
+	ctx->push(tag_boolean(bignum_compare(x,y) != bignum_comparison_less));
 }
 
 void factor_vm::primitive_bignum_not()
 {
-	drepl(tag<bignum>(bignum_bitwise_not(untag<bignum>(dpeek()))));
+	ctx->replace(tag<bignum>(bignum_bitwise_not(untag<bignum>(ctx->peek()))));
 }
 
 void factor_vm::primitive_bignum_bitp()
 {
-	fixnum bit = to_fixnum(dpop());
-	bignum *x = untag<bignum>(dpop());
-	box_boolean(bignum_logbitp(bit,x));
+	fixnum bit = to_fixnum(ctx->pop());
+	bignum *x = untag<bignum>(ctx->pop());
+	ctx->push(tag_boolean(bignum_logbitp(bit,x)));
 }
 
 void factor_vm::primitive_bignum_log2()
 {
-	drepl(tag<bignum>(bignum_integer_length(untag<bignum>(dpeek()))));
+	ctx->replace(tag<bignum>(bignum_integer_length(untag<bignum>(ctx->peek()))));
 }
 
 unsigned int factor_vm::bignum_producer(unsigned int digit)
 {
-	unsigned char *ptr = (unsigned char *)alien_offset(dpeek());
+	unsigned char *ptr = (unsigned char *)alien_offset(ctx->peek());
 	return *(ptr + digit);
 }
 
@@ -226,145 +226,146 @@ unsigned int bignum_producer(unsigned int digit, factor_vm *parent)
 
 void factor_vm::primitive_byte_array_to_bignum()
 {
-	cell n_digits = array_capacity(untag_check<byte_array>(dpeek()));
+	cell n_digits = array_capacity(untag_check<byte_array>(ctx->peek()));
 	bignum * result = digit_stream_to_bignum(n_digits,factor::bignum_producer,0x100,0);
-	drepl(tag<bignum>(result));
+	ctx->replace(tag<bignum>(result));
 }
 
 cell factor_vm::unbox_array_size_slow()
 {
-	if(tagged<object>(dpeek()).type() == BIGNUM_TYPE)
+	if(tagged<object>(ctx->peek()).type() == BIGNUM_TYPE)
 	{
 		bignum *zero = untag<bignum>(bignum_zero);
 		bignum *max = cell_to_bignum(array_size_max);
-		bignum *n = untag<bignum>(dpeek());
+		bignum *n = untag<bignum>(ctx->peek());
 		if(bignum_compare(n,zero) != bignum_comparison_less
 			&& bignum_compare(n,max) == bignum_comparison_less)
 		{
-			dpop();
+			ctx->pop();
 			return bignum_to_cell(n);
 		}
 	}
 
-	general_error(ERROR_ARRAY_SIZE,dpop(),tag_fixnum(array_size_max),NULL);
+	general_error(ERROR_ARRAY_SIZE,ctx->pop(),tag_fixnum(array_size_max),NULL);
 	return 0; /* can't happen */
 }
 
 void factor_vm::primitive_fixnum_to_float()
 {
-	drepl(allot_float(fixnum_to_float(dpeek())));
+	ctx->replace(allot_float(fixnum_to_float(ctx->peek())));
 }
 
 void factor_vm::primitive_bignum_to_float()
 {
-	drepl(allot_float(bignum_to_float(dpeek())));
+	ctx->replace(allot_float(bignum_to_float(ctx->peek())));
 }
 
 void factor_vm::primitive_str_to_float()
 {
-	byte_array *bytes = untag_check<byte_array>(dpeek());
+	byte_array *bytes = untag_check<byte_array>(ctx->peek());
 	cell capacity = array_capacity(bytes);
 
 	char *c_str = (char *)(bytes + 1);
 	char *end = c_str;
 	double f = strtod(c_str,&end);
 	if(end == c_str + capacity - 1)
-		drepl(allot_float(f));
+		ctx->replace(allot_float(f));
 	else
-		drepl(false_object);
+		ctx->replace(false_object);
 }
 
 void factor_vm::primitive_float_to_str()
 {
 	byte_array *array = allot_byte_array(33);
-	snprintf((char *)(array + 1),32,"%.16g",untag_float_check(dpop()));
-	dpush(tag<byte_array>(array));
+	snprintf((char *)(array + 1),32,"%.16g",untag_float_check(ctx->pop()));
+	ctx->push(tag<byte_array>(array));
 }
 
 #define POP_FLOATS(x,y) \
-	double y = untag_float(dpop()); \
-	double x = untag_float(dpop());
+	double y = untag_float(ctx->pop()); \
+	double x = untag_float(ctx->pop());
 
 void factor_vm::primitive_float_eq()
 {
 	POP_FLOATS(x,y);
-	box_boolean(x == y);
+	ctx->push(tag_boolean(x == y));
 }
 
 void factor_vm::primitive_float_add()
 {
 	POP_FLOATS(x,y);
-	box_double(x + y);
+	ctx->push(allot_float(x + y));
 }
 
 void factor_vm::primitive_float_subtract()
 {
 	POP_FLOATS(x,y);
-	box_double(x - y);
+	ctx->push(allot_float(x - y));
 }
 
 void factor_vm::primitive_float_multiply()
 {
 	POP_FLOATS(x,y);
-	box_double(x * y);
+	ctx->push(allot_float(x * y));
 }
 
 void factor_vm::primitive_float_divfloat()
 {
 	POP_FLOATS(x,y);
-	box_double(x / y);
+	ctx->push(allot_float(x / y));
 }
 
 void factor_vm::primitive_float_mod()
 {
 	POP_FLOATS(x,y);
-	box_double(fmod(x,y));
+	ctx->push(allot_float(fmod(x,y)));
 }
 
 void factor_vm::primitive_float_less()
 {
 	POP_FLOATS(x,y);
-	box_boolean(x < y);
+	ctx->push(tag_boolean(x < y));
 }
 
 void factor_vm::primitive_float_lesseq()
 {
 	POP_FLOATS(x,y);
-	box_boolean(x <= y);
+	ctx->push(tag_boolean(x <= y));
 }
 
 void factor_vm::primitive_float_greater()
 {
 	POP_FLOATS(x,y);
-	box_boolean(x > y);
+	ctx->push(tag_boolean(x > y));
 }
 
 void factor_vm::primitive_float_greatereq()
 {
 	POP_FLOATS(x,y);
-	box_boolean(x >= y);
+	ctx->push(tag_boolean(x >= y));
 }
 
 void factor_vm::primitive_float_bits()
 {
-	box_unsigned_4(float_bits(untag_float_check(dpop())));
+	ctx->push(from_unsigned_4(float_bits(untag_float_check(ctx->pop()))));
 }
 
 void factor_vm::primitive_bits_float()
 {
-	box_float(bits_float(to_cell(dpop())));
+	ctx->push(allot_float(bits_float(to_cell(ctx->pop()))));
 }
 
 void factor_vm::primitive_double_bits()
 {
-	box_unsigned_8(double_bits(untag_float_check(dpop())));
+	ctx->push(from_unsigned_8(double_bits(untag_float_check(ctx->pop()))));
 }
 
 void factor_vm::primitive_bits_double()
 {
-	box_double(bits_double(to_unsigned_8(dpop())));
+	ctx->push(allot_float(bits_double(to_unsigned_8(ctx->pop()))));
 }
 
+/* Cannot allocate */
 fixnum factor_vm::to_fixnum(cell tagged)
 {
 	switch(TAG(tagged))
@@ -394,99 +395,100 @@ VM_C_API cell to_cell(cell tagged, factor_vm *parent)
 	return parent->to_cell(tagged);
 }
 
-void factor_vm::box_signed_1(s8 n)
+cell factor_vm::from_signed_1(s8 n)
 {
-	dpush(tag_fixnum(n));
+	return tag_fixnum(n);
 }
 
-VM_C_API void box_signed_1(s8 n, factor_vm *parent)
+VM_C_API cell from_signed_1(s8 n, factor_vm *parent)
 {
-	return parent->box_signed_1(n);
+	return parent->from_signed_1(n);
 }
 
-void factor_vm::box_unsigned_1(u8 n)
+cell factor_vm::from_unsigned_1(u8 n)
 {
-	dpush(tag_fixnum(n));
+	return tag_fixnum(n);
 }
 
-VM_C_API void box_unsigned_1(u8 n, factor_vm *parent)
+VM_C_API cell from_unsigned_1(u8 n, factor_vm *parent)
 {
-	return parent->box_unsigned_1(n);
+	return parent->from_unsigned_1(n);
 }
 
-void factor_vm::box_signed_2(s16 n)
+cell factor_vm::from_signed_2(s16 n)
 {
-	dpush(tag_fixnum(n));
+	return tag_fixnum(n);
 }
 
-VM_C_API void box_signed_2(s16 n, factor_vm *parent)
+VM_C_API cell from_signed_2(s16 n, factor_vm *parent)
 {
-	return parent->box_signed_2(n);
+	return parent->from_signed_2(n);
 }
 
-void factor_vm::box_unsigned_2(u16 n)
+cell factor_vm::from_unsigned_2(u16 n)
 {
-	dpush(tag_fixnum(n));
+	return tag_fixnum(n);
 }
 
-VM_C_API void box_unsigned_2(u16 n, factor_vm *parent)
+VM_C_API cell from_unsigned_2(u16 n, factor_vm *parent)
 {
-	return parent->box_unsigned_2(n);
+	return parent->from_unsigned_2(n);
 }
 
-void factor_vm::box_signed_4(s32 n)
+cell factor_vm::from_signed_4(s32 n)
 {
-	dpush(allot_integer(n));
+	return allot_integer(n);
 }
 
-VM_C_API void box_signed_4(s32 n, factor_vm *parent)
+VM_C_API cell from_signed_4(s32 n, factor_vm *parent)
 {
-	return parent->box_signed_4(n);
+	return parent->from_signed_4(n);
 }
 
-void factor_vm::box_unsigned_4(u32 n)
+cell factor_vm::from_unsigned_4(u32 n)
 {
-	dpush(allot_cell(n));
+	return allot_cell(n);
 }
 
-VM_C_API void box_unsigned_4(u32 n, factor_vm *parent)
+VM_C_API cell from_unsigned_4(u32 n, factor_vm *parent)
 {
-	return parent->box_unsigned_4(n);
+	return parent->from_unsigned_4(n);
 }
 
-void factor_vm::box_signed_cell(fixnum integer)
+cell factor_vm::from_signed_cell(fixnum integer)
 {
-	dpush(allot_integer(integer));
+	return allot_integer(integer);
 }
 
-VM_C_API void box_signed_cell(fixnum integer, factor_vm *parent)
+cell factor_vm::from_unsigned_cell(cell integer)
 {
-	return parent->box_signed_cell(integer);
+	return allot_cell(integer);
 }
 
-void factor_vm::box_unsigned_cell(cell cell)
+VM_C_API cell from_signed_cell(fixnum integer, factor_vm *parent)
 {
-	dpush(allot_cell(cell));
+	return parent->from_signed_cell(integer);
 }
 
-VM_C_API void box_unsigned_cell(cell cell, factor_vm *parent)
+VM_C_API cell from_unsigned_cell(cell integer, factor_vm *parent)
 {
-	return parent->box_unsigned_cell(cell);
+	return parent->from_unsigned_cell(integer);
 }
 
-void factor_vm::box_signed_8(s64 n)
+cell factor_vm::from_signed_8(s64 n)
 {
 	if(n < fixnum_min || n > fixnum_max)
-		dpush(tag<bignum>(long_long_to_bignum(n)));
+		return tag<bignum>(long_long_to_bignum(n));
 	else
-		dpush(tag_fixnum(n));
+		return tag_fixnum(n);
 }
 
-VM_C_API void box_signed_8(s64 n, factor_vm *parent)
+VM_C_API cell from_signed_8(s64 n, factor_vm *parent)
 {
-	return parent->box_signed_8(n);
+	return parent->from_signed_8(n);
 }
 
+/* Cannot allocate */
 s64 factor_vm::to_signed_8(cell obj)
 {
 	switch(tagged<object>(obj).type())
@@ -506,19 +508,20 @@ VM_C_API s64 to_signed_8(cell obj, factor_vm *parent)
 	return parent->to_signed_8(obj);
 }
 
-void factor_vm::box_unsigned_8(u64 n)
+cell factor_vm::from_unsigned_8(u64 n)
 {
 	if(n > (u64)fixnum_max)
-		dpush(tag<bignum>(ulong_long_to_bignum(n)));
+		return tag<bignum>(ulong_long_to_bignum(n));
 	else
-		dpush(tag_fixnum(n));
+		return tag_fixnum(n);
 }
 
-VM_C_API void box_unsigned_8(u64 n, factor_vm *parent)
+VM_C_API cell from_unsigned_8(u64 n, factor_vm *parent)
 {
-	return parent->box_unsigned_8(n);
+	return parent->from_unsigned_8(n);
 }
 
+/* Cannot allocate */
 u64 factor_vm::to_unsigned_8(cell obj)
 {
 	switch(tagged<object>(obj).type())
@@ -538,16 +541,12 @@ VM_C_API u64 to_unsigned_8(cell obj, factor_vm *parent)
 	return parent->to_unsigned_8(obj);
 }
  
-void factor_vm::box_float(float flo)
+VM_C_API cell from_float(float flo, factor_vm *parent)
 {
-        dpush(allot_float(flo));
-}
-
-VM_C_API void box_float(float flo, factor_vm *parent)
-{
-	return parent->box_float(flo);
+	return parent->allot_float(flo);
 }
 
+/* Cannot allocate */
 float factor_vm::to_float(cell value)
 {
 	return untag_float_check(value);
@@ -558,16 +557,12 @@ VM_C_API float to_float(cell value, factor_vm *parent)
 	return parent->to_float(value);
 }
 
-void factor_vm::box_double(double flo)
+VM_C_API cell from_double(double flo, factor_vm *parent)
 {
-        dpush(allot_float(flo));
-}
-
-VM_C_API void box_double(double flo, factor_vm *parent)
-{
-	return parent->box_double(flo);
+	return parent->allot_float(flo);
 }
 
+/* Cannot allocate */
 double factor_vm::to_double(cell value)
 {
 	return untag_float_check(value);
@@ -582,7 +577,7 @@ VM_C_API double to_double(cell value, factor_vm *parent)
 overflow, they call these functions. */
 inline void factor_vm::overflow_fixnum_add(fixnum x, fixnum y)
 {
-	drepl(tag<bignum>(fixnum_to_bignum(
+	ctx->replace(tag<bignum>(fixnum_to_bignum(
 		untag_fixnum(x) + untag_fixnum(y))));
 }
 
@@ -593,7 +588,7 @@ VM_ASM_API void overflow_fixnum_add(fixnum x, fixnum y, factor_vm *parent)
 
 inline void factor_vm::overflow_fixnum_subtract(fixnum x, fixnum y)
 {
-	drepl(tag<bignum>(fixnum_to_bignum(
+	ctx->replace(tag<bignum>(fixnum_to_bignum(
 		untag_fixnum(x) - untag_fixnum(y))));
 }
 
@@ -608,7 +603,7 @@ inline void factor_vm::overflow_fixnum_multiply(fixnum x, fixnum y)
 	GC_BIGNUM(bx);
 	bignum *by = fixnum_to_bignum(y);
 	GC_BIGNUM(by);
-	drepl(tag<bignum>(bignum_multiply(bx,by)));
+	ctx->replace(tag<bignum>(bignum_multiply(bx,by)));
 }
 
 VM_ASM_API void overflow_fixnum_multiply(fixnum x, fixnum y, factor_vm *parent)
diff --git a/vm/math.hpp b/vm/math.hpp
index 2fed585f98..9f677d254b 100644
--- a/vm/math.hpp
+++ b/vm/math.hpp
@@ -60,13 +60,13 @@ inline double factor_vm::fixnum_to_float(cell tagged)
 
 inline cell factor_vm::unbox_array_size()
 {
-	cell obj = dpeek();
+	cell obj = ctx->peek();
 	if(TAG(obj) == FIXNUM_TYPE)
 	{
 		fixnum n = untag_fixnum(obj);
 		if(n >= 0 && n < (fixnum)array_size_max)
 		{
-			dpop();
+			ctx->pop();
 			return n;
 		}
 	}
@@ -74,21 +74,21 @@ inline cell factor_vm::unbox_array_size()
 	return unbox_array_size_slow();
 }
 
-VM_C_API void box_float(float flo, factor_vm *vm);
+VM_C_API cell from_float(float flo, factor_vm *vm);
 VM_C_API float to_float(cell value, factor_vm *vm);
-VM_C_API void box_double(double flo, factor_vm *vm);
+VM_C_API cell from_double(double flo, factor_vm *vm);
 VM_C_API double to_double(cell value, factor_vm *vm);
 
-VM_C_API void box_signed_1(s8 n, factor_vm *vm);
-VM_C_API void box_unsigned_1(u8 n, factor_vm *vm);
-VM_C_API void box_signed_2(s16 n, factor_vm *vm);
-VM_C_API void box_unsigned_2(u16 n, factor_vm *vm);
-VM_C_API void box_signed_4(s32 n, factor_vm *vm);
-VM_C_API void box_unsigned_4(u32 n, factor_vm *vm);
-VM_C_API void box_signed_cell(fixnum integer, factor_vm *vm);
-VM_C_API void box_unsigned_cell(cell cell, factor_vm *vm);
-VM_C_API void box_signed_8(s64 n, factor_vm *vm);
-VM_C_API void box_unsigned_8(u64 n, factor_vm *vm);
+VM_C_API cell from_signed_1(s8 n, factor_vm *vm);
+VM_C_API cell from_unsigned_1(u8 n, factor_vm *vm);
+VM_C_API cell from_signed_2(s16 n, factor_vm *vm);
+VM_C_API cell from_unsigned_2(u16 n, factor_vm *vm);
+VM_C_API cell from_signed_4(s32 n, factor_vm *vm);
+VM_C_API cell from_unsigned_4(u32 n, factor_vm *vm);
+VM_C_API cell from_signed_cell(fixnum integer, factor_vm *vm);
+VM_C_API cell from_unsigned_cell(cell integer, factor_vm *vm);
+VM_C_API cell from_signed_8(s64 n, factor_vm *vm);
+VM_C_API cell from_unsigned_8(u64 n, factor_vm *vm);
 
 VM_C_API s64 to_signed_8(cell obj, factor_vm *vm);
 VM_C_API u64 to_unsigned_8(cell obj, factor_vm *vm);
@@ -96,8 +96,8 @@ VM_C_API u64 to_unsigned_8(cell obj, factor_vm *vm);
 VM_C_API fixnum to_fixnum(cell tagged, factor_vm *vm);
 VM_C_API cell to_cell(cell tagged, factor_vm *vm);
 
-VM_ASM_API void overflow_fixnum_add(fixnum x, fixnum y, factor_vm *vm);
-VM_ASM_API void overflow_fixnum_subtract(fixnum x, fixnum y, factor_vm *vm);
-VM_ASM_API void overflow_fixnum_multiply(fixnum x, fixnum y, factor_vm *vm);
+VM_ASM_API void overflow_fixnum_add(fixnum x, fixnum y, factor_vm *parent);
+VM_ASM_API void overflow_fixnum_subtract(fixnum x, fixnum y, factor_vm *parent);
+VM_ASM_API void overflow_fixnum_multiply(fixnum x, fixnum y, factor_vm *parent);
 
 }
diff --git a/vm/objects.cpp b/vm/objects.cpp
index 15a74d0312..494aca3c5b 100644
--- a/vm/objects.cpp
+++ b/vm/objects.cpp
@@ -5,22 +5,22 @@ namespace factor
 
 void factor_vm::primitive_special_object()
 {
-	fixnum e = untag_fixnum(dpeek());
-	drepl(special_objects[e]);
+	fixnum e = untag_fixnum(ctx->peek());
+	ctx->replace(special_objects[e]);
 }
 
 void factor_vm::primitive_set_special_object()
 {
-	fixnum e = untag_fixnum(dpop());
-	cell value = dpop();
+	fixnum e = untag_fixnum(ctx->pop());
+	cell value = ctx->pop();
 	special_objects[e] = value;
 }
 
 void factor_vm::primitive_identity_hashcode()
 {
-	cell tagged = dpeek();
+	cell tagged = ctx->peek();
 	object *obj = untag<object>(tagged);
-	drepl(tag_fixnum(obj->hashcode()));
+	ctx->replace(tag_fixnum(obj->hashcode()));
 }
 
 void factor_vm::compute_identity_hashcode(object *obj)
@@ -32,15 +32,15 @@ void factor_vm::compute_identity_hashcode(object *obj)
 
 void factor_vm::primitive_compute_identity_hashcode()
 {
-	object *obj = untag<object>(dpop());
+	object *obj = untag<object>(ctx->pop());
 	compute_identity_hashcode(obj);
 }
 
 void factor_vm::primitive_set_slot()
 {
-	fixnum slot = untag_fixnum(dpop());
-	object *obj = untag<object>(dpop());
-	cell value = dpop();
+	fixnum slot = untag_fixnum(ctx->pop());
+	object *obj = untag<object>(ctx->pop());
+	cell value = ctx->pop();
 
 	cell *slot_ptr = &obj->slots()[slot];
 	*slot_ptr = value;
@@ -65,7 +65,7 @@ cell factor_vm::clone_object(cell obj_)
 
 void factor_vm::primitive_clone()
 {
-	drepl(clone_object(dpeek()));
+	ctx->replace(clone_object(ctx->peek()));
 }
 
 /* Size of the object pointed to by a tagged pointer */
@@ -79,7 +79,7 @@ cell factor_vm::object_size(cell tagged)
 
 void factor_vm::primitive_size()
 {
-	box_unsigned_cell(object_size(dpop()));
+	ctx->push(allot_cell(object_size(ctx->pop())));
 }
 
 struct slot_become_visitor {
@@ -114,8 +114,8 @@ struct object_become_visitor {
    to coalesce equal but distinct quotations and wrappers. */
 void factor_vm::primitive_become()
 {
-	array *new_objects = untag_check<array>(dpop());
-	array *old_objects = untag_check<array>(dpop());
+	array *new_objects = untag_check<array>(ctx->pop());
+	array *old_objects = untag_check<array>(ctx->pop());
 
 	cell capacity = array_capacity(new_objects);
 	if(capacity != array_capacity(old_objects))
diff --git a/vm/os-macosx.mm b/vm/os-macosx.mm
index 394198521f..101169be06 100644
--- a/vm/os-macosx.mm
+++ b/vm/os-macosx.mm
@@ -14,12 +14,12 @@ NS_DURING
 		c_to_factor(quot,this);
 		NS_VOIDRETURN;
 NS_HANDLER
-		dpush(allot_alien(false_object,(cell)localException));
+		ctx->push(allot_alien(false_object,(cell)localException));
 		quot = special_objects[OBJ_COCOA_EXCEPTION];
 		if(!tagged<object>(quot).type_p(QUOTATION_TYPE))
 		{
 			/* No Cocoa exception handler was registered, so
-			extra/cocoa/ is not loaded. So we pass the exception
+			basis/cocoa/ is not loaded. So we pass the exception
 			along. */
 			[localException raise];
 		}
diff --git a/vm/os-unix.cpp b/vm/os-unix.cpp
index f47dc1d363..8276d3ee5c 100644
--- a/vm/os-unix.cpp
+++ b/vm/os-unix.cpp
@@ -92,8 +92,8 @@ void factor_vm::ffi_dlclose(dll *dll)
 void factor_vm::primitive_existsp()
 {
 	struct stat sb;
-	char *path = (char *)(untag_check<byte_array>(dpop()) + 1);
-	box_boolean(stat(path,&sb) >= 0);
+	char *path = (char *)(untag_check<byte_array>(ctx->pop()) + 1);
+	ctx->push(tag_boolean(stat(path,&sb) >= 0));
 }
 
 segment::segment(cell size_, bool executable_p)
diff --git a/vm/os-windows.cpp b/vm/os-windows.cpp
index 78426b19ca..5ca666d828 100755
--- a/vm/os-windows.cpp
+++ b/vm/os-windows.cpp
@@ -92,8 +92,8 @@ const vm_char *factor_vm::vm_executable_path()
 
 void factor_vm::primitive_existsp()
 {
-	vm_char *path = untag_check<byte_array>(dpop())->data<vm_char>();
-	box_boolean(windows_stat(path));
+	vm_char *path = untag_check<byte_array>(ctx->pop())->data<vm_char>();
+	ctx->push(tag_boolean(windows_stat(path)));
 }
 
 segment::segment(cell size_, bool executable_p)
diff --git a/vm/primitives.hpp b/vm/primitives.hpp
index 8e4efd8d14..c5ad9d889e 100644
--- a/vm/primitives.hpp
+++ b/vm/primitives.hpp
@@ -1,29 +1,14 @@
 namespace factor
 {
 
-#if defined(FACTOR_X86)
-  extern "C" __attribute__ ((regparm (1))) typedef void (*primitive_type)(factor_vm *parent);
-  #define PRIMITIVE(name) extern "C" __attribute__ ((regparm (1)))  void primitive_##name(factor_vm *parent)
-  #define PRIMITIVE_FORWARD(name) extern "C" __attribute__ ((regparm (1)))  void primitive_##name(factor_vm *parent) \
-  { \
+extern "C" typedef void (*primitive_type)(factor_vm *parent);
+#define PRIMITIVE(name) extern "C" void primitive_##name(factor_vm *parent)
+#define PRIMITIVE_FORWARD(name) extern "C" void primitive_##name(factor_vm *parent) \
+{ \
 	parent->primitive_##name(); \
-  }
-#else
-  extern "C" typedef void (*primitive_type)(factor_vm *parent);
-  #define PRIMITIVE(name) extern "C" void primitive_##name(factor_vm *parent)
-  #define PRIMITIVE_FORWARD(name) extern "C" void primitive_##name(factor_vm *parent) \
-  { \
-	parent->primitive_##name(); \
-  }
-#endif
-extern const primitive_type primitives[];
+}
 
-/* These are defined in assembly */
-PRIMITIVE(fixnum_add);
-PRIMITIVE(fixnum_subtract);
-PRIMITIVE(fixnum_multiply);
-PRIMITIVE(inline_cache_miss);
-PRIMITIVE(inline_cache_miss_tail);
+extern const primitive_type primitives[];
 
 /* These are generated with macros in alien.c */
 PRIMITIVE(alien_signed_cell);
diff --git a/vm/profiler.cpp b/vm/profiler.cpp
index d4f2d757e0..c7acc8fe67 100755
--- a/vm/profiler.cpp
+++ b/vm/profiler.cpp
@@ -60,7 +60,7 @@ void factor_vm::set_profiling(bool profiling)
 
 void factor_vm::primitive_profiling()
 {
-	set_profiling(to_boolean(dpop()));
+	set_profiling(to_boolean(ctx->pop()));
 }
 
 }
diff --git a/vm/quotations.cpp b/vm/quotations.cpp
index d10efb0ad5..05411aab7a 100755
--- a/vm/quotations.cpp
+++ b/vm/quotations.cpp
@@ -297,25 +297,25 @@ void factor_vm::jit_compile_quot(cell quot_, bool relocating)
 
 void factor_vm::primitive_jit_compile()
 {
-	jit_compile_quot(dpop(),true);
+	jit_compile_quot(ctx->pop(),true);
 }
 
 /* push a new quotation on the stack */
 void factor_vm::primitive_array_to_quotation()
 {
 	quotation *quot = allot<quotation>(sizeof(quotation));
-	quot->array = dpeek();
+	quot->array = ctx->peek();
 	quot->cached_effect = false_object;
 	quot->cache_counter = false_object;
 	quot->xt = (void *)lazy_jit_compile;
 	quot->code = NULL;
-	drepl(tag<quotation>(quot));
+	ctx->replace(tag<quotation>(quot));
 }
 
 void factor_vm::primitive_quotation_xt()
 {
-	quotation *quot = untag_check<quotation>(dpeek());
-	drepl(allot_cell((cell)quot->xt));
+	quotation *quot = untag_check<quotation>(ctx->peek());
+	ctx->replace(allot_cell((cell)quot->xt));
 }
 
 /* Allocates memory */
@@ -332,24 +332,23 @@ fixnum factor_vm::quot_code_offset_to_scan(cell quot_, cell offset)
 	return compiler.get_position();
 }
 
-cell factor_vm::lazy_jit_compile_impl(cell quot_, stack_frame *stack)
+cell factor_vm::lazy_jit_compile_impl(cell quot_)
 {
 	data_root<quotation> quot(quot_,this);
-	ctx->callstack_top = stack;
 	jit_compile_quot(quot.value(),true);
 	return quot.value();
 }
 
-VM_ASM_API cell lazy_jit_compile_impl(cell quot_, stack_frame *stack, factor_vm *parent)
+VM_C_API cell lazy_jit_compile_impl(cell quot, factor_vm *parent)
 {
-	return parent->lazy_jit_compile_impl(quot_,stack);
+	return parent->lazy_jit_compile_impl(quot);
 }
 
 void factor_vm::primitive_quot_compiled_p()
 {
-	tagged<quotation> quot(dpop());
+	tagged<quotation> quot(ctx->pop());
 	quot.untag_check(this);
-	dpush(tag_boolean(quot->code != NULL));
+	ctx->push(tag_boolean(quot->code != NULL));
 }
 
 }
diff --git a/vm/quotations.hpp b/vm/quotations.hpp
index 50e7667b4e..cd867dc22f 100755
--- a/vm/quotations.hpp
+++ b/vm/quotations.hpp
@@ -27,6 +27,6 @@ struct quotation_jit : public jit {
 	void iterate_quotation();
 };
 
-VM_ASM_API cell lazy_jit_compile_impl(cell quot, stack_frame *stack, factor_vm *parent);
+VM_C_API cell lazy_jit_compile_impl(cell quot, factor_vm *parent);
 
 }
diff --git a/vm/run.cpp b/vm/run.cpp
index 583d4f658b..dfff8f2f2d 100755
--- a/vm/run.cpp
+++ b/vm/run.cpp
@@ -5,12 +5,12 @@ namespace factor
 
 void factor_vm::primitive_exit()
 {
-	exit(to_fixnum(dpop()));
+	exit(to_fixnum(ctx->pop()));
 }
 
 void factor_vm::primitive_system_micros()
 {
-	box_unsigned_8(system_micros());
+	ctx->push(from_unsigned_8(system_micros()));
 }
 
 void factor_vm::primitive_nano_count()
@@ -18,12 +18,12 @@ void factor_vm::primitive_nano_count()
 	u64 nanos = nano_count();
 	if(nanos < last_nano_count) critical_error("Monotonic counter decreased",0);
 	last_nano_count = nanos;
-	box_unsigned_8(nanos);
+	ctx->push(from_unsigned_8(nanos));
 }
 
 void factor_vm::primitive_sleep()
 {
-	sleep_nanos(to_unsigned_8(dpop()));
+	sleep_nanos(to_unsigned_8(ctx->pop()));
 }
 
 }
diff --git a/vm/strings.cpp b/vm/strings.cpp
index d9a32517a4..c7e0354cba 100644
--- a/vm/strings.cpp
+++ b/vm/strings.cpp
@@ -101,9 +101,9 @@ string *factor_vm::allot_string(cell capacity, cell fill)
 
 void factor_vm::primitive_string()
 {
-	cell initial = to_cell(dpop());
+	cell initial = to_cell(ctx->pop());
 	cell length = unbox_array_size();
-	dpush(tag<string>(allot_string(length,initial)));
+	ctx->push(tag<string>(allot_string(length,initial)));
 }
 
 bool factor_vm::reallot_string_in_place_p(string *str, cell capacity)
@@ -157,32 +157,32 @@ string* factor_vm::reallot_string(string *str_, cell capacity)
 
 void factor_vm::primitive_resize_string()
 {
-	data_root<string> str(dpop(),this);
+	data_root<string> str(ctx->pop(),this);
 	str.untag_check(this);
 	cell capacity = unbox_array_size();
-	dpush(tag<string>(reallot_string(str.untagged(),capacity)));
+	ctx->push(tag<string>(reallot_string(str.untagged(),capacity)));
 }
 
 void factor_vm::primitive_string_nth()
 {
-	string *str = untag<string>(dpop());
-	cell index = untag_fixnum(dpop());
-	dpush(tag_fixnum(str->nth(index)));
+	string *str = untag<string>(ctx->pop());
+	cell index = untag_fixnum(ctx->pop());
+	ctx->push(tag_fixnum(str->nth(index)));
 }
 
 void factor_vm::primitive_set_string_nth_fast()
 {
-	string *str = untag<string>(dpop());
-	cell index = untag_fixnum(dpop());
-	cell value = untag_fixnum(dpop());
+	string *str = untag<string>(ctx->pop());
+	cell index = untag_fixnum(ctx->pop());
+	cell value = untag_fixnum(ctx->pop());
 	set_string_nth_fast(str,index,value);
 }
 
 void factor_vm::primitive_set_string_nth_slow()
 {
-	string *str = untag<string>(dpop());
-	cell index = untag_fixnum(dpop());
-	cell value = untag_fixnum(dpop());
+	string *str = untag<string>(ctx->pop());
+	cell index = untag_fixnum(ctx->pop());
+	cell value = untag_fixnum(ctx->pop());
 	set_string_nth_slow(str,index,value);
 }
 
diff --git a/vm/tuples.cpp b/vm/tuples.cpp
index eaac437753..e09bf17e11 100755
--- a/vm/tuples.cpp
+++ b/vm/tuples.cpp
@@ -6,27 +6,27 @@ namespace factor
 /* push a new tuple on the stack, filling its slots with f */
 void factor_vm::primitive_tuple()
 {
-	data_root<tuple_layout> layout(dpop(),this);
+	data_root<tuple_layout> layout(ctx->pop(),this);
 	tagged<tuple> t(allot<tuple>(tuple_size(layout.untagged())));
 	t->layout = layout.value();
 
 	memset_cell(t->data(),false_object,tuple_size(layout.untagged()) - sizeof(cell));
 
-	dpush(t.value());
+	ctx->push(t.value());
 }
 
 /* push a new tuple on the stack, filling its slots from the stack */
 void factor_vm::primitive_tuple_boa()
 {
-	data_root<tuple_layout> layout(dpop(),this);
+	data_root<tuple_layout> layout(ctx->pop(),this);
 	tagged<tuple> t(allot<tuple>(tuple_size(layout.untagged())));
 	t->layout = layout.value();
 
 	cell size = untag_fixnum(layout.untagged()->size) * sizeof(cell);
-	memcpy(t->data(),(cell *)(ds - size + sizeof(cell)),size);
-	ds -= size;
+	memcpy(t->data(),(cell *)(ctx->datastack - size + sizeof(cell)),size);
+	ctx->datastack -= size;
 
-	dpush(t.value());
+	ctx->push(t.value());
 }
 
 }
diff --git a/vm/vm.hpp b/vm/vm.hpp
index c32070482b..c147ae59b5 100755
--- a/vm/vm.hpp
+++ b/vm/vm.hpp
@@ -92,10 +92,6 @@ struct factor_vm
 	u64 last_nano_count;
 
 	// contexts
-	void reset_datastack();
-	void reset_retainstack();
-	void fix_stacks();
-	void save_stacks();
 	context *alloc_context();
 	void dealloc_context(context *old_context);
 	void nest_stacks(stack_frame *magic_frame);
@@ -375,9 +371,7 @@ struct factor_vm
 	void primitive_set_string_nth_slow();
 
 	//booleans
-	void box_boolean(bool value);
-
-	inline cell tag_boolean(cell untagged)
+	cell tag_boolean(cell untagged)
 	{
 		return (untagged ? true_object : false_object);
 	}
@@ -462,21 +456,19 @@ struct factor_vm
 	void primitive_bits_double();
 	fixnum to_fixnum(cell tagged);
 	cell to_cell(cell tagged);
-	void box_signed_1(s8 n);
-	void box_unsigned_1(u8 n);
-	void box_signed_2(s16 n);
-	void box_unsigned_2(u16 n);
-	void box_signed_4(s32 n);
-	void box_unsigned_4(u32 n);
-	void box_signed_cell(fixnum integer);
-	void box_unsigned_cell(cell cell);
-	void box_signed_8(s64 n);
+	cell from_signed_1(s8 n);
+	cell from_unsigned_1(u8 n);
+	cell from_signed_2(s16 n);
+	cell from_unsigned_2(u16 n);
+	cell from_signed_4(s32 n);
+	cell from_unsigned_4(u32 n);
+	cell from_signed_cell(fixnum integer);
+	cell from_unsigned_cell(cell integer);
+	cell from_signed_8(s64 n);
 	s64 to_signed_8(cell obj);
-	void box_unsigned_8(u64 n);
+	cell from_unsigned_8(u64 n);
 	u64 to_unsigned_8(cell obj);
-	void box_float(float flo);
 	float to_float(cell value);
-	void box_double(double flo);
 	double to_double(cell value);
 	inline void overflow_fixnum_add(fixnum x, fixnum y);
 	inline void overflow_fixnum_subtract(fixnum x, fixnum y);
@@ -498,6 +490,7 @@ struct factor_vm
 	void init_c_io();
 	void io_error();
 	void primitive_fopen();
+	FILE *pop_file_handle();
 	void primitive_fgetc();
 	void primitive_fread();
 	void primitive_fputc();
@@ -582,12 +575,12 @@ struct factor_vm
 	void primitive_innermost_stack_frame_executing();
 	void primitive_innermost_stack_frame_scan();
 	void primitive_set_innermost_stack_frame_quot();
-	void save_callstack_bottom(stack_frame *callstack_bottom);
 	template<typename Iterator> void iterate_callstack(context *ctx, Iterator &iterator);
 
 	//alien
 	char *pinned_alien_offset(cell obj);
 	cell allot_alien(cell delegate_, cell displacement);
+	cell allot_alien(void *address);
 	void primitive_displaced_alien();
 	void primitive_alien_address();
 	void *alien_pointer();
@@ -597,8 +590,6 @@ struct factor_vm
 	void primitive_dll_validp();
 	void primitive_vm_ptr();
 	char *alien_offset(cell obj);
-	char *unbox_alien();
-	void box_alien(void *ptr);
 	void to_value_struct(cell src, void *dest, cell size);
 	void box_value_struct(void *src, cell size);
 	void box_small_struct(cell x, cell y, cell size);
@@ -612,7 +603,7 @@ struct factor_vm
 	code_block *jit_compile_quot(cell owner_, cell quot_, bool relocating);
 	void jit_compile_quot(cell quot_, bool relocating);
 	fixnum quot_code_offset_to_scan(cell quot_, cell offset);
-	cell lazy_jit_compile_impl(cell quot_, stack_frame *stack);
+	cell lazy_jit_compile_impl(cell quot);
 	void primitive_quot_compiled_p();
 
 	//dispatch
diff --git a/vm/words.cpp b/vm/words.cpp
index 347076a529..ec5db15095 100644
--- a/vm/words.cpp
+++ b/vm/words.cpp
@@ -73,27 +73,27 @@ word *factor_vm::allot_word(cell name_, cell vocab_, cell hashcode_)
 /* (word) ( name vocabulary hashcode -- word ) */
 void factor_vm::primitive_word()
 {
-	cell hashcode = dpop();
-	cell vocab = dpop();
-	cell name = dpop();
-	dpush(tag<word>(allot_word(name,vocab,hashcode)));
+	cell hashcode = ctx->pop();
+	cell vocab = ctx->pop();
+	cell name = ctx->pop();
+	ctx->push(tag<word>(allot_word(name,vocab,hashcode)));
 }
 
 /* word-xt ( word -- start end ) */
 void factor_vm::primitive_word_xt()
 {
-	data_root<word> w(dpop(),this);
+	data_root<word> w(ctx->pop(),this);
 	w.untag_check(this);
 
 	if(profiling_p)
 	{
-		dpush(allot_cell((cell)w->profiling->xt()));
-		dpush(allot_cell((cell)w->profiling + w->profiling->size()));
+		ctx->push(allot_cell((cell)w->profiling->xt()));
+		ctx->push(allot_cell((cell)w->profiling + w->profiling->size()));
 	}
 	else
 	{
-		dpush(allot_cell((cell)w->code->xt()));
-		dpush(allot_cell((cell)w->code + w->code->size()));
+		ctx->push(allot_cell((cell)w->code->xt()));
+		ctx->push(allot_cell((cell)w->code + w->code->size()));
 	}
 }
 
@@ -107,15 +107,15 @@ void factor_vm::update_word_xt(word *w)
 
 void factor_vm::primitive_optimized_p()
 {
-	word *w = untag_check<word>(dpeek());
-	drepl(tag_boolean(w->code->optimized_p()));
+	word *w = untag_check<word>(ctx->peek());
+	ctx->replace(tag_boolean(w->code->optimized_p()));
 }
 
 void factor_vm::primitive_wrapper()
 {
 	wrapper *new_wrapper = allot<wrapper>(sizeof(wrapper));
-	new_wrapper->object = dpeek();
-	drepl(tag<wrapper>(new_wrapper));
+	new_wrapper->object = ctx->peek();
+	ctx->replace(tag<wrapper>(new_wrapper));
 }
 
 }

From e96404327e44965dc65e5cf6774878666a6f2e23 Mon Sep 17 00:00:00 2001
From: Slava Pestov <slava@slava-pestovs-macbook-pro.local>
Date: Tue, 22 Dec 2009 15:42:49 +1300
Subject: [PATCH 2/8] Get optimizing compiler working without global register
 variables in VM

---
 basis/alien/c-types/c-types.factor            | 30 ++++----
 .../cfg/instructions/instructions.factor      |  3 +-
 .../save-contexts/save-contexts-tests.factor  |  2 +-
 .../cfg/save-contexts/save-contexts.factor    |  9 ---
 basis/compiler/codegen/codegen.factor         |  8 +--
 basis/compiler/tests/alien.factor             |  2 +
 basis/cpu/architecture/architecture.factor    | 27 +++++++-
 basis/cpu/ppc/ppc.factor                      |  8 +--
 basis/cpu/x86/32/32.factor                    | 26 ++++---
 basis/cpu/x86/64/64.factor                    |  8 +--
 basis/cpu/x86/x86.factor                      | 68 +++++++------------
 basis/io/backend/unix/macosx/macosx.factor    |  2 +-
 basis/vm/vm.factor                            | 21 ++++--
 build-support/factor.sh                       | 15 ----
 vm/Config.openbsd                             |  3 +-
 vm/Config.x86.32                              |  3 -
 vm/aging_collector.cpp                        |  3 +-
 vm/alien.cpp                                  | 24 +++----
 vm/alien.hpp                                  |  6 +-
 vm/code_blocks.hpp                            |  6 +-
 vm/contexts.cpp                               |  3 +
 vm/cpu-ppc.hpp                                | 21 +++---
 vm/cpu-x86.64.S                               | 13 +++-
 vm/cpu-x86.64.hpp                             |  5 +-
 vm/cpu-x86.S                                  | 44 ++++++++----
 vm/data_heap_checker.cpp                      | 20 +++---
 vm/free_list.hpp                              |  9 ++-
 vm/vm.hpp                                     |  6 +-
 28 files changed, 213 insertions(+), 182 deletions(-)

diff --git a/basis/alien/c-types/c-types.factor b/basis/alien/c-types/c-types.factor
index 85a05f3a56..347d157a79 100644
--- a/basis/alien/c-types/c-types.factor
+++ b/basis/alien/c-types/c-types.factor
@@ -344,7 +344,7 @@ SYMBOLS:
         bootstrap-cell >>align
         bootstrap-cell >>align-first
         [ >c-ptr ] >>unboxer-quot
-        "box_alien" >>boxer
+        "allot_alien" >>boxer
         "alien_offset" >>unboxer
     \ void* define-primitive-type
 
@@ -355,7 +355,7 @@ SYMBOLS:
         [ set-alien-signed-8 ] >>setter
         8 >>size
         8-byte-alignment
-        "box_signed_8" >>boxer
+        "from_signed_8" >>boxer
         "to_signed_8" >>unboxer
     \ longlong define-primitive-type
 
@@ -366,7 +366,7 @@ SYMBOLS:
         [ set-alien-unsigned-8 ] >>setter
         8 >>size
         8-byte-alignment
-        "box_unsigned_8" >>boxer
+        "from_unsigned_8" >>boxer
         "to_unsigned_8" >>unboxer
     \ ulonglong define-primitive-type
 
@@ -378,7 +378,7 @@ SYMBOLS:
         bootstrap-cell >>size
         bootstrap-cell >>align
         bootstrap-cell >>align-first
-        "box_signed_cell" >>boxer
+        "from_signed_cell" >>boxer
         "to_fixnum" >>unboxer
     \ long define-primitive-type
 
@@ -390,7 +390,7 @@ SYMBOLS:
         bootstrap-cell >>size
         bootstrap-cell >>align
         bootstrap-cell >>align-first
-        "box_unsigned_cell" >>boxer
+        "from_unsigned_cell" >>boxer
         "to_cell" >>unboxer
     \ ulong define-primitive-type
 
@@ -402,7 +402,7 @@ SYMBOLS:
         4 >>size
         4 >>align
         4 >>align-first
-        "box_signed_4" >>boxer
+        "from_signed_4" >>boxer
         "to_fixnum" >>unboxer
     \ int define-primitive-type
 
@@ -414,7 +414,7 @@ SYMBOLS:
         4 >>size
         4 >>align
         4 >>align-first
-        "box_unsigned_4" >>boxer
+        "from_unsigned_4" >>boxer
         "to_cell" >>unboxer
     \ uint define-primitive-type
 
@@ -426,7 +426,7 @@ SYMBOLS:
         2 >>size
         2 >>align
         2 >>align-first
-        "box_signed_2" >>boxer
+        "from_signed_2" >>boxer
         "to_fixnum" >>unboxer
     \ short define-primitive-type
 
@@ -438,7 +438,7 @@ SYMBOLS:
         2 >>size
         2 >>align
         2 >>align-first
-        "box_unsigned_2" >>boxer
+        "from_unsigned_2" >>boxer
         "to_cell" >>unboxer
     \ ushort define-primitive-type
 
@@ -450,7 +450,7 @@ SYMBOLS:
         1 >>size
         1 >>align
         1 >>align-first
-        "box_signed_1" >>boxer
+        "from_signed_1" >>boxer
         "to_fixnum" >>unboxer
     \ char define-primitive-type
 
@@ -462,7 +462,7 @@ SYMBOLS:
         1 >>size
         1 >>align
         1 >>align-first
-        "box_unsigned_1" >>boxer
+        "from_unsigned_1" >>boxer
         "to_cell" >>unboxer
     \ uchar define-primitive-type
 
@@ -473,7 +473,7 @@ SYMBOLS:
             4 >>size
             4 >>align
             4 >>align-first
-            "box_boolean" >>boxer
+            "from_boolean" >>boxer
             "to_boolean" >>unboxer
     ] [
         <c-type>
@@ -482,7 +482,7 @@ SYMBOLS:
             1 >>size
             1 >>align
             1 >>align-first
-            "box_boolean" >>boxer
+            "from_boolean" >>boxer
             "to_boolean" >>unboxer
     ] if
     \ bool define-primitive-type
@@ -495,7 +495,7 @@ SYMBOLS:
         4 >>size
         4 >>align
         4 >>align-first
-        "box_float" >>boxer
+        "from_float" >>boxer
         "to_float" >>unboxer
         float-rep >>rep
         [ >float ] >>unboxer-quot
@@ -508,7 +508,7 @@ SYMBOLS:
         [ [ >float ] 2dip set-alien-double ] >>setter
         8 >>size
         8-byte-alignment
-        "box_double" >>boxer
+        "from_double" >>boxer
         "to_double" >>unboxer
         double-rep >>rep
         [ >float ] >>unboxer-quot
diff --git a/basis/compiler/cfg/instructions/instructions.factor b/basis/compiler/cfg/instructions/instructions.factor
index 93b960c576..662c12ed33 100644
--- a/basis/compiler/cfg/instructions/instructions.factor
+++ b/basis/compiler/cfg/instructions/instructions.factor
@@ -748,8 +748,7 @@ temp: temp1/int-rep temp2/int-rep
 literal: size data-values tagged-values uninitialized-locs ;
 
 INSN: ##save-context
-temp: temp1/int-rep temp2/int-rep
-literal: callback-allowed? ;
+temp: temp1/int-rep temp2/int-rep ;
 
 ! Instructions used by machine IR only.
 INSN: _prologue
diff --git a/basis/compiler/cfg/save-contexts/save-contexts-tests.factor b/basis/compiler/cfg/save-contexts/save-contexts-tests.factor
index 23646cfcd7..020d000b6a 100644
--- a/basis/compiler/cfg/save-contexts/save-contexts-tests.factor
+++ b/basis/compiler/cfg/save-contexts/save-contexts-tests.factor
@@ -15,7 +15,7 @@ V{
 
 [
     V{
-        T{ ##save-context f 1 2 f }
+        T{ ##save-context f 1 2 }
         T{ ##unary-float-function f 2 3 "sqrt" }
         T{ ##branch }
     }
diff --git a/basis/compiler/cfg/save-contexts/save-contexts.factor b/basis/compiler/cfg/save-contexts/save-contexts.factor
index fd92ace150..4296fb54f9 100644
--- a/basis/compiler/cfg/save-contexts/save-contexts.factor
+++ b/basis/compiler/cfg/save-contexts/save-contexts.factor
@@ -17,19 +17,10 @@ IN: compiler.cfg.save-contexts
         } 1||
     ] any? ;
 
-: needs-callback-context? ( insns -- ? )
-    [
-        {
-            [ ##alien-invoke? ]
-            [ ##alien-indirect? ]
-        } 1||
-    ] any? ;
-
 : insert-save-context ( bb -- )
     dup instructions>> dup needs-save-context? [
         int-rep next-vreg-rep
         int-rep next-vreg-rep
-        pick needs-callback-context?
         \ ##save-context new-insn prefix
         >>instructions drop
     ] [ 2drop ] if ;
diff --git a/basis/compiler/codegen/codegen.factor b/basis/compiler/codegen/codegen.factor
index b1b39c0b07..430b48e1ef 100644
--- a/basis/compiler/codegen/codegen.factor
+++ b/basis/compiler/codegen/codegen.factor
@@ -283,7 +283,7 @@ M: ##gc generate-insn
         [ [ uninitialized-locs>> ] [ temp1>> ] bi wipe-locs ]
         [ data-values>> save-data-regs ]
         [ [ tagged-values>> ] [ temp1>> ] bi save-gc-roots ]
-        [ [ temp1>> ] [ temp2>> ] bi t %save-context ]
+        [ [ temp1>> ] [ temp2>> ] bi %save-context ]
         [ [ tagged-values>> length ] [ temp1>> ] bi %call-gc ]
         [ [ tagged-values>> ] [ temp1>> ] bi load-gc-roots ]
         [ data-values>> load-data-regs ]
@@ -384,7 +384,7 @@ M: c-type-name flatten-value-type c-type flatten-value-type ;
 
 : unbox-parameters ( offset node -- )
     parameters>> swap
-    '[ prepare-unbox-parameters [ %prepare-unbox [ _ + ] dip unbox-parameter ] 3each ]
+    '[ prepare-unbox-parameters [ %pop-stack [ _ + ] dip unbox-parameter ] 3each ]
     [ length neg %inc-d ]
     bi ;
 
@@ -407,7 +407,7 @@ M: c-type-name flatten-value-type c-type flatten-value-type ;
     ] with-param-regs ;
 
 : box-return* ( node -- )
-    return>> [ ] [ box-return ] if-void ;
+    return>> [ ] [ box-return %push-stack ] if-void ;
 
 : check-dlsym ( symbols dll -- )
     dup dll-valid? [
@@ -452,7 +452,7 @@ M: ##alien-indirect generate-insn
 
 ! ##alien-callback
 : box-parameters ( params -- )
-    alien-parameters [ box-parameter ] each-parameter ;
+    alien-parameters [ box-parameter %push-context-stack ] each-parameter ;
 
 : registers>objects ( node -- )
     ! Generate code for boxing input parameters in a callback.
diff --git a/basis/compiler/tests/alien.factor b/basis/compiler/tests/alien.factor
index a2ce533afd..e6abab1267 100644
--- a/basis/compiler/tests/alien.factor
+++ b/basis/compiler/tests/alien.factor
@@ -94,6 +94,8 @@ FUNCTION: TINY ffi_test_17 int x ;
 
 { 1 1 } [ indirect-test-1 ] must-infer-as
 
+[ B{ } indirect-test-1 ] [ { "kernel-error" 3 6 B{ } } = ] must-fail-with
+
 [ 3 ] [ &: ffi_test_1 indirect-test-1 ] unit-test
 
 : indirect-test-1' ( ptr -- )
diff --git a/basis/cpu/architecture/architecture.factor b/basis/cpu/architecture/architecture.factor
index 0a4db25d35..26878f7c1f 100644
--- a/basis/cpu/architecture/architecture.factor
+++ b/basis/cpu/architecture/architecture.factor
@@ -503,8 +503,27 @@ HOOK: dummy-int-params? cpu ( -- ? )
 ! If t, all int parameters are shadowed by dummy FP parameters
 HOOK: dummy-fp-params? cpu ( -- ? )
 
-HOOK: %prepare-unbox cpu ( n -- )
+! Load a value (from the data stack in the ds register).
+! The value is then passed as a parameter to a VM to_*() function
+HOOK: %pop-stack cpu ( n -- )
 
+! Store a value (to the data stack in the VM's current context)
+! The value is passed to a VM to_*() function -- used for
+! callback returns
+HOOK: %pop-context-stack cpu ( -- )
+
+! Store a value (to the data stack in the ds register).
+! The value was returned from a VM from_*() function
+HOOK: %push-stack cpu ( -- )
+
+! Store a value (to the data stack in the VM's current context)
+! The value is returned from a VM from_*() function -- used for
+! callback parameters
+HOOK: %push-context-stack cpu ( -- )
+
+! Call a function to convert a tagged pointer returned by
+! %pop-stack or %pop-context-stack into a value that can be
+! passed to a C function, or returned from a callback
 HOOK: %unbox cpu ( n rep func -- )
 
 HOOK: %unbox-long-long cpu ( n func -- )
@@ -513,6 +532,10 @@ HOOK: %unbox-small-struct cpu ( c-type -- )
 
 HOOK: %unbox-large-struct cpu ( n c-type -- )
 
+! Call a function to convert a value into a tagged pointer,
+! possibly allocating a bignum, float, or alien instance,
+! which is then pushed on the data stack by %push-stack or
+! %push-context-stack
 HOOK: %box cpu ( n rep func -- )
 
 HOOK: %box-long-long cpu ( n func -- )
@@ -527,7 +550,7 @@ HOOK: %save-param-reg cpu ( stack reg rep -- )
 
 HOOK: %load-param-reg cpu ( stack reg rep -- )
 
-HOOK: %save-context cpu ( temp1 temp2 callback-allowed? -- )
+HOOK: %save-context cpu ( temp1 temp2 -- )
 
 HOOK: %prepare-var-args cpu ( -- )
 
diff --git a/basis/cpu/ppc/ppc.factor b/basis/cpu/ppc/ppc.factor
index a7eb3bb4a5..90cd638793 100644
--- a/basis/cpu/ppc/ppc.factor
+++ b/basis/cpu/ppc/ppc.factor
@@ -590,7 +590,7 @@ M:: ppc %save-param-reg ( stack reg rep -- )
 M:: ppc %load-param-reg ( stack reg rep -- )
     reg stack local@ rep load-from-frame ;
 
-M: ppc %prepare-unbox ( n -- )
+M: ppc %pop-stack ( n -- )
     [ 3 ] dip <ds-loc> loc>operand LWZ ;
 
 M: ppc %unbox ( n rep func -- )
@@ -650,13 +650,13 @@ M: ppc %box-large-struct ( n c-type -- )
     [ [ 3 1 ] dip struct-return@ ADDI ] [ heap-size 4 LI ] bi*
     5 %load-vm-addr
     ! Call the function
-    "box_value_struct" f %alien-invoke ;
+    "from_value_struct" f %alien-invoke ;
 
 M:: ppc %save-context ( temp1 temp2 callback-allowed? -- )
     #! Save Factor stack pointers in case the C code calls a
     #! callback which does a GC, which must reliably trace
     #! all roots.
-    temp1 "stack_chain" %load-vm-field-addr
+    temp1 "ctx" %load-vm-field-addr
     temp1 temp1 0 LWZ
     1 temp1 0 STW
     callback-allowed? [
@@ -703,7 +703,7 @@ M: ppc %box-small-struct ( c-type -- )
     #! Box a <= 16-byte struct returned in r3:r4:r5:r6
     heap-size 7 LI
     8 %load-vm-addr
-    "box_medium_struct" f %alien-invoke ;
+    "from_medium_struct" f %alien-invoke ;
 
 : %unbox-struct-1 ( -- )
     ! Alien must be in r3.
diff --git a/basis/cpu/x86/32/32.factor b/basis/cpu/x86/32/32.factor
index 8867ca6597..6996417edf 100644
--- a/basis/cpu/x86/32/32.factor
+++ b/basis/cpu/x86/32/32.factor
@@ -136,7 +136,7 @@ M:: x86.32 %box-large-struct ( n c-type -- )
     8 save-vm-ptr
     4 stack@ c-type heap-size MOV
     0 stack@ EDX MOV
-    "box_value_struct" f %alien-invoke ;
+    "from_value_struct" f %alien-invoke ;
 
 M: x86.32 %prepare-box-struct ( -- )
     ! Compute target address for value struct return
@@ -150,11 +150,17 @@ M: x86.32 %box-small-struct ( c-type -- )
     8 stack@ swap heap-size MOV
     4 stack@ EDX MOV
     0 stack@ EAX MOV
-    "box_small_struct" f %alien-invoke ;
+    "from_small_struct" f %alien-invoke ;
 
-M: x86.32 %prepare-unbox ( -- )
+M: x86.32 %pop-stack ( n -- )
     EAX swap ds-reg reg-stack MOV ;
 
+M: x86.32 %pop-context-stack ( -- )
+    temp-reg %load-context-datastack
+    EAX temp-reg [] MOV
+    EAX EAX [] MOV
+    temp-reg [] bootstrap-cell SUB ;
+
 : call-unbox-func ( func -- )
     4 save-vm-ptr
     0 stack@ EAX MOV
@@ -224,21 +230,23 @@ M: x86.32 %unnest-stacks ( -- )
     "unnest_stacks" f %alien-invoke ;
 
 M: x86.32 %prepare-alien-indirect ( -- )
-    0 save-vm-ptr
-    "unbox_alien" f %alien-invoke
+    EAX ds-reg [] MOV
+    ds-reg 4 SUB
+    4 save-vm-ptr
+    0 stack@ EAX MOV
+    "pinned_alien_offset" f %alien-invoke
     EBP EAX MOV ;
 
 M: x86.32 %alien-indirect ( -- )
     EBP CALL ;
 
 M: x86.32 %alien-callback ( quot -- )
-    ! Fastcall
-    param-reg-1 swap %load-reference
-    param-reg-2 %mov-vm-ptr
+    EAX swap %load-reference
+    EDX %mov-vm-ptr
     "c_to_factor" f %alien-invoke ;
 
 M: x86.32 %callback-value ( ctype -- )
-    0 %prepare-unbox
+    %pop-context-stack
     4 stack@ EAX MOV
     0 save-vm-ptr
     ! Restore data/call/retain stacks
diff --git a/basis/cpu/x86/64/64.factor b/basis/cpu/x86/64/64.factor
index cbc5c4d7e5..06a348b4e6 100644
--- a/basis/cpu/x86/64/64.factor
+++ b/basis/cpu/x86/64/64.factor
@@ -88,7 +88,7 @@ M: x86 %load-param-reg [ swap param@ ] dip %copy ;
         call
     ] with-scope ; inline
 
-M: x86.64 %prepare-unbox ( n -- )
+M: x86.64 %pop-stack ( n -- )
     param-reg-1 swap ds-reg reg-stack MOV ;
 
 M:: x86.64 %unbox ( n rep func -- )
@@ -167,7 +167,7 @@ M: x86.64 %box-small-struct ( c-type -- )
         param-reg-1 0 box-struct-field@ MOV
         param-reg-2 1 box-struct-field@ MOV
         param-reg-4 %mov-vm-ptr
-        "box_small_struct" f %alien-invoke
+        "from_small_struct" f %alien-invoke
     ] with-return-regs ;
 
 : struct-return@ ( n -- operand )
@@ -180,7 +180,7 @@ M: x86.64 %box-large-struct ( n c-type -- )
     param-reg-1 swap struct-return@ LEA
     param-reg-3 %mov-vm-ptr
     ! Copy the struct from the C stack
-    "box_value_struct" f %alien-invoke ;
+    "from_value_struct" f %alien-invoke ;
 
 M: x86.64 %prepare-box-struct ( -- )
     ! Compute target address for value struct return
@@ -219,7 +219,7 @@ M: x86.64 %alien-callback ( quot -- )
     "c_to_factor" f %alien-invoke ;
 
 M: x86.64 %callback-value ( ctype -- )
-    0 %prepare-unbox
+    0 %pop-stack
     RSP 8 SUB
     param-reg-1 PUSH
     param-reg-1 %mov-vm-ptr
diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor
index 36711f7122..465e2d4929 100644
--- a/basis/cpu/x86/x86.factor
+++ b/basis/cpu/x86/x86.factor
@@ -472,6 +472,23 @@ M: x86 %load-gc-root ( gc-root register -- ) swap gc-root@ MOV ;
 M: x86 %alien-global ( dst symbol library -- )
     [ 0 MOV ] 2dip rc-absolute-cell rel-dlsym ;    
 
+M: x86 %push-stack ( -- )
+    ds-reg cell ADD
+    ds-reg [] int-regs return-reg MOV ;
+
+:: %load-context-datastack ( dst -- )
+    ! Load context struct
+    dst "ctx" %vm-field-ptr
+    dst dst [] MOV
+    ! Load context datastack pointer
+    dst "datastack" context-field-offset ADD ;
+
+M: x86 %push-context-stack ( -- )
+    temp-reg %load-context-datastack
+    temp-reg [] bootstrap-cell ADD
+    temp-reg temp-reg [] MOV
+    temp-reg [] int-regs return-reg MOV ;
+
 M: x86 %epilogue ( n -- ) cell - incr-stack-reg ;
 
 :: %boolean ( dst temp word -- )
@@ -649,43 +666,6 @@ M: x86 %fill-vector-reps
         { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep longlong-2-rep ulonglong-2-rep } }
     } available-reps ;
 
-! M:: x86 %broadcast-vector ( dst src rep -- )
-!     rep signed-rep {
-!         { float-4-rep [
-!             dst src float-4-rep %copy
-!             dst dst { 0 0 0 0 } SHUFPS
-!         ] }
-!         { double-2-rep [
-!             dst src MOVDDUP
-!         ] }
-!         { longlong-2-rep [
-!             dst src =
-!             [ dst dst PUNPCKLQDQ ]
-!             [ dst src { 0 1 0 1 } PSHUFD ]
-!             if
-!         ] }
-!         { int-4-rep [
-!             dst src { 0 0 0 0 } PSHUFD
-!         ] }
-!         { short-8-rep [
-!             dst src { 0 0 0 0 } PSHUFLW 
-!             dst dst PUNPCKLQDQ 
-!         ] }
-!         { char-16-rep [
-!             dst src char-16-rep %copy
-!             dst dst PUNPCKLBW
-!             dst dst { 0 0 0 0 } PSHUFLW
-!             dst dst PUNPCKLQDQ
-!         ] }
-!     } case ;
-! 
-! M: x86 %broadcast-vector-reps
-!     {
-!         ! Can't do this with sse1 since it will want to unbox
-!         ! a double-precision float and convert to single precision
-!         { sse2? { float-4-rep double-2-rep longlong-2-rep ulonglong-2-rep int-4-rep uint-4-rep short-8-rep ushort-8-rep char-16-rep uchar-16-rep } }
-!     } available-reps ;
-
 M:: x86 %gather-vector-4 ( dst src1 src2 src3 src4 rep -- )
     rep signed-rep {
         { float-4-rep [
@@ -883,6 +863,7 @@ M: x86 %float>integer-vector-reps
 
 : (%compare-float-vector) ( dst src rep double single -- )
     [ double-2-rep eq? ] 2dip if ; inline
+
 : %compare-float-vector ( dst src rep cc -- )
     {
         { cc<    [ [ CMPLTPD    ] [ CMPLTPS    ] (%compare-float-vector) ] }
@@ -903,6 +884,7 @@ M: x86 %float>integer-vector-reps
         { short-8-rep    [ int16 call ] }
         { char-16-rep    [ int8  call ] }
     } case ; inline
+
 : %compare-int-vector ( dst src rep cc -- )
     {
         { cc= [ [ PCMPEQQ ] [ PCMPEQD ] [ PCMPEQW ] [ PCMPEQB ] (%compare-int-vector) ] }
@@ -921,6 +903,7 @@ M: x86 %compare-vector ( dst src1 src2 rep cc -- )
         { sse2? { double-2-rep char-16-rep uchar-16-rep short-8-rep ushort-8-rep int-4-rep uint-4-rep } }
         { sse4.1? { longlong-2-rep ulonglong-2-rep } }
     } available-reps ;
+
 : %compare-vector-ord-reps ( -- reps )
     {
         { sse? { float-4-rep } }
@@ -1409,6 +1392,7 @@ M: x86 %integer>scalar drop MOVD ;
     } case ;
 
 M: x86.32 %scalar>integer ( dst src rep -- ) %scalar>integer-32 ;
+
 M: x86.64 %scalar>integer ( dst src rep -- )
     {
         { longlong-scalar-rep  [ MOVD ] }
@@ -1424,18 +1408,16 @@ M:: x86 %reload ( dst rep src -- ) dst src rep %copy ;
 
 M: x86 %loop-entry 16 code-alignment [ NOP ] times ;
 
-M:: x86 %save-context ( temp1 temp2 callback-allowed? -- )
+M:: x86 %save-context ( temp1 temp2 -- )
     #! Save Factor stack pointers in case the C code calls a
     #! callback which does a GC, which must reliably trace
     #! all roots.
-    temp1 "stack_chain" %vm-field-ptr
+    temp1 "ctx" %vm-field-ptr
     temp1 temp1 [] MOV
     temp2 stack-reg cell neg [+] LEA
     temp1 [] temp2 MOV
-    callback-allowed? [
-        temp1 2 cells [+] ds-reg MOV
-        temp1 3 cells [+] rs-reg MOV
-    ] when ;
+    temp1 2 cells [+] ds-reg MOV
+    temp1 3 cells [+] rs-reg MOV ;
 
 M: x86 value-struct? drop t ;
 
diff --git a/basis/io/backend/unix/macosx/macosx.factor b/basis/io/backend/unix/macosx/macosx.factor
index e669875448..0bc2b85b32 100644
--- a/basis/io/backend/unix/macosx/macosx.factor
+++ b/basis/io/backend/unix/macosx/macosx.factor
@@ -2,7 +2,7 @@
 ! See http://factorcode.org/license.txt for BSD license.
 USING: io.backend system namespaces io.backend.unix.bsd
 io.backend.unix.multiplexers io.backend.unix.multiplexers.run-loop ;
-IN: io.backend.macosx
+IN: io.backend.unix.macosx
 
 M: macosx init-io ( -- )
     <run-loop-mx> mx set-global ;
diff --git a/basis/vm/vm.factor b/basis/vm/vm.factor
index e3585952db..278296c4d0 100644
--- a/basis/vm/vm.factor
+++ b/basis/vm/vm.factor
@@ -4,7 +4,20 @@ USING: classes.struct alien.c-types alien.syntax ;
 IN: vm
 
 TYPEDEF: uintptr_t cell
-C-TYPE: context
+
+STRUCT: context
+{ callstack-top void* }
+{ callstack-bottom void* }
+{ datastack cell }
+{ callstack cell }
+{ magic-frame void* }
+{ datastack-region void* }
+{ retainstack-region void* }
+{ catchstack-save cell }
+{ current-callback-save cell }
+{ next context* } ;
+
+: context-field-offset ( field -- offset ) context offset-of ; inline
 
 STRUCT: zone
 { start cell }
@@ -13,10 +26,10 @@ STRUCT: zone
 { end cell } ;
 
 STRUCT: vm
-{ stack_chain context* }
+{ ctx context* }
 { nursery zone }
-{ cards_offset cell }
-{ decks_offset cell }
+{ cards-offset cell }
+{ decks-offset cell }
 { userenv cell[70] } ;
 
 : vm-field-offset ( field -- offset ) vm offset-of ; inline
diff --git a/build-support/factor.sh b/build-support/factor.sh
index 4943d3e5c0..d54f9d8a77 100755
--- a/build-support/factor.sh
+++ b/build-support/factor.sh
@@ -63,20 +63,6 @@ check_ret() {
     fi
 }
 
-check_gcc_version() {
-    $ECHO -n "Checking gcc version..."
-    GCC_VERSION=`$CC --version`
-    check_ret gcc
-    if [[ $GCC_VERSION == *3.3.* ]] ; then
-        $ECHO "You have a known buggy version of gcc (3.3)"
-        $ECHO "Install gcc 3.4 or higher and try again."
-        exit_script 3
-    elif [[ $GCC_VERSION == *4.3.* ]] ; then
-       MAKE_OPTS="$MAKE_OPTS SITE_CFLAGS=-fno-forward-propagate"
-    fi
-    $ECHO "ok."
-}
-
 set_downloader() {
     test_program_installed wget curl
     if [[ $? -ne 0 ]] ; then
@@ -124,7 +110,6 @@ check_installed_programs() {
     ensure_program_installed make gmake
     ensure_program_installed md5sum md5
     ensure_program_installed cut
-    check_gcc_version
 }
 
 check_library_exists() {
diff --git a/vm/Config.openbsd b/vm/Config.openbsd
index 74ea32ebb0..a172cbfaba 100644
--- a/vm/Config.openbsd
+++ b/vm/Config.openbsd
@@ -2,6 +2,5 @@ include vm/Config.unix
 PLAF_DLL_OBJS += vm/os-genunix.o vm/os-openbsd.o
 CC = egcc
 CPP = eg++
-# -fno-inline-functions works around a gcc 4.2.0 bug
-CFLAGS += -export-dynamic -fno-inline-functions
+CFLAGS += -export-dynamic
 LIBS = -L/usr/local/lib/ -lm $(X11_UI_LIBS) -lz -lssl -lcrypto -lpthread
diff --git a/vm/Config.x86.32 b/vm/Config.x86.32
index e060ef7019..b7f8bc65f0 100644
--- a/vm/Config.x86.32
+++ b/vm/Config.x86.32
@@ -1,5 +1,2 @@
 BOOT_ARCH = x86
 PLAF_DLL_OBJS += vm/cpu-x86.32.o
-
-# gcc bug workaround
-CFLAGS += -fno-builtin-strlen -fno-builtin-strcat
diff --git a/vm/aging_collector.cpp b/vm/aging_collector.cpp
index bef4dc6202..c832ca792f 100644
--- a/vm/aging_collector.cpp
+++ b/vm/aging_collector.cpp
@@ -49,8 +49,7 @@ void factor_vm::collect_aging()
 		collector.cheneys_algorithm();
 
 		data->reset_generation(&nursery);
-		code->points_to_nursery.clear();
-		code->points_to_aging.clear();
+		code->clear_remembered_set();
 	}
 }
 
diff --git a/vm/alien.cpp b/vm/alien.cpp
index 5fd6cad4af..84d31a69c0 100755
--- a/vm/alien.cpp
+++ b/vm/alien.cpp
@@ -211,46 +211,46 @@ VM_C_API void to_value_struct(cell src, void *dest, cell size, factor_vm *parent
 }
 
 /* For FFI callbacks receiving structs by value */
-void factor_vm::box_value_struct(void *src, cell size)
+cell factor_vm::from_value_struct(void *src, cell size)
 {
 	byte_array *bytes = allot_byte_array(size);
 	memcpy(bytes->data<void>(),src,size);
-	ctx->push(tag<byte_array>(bytes));
+	return tag<byte_array>(bytes);
 }
 
-VM_C_API void box_value_struct(void *src, cell size,factor_vm *parent)
+VM_C_API cell from_value_struct(void *src, cell size, factor_vm *parent)
 {
-	return parent->box_value_struct(src,size);
+	return parent->from_value_struct(src,size);
 }
 
 /* On some x86 OSes, structs <= 8 bytes are returned in registers. */
-void factor_vm::box_small_struct(cell x, cell y, cell size)
+cell factor_vm::from_small_struct(cell x, cell y, cell size)
 {
 	cell data[2];
 	data[0] = x;
 	data[1] = y;
-	box_value_struct(data,size);
+	return from_value_struct(data,size);
 }
 
-VM_C_API void box_small_struct(cell x, cell y, cell size, factor_vm *parent)
+VM_C_API cell from_small_struct(cell x, cell y, cell size, factor_vm *parent)
 {
-	return parent->box_small_struct(x,y,size);
+	return parent->from_small_struct(x,y,size);
 }
 
 /* On OS X/PPC, complex numbers are returned in registers. */
-void factor_vm::box_medium_struct(cell x1, cell x2, cell x3, cell x4, cell size)
+cell factor_vm::from_medium_struct(cell x1, cell x2, cell x3, cell x4, cell size)
 {
 	cell data[4];
 	data[0] = x1;
 	data[1] = x2;
 	data[2] = x3;
 	data[3] = x4;
-	box_value_struct(data,size);
+	return from_value_struct(data,size);
 }
 
-VM_C_API void box_medium_struct(cell x1, cell x2, cell x3, cell x4, cell size, factor_vm *parent)
+VM_C_API cell from_medium_struct(cell x1, cell x2, cell x3, cell x4, cell size, factor_vm *parent)
 {
-	return parent->box_medium_struct(x1, x2, x3, x4, size);
+	return parent->from_medium_struct(x1, x2, x3, x4, size);
 }
 
 void factor_vm::primitive_vm_ptr()
diff --git a/vm/alien.hpp b/vm/alien.hpp
index 906e204ebe..add6f4ba72 100755
--- a/vm/alien.hpp
+++ b/vm/alien.hpp
@@ -5,8 +5,8 @@ VM_C_API char *alien_offset(cell object, factor_vm *vm);
 VM_C_API char *pinned_alien_offset(cell object, factor_vm *vm);
 VM_C_API cell allot_alien(void *address, factor_vm *vm);
 VM_C_API void to_value_struct(cell src, void *dest, cell size, factor_vm *vm);
-VM_C_API void box_value_struct(void *src, cell size,factor_vm *vm);
-VM_C_API void box_small_struct(cell x, cell y, cell size,factor_vm *vm);
-VM_C_API void box_medium_struct(cell x1, cell x2, cell x3, cell x4, cell size,factor_vm *vm);
+VM_C_API cell from_value_struct(void *src, cell size, factor_vm *vm);
+VM_C_API cell from_small_struct(cell x, cell y, cell size, factor_vm *vm);
+VM_C_API cell from_medium_struct(cell x1, cell x2, cell x3, cell x4, cell size, factor_vm *vm);
 
 }
diff --git a/vm/code_blocks.hpp b/vm/code_blocks.hpp
index 075fe389b4..35abe22342 100644
--- a/vm/code_blocks.hpp
+++ b/vm/code_blocks.hpp
@@ -36,7 +36,11 @@ struct code_block
 
 	cell size() const
 	{
-		return header & ~7;
+		cell size = header & ~7;
+#ifdef FACTOR_DEBUG
+		assert(size > 0);
+#endif
+		return size;
 	}
 
 	void *xt() const
diff --git a/vm/contexts.cpp b/vm/contexts.cpp
index 4ea378a6f9..27cbb8e3aa 100644
--- a/vm/contexts.cpp
+++ b/vm/contexts.cpp
@@ -54,6 +54,9 @@ void factor_vm::nest_stacks(stack_frame *magic_frame)
 	new_ctx->current_callback_save = special_objects[OBJ_CURRENT_CALLBACK];
 	new_ctx->catchstack_save = special_objects[OBJ_CATCHSTACK];
 
+	new_ctx->reset_datastack();
+	new_ctx->reset_retainstack();
+
 	new_ctx->next = ctx;
 	ctx = new_ctx;
 }
diff --git a/vm/cpu-ppc.hpp b/vm/cpu-ppc.hpp
index 495eb375ec..e46db4d1f3 100644
--- a/vm/cpu-ppc.hpp
+++ b/vm/cpu-ppc.hpp
@@ -4,9 +4,6 @@ namespace factor
 #define FACTOR_CPU_STRING "ppc"
 #define VM_ASM_API VM_C_API
 
-register cell ds asm("r13");
-register cell rs asm("r14");
-
 /* In the instruction sequence:
 
    LOAD32 r3,...
@@ -81,14 +78,16 @@ inline static unsigned int fpu_status(unsigned int status)
 }
 
 /* Defined in assembly */
-VM_ASM_API void c_to_factor(cell quot, void *vm);
-VM_ASM_API void throw_impl(cell quot, stack_frame *rewind, void *vm);
-VM_ASM_API void lazy_jit_compile(cell quot, void *vm);
-VM_ASM_API void flush_icache(cell start, cell len);
+VM_C_API void c_to_factor(cell quot, void *vm);
+VM_C_API void throw_impl(cell quot, void *new_stack, void *vm);
+VM_C_API void lazy_jit_compile(cell quot, void *vm);
+VM_C_API void flush_icache(cell start, cell len);
 
-VM_ASM_API void set_callstack(stack_frame *to,
-			       stack_frame *from,
-			       cell length,
-			       void *(*memcpy)(void*,const void*, size_t));
+VM_C_API void set_callstack(
+	void *vm,
+	stack_frame *to,
+	stack_frame *from,
+	cell length,
+	void *(*memcpy)(void*,const void*, size_t));
 
 }
diff --git a/vm/cpu-x86.64.S b/vm/cpu-x86.64.S
index 1a943e6d1e..5df375d29f 100644
--- a/vm/cpu-x86.64.S
+++ b/vm/cpu-x86.64.S
@@ -2,6 +2,7 @@
 
 #define STACK_REG %rsp
 #define DS_REG %r14
+#define RS_REG %r15
 #define RETURN_REG %rax
 
 #define CELL_SIZE 8
@@ -18,6 +19,8 @@
 	#define ARG3 %r9
 
 	#define PUSH_NONVOLATILE \
+		push %r15 ; \
+		push %r14 ; \
 		push %r12 ; \
 		push %r13 ; \
 		push %rdi ; \
@@ -31,7 +34,9 @@
 		pop %rsi ; \
 		pop %rdi ; \
 		pop %r13 ; \
-		pop %r12
+		pop %r12 ; \
+		pop %r14 ; \
+		pop %r15
 
 #else
 
@@ -44,9 +49,13 @@
 		push %rbx ; \
 		push %rbp ; \
 		push %r12 ; \
-		push %r13
+		push %r13 ; \
+		push %r14 ; \
+		push %r15
 
 	#define POP_NONVOLATILE \
+		pop %r15 ; \
+		pop %r14 ; \
 		pop %r13 ; \
 		pop %r12 ; \
 		pop %rbp ; \
diff --git a/vm/cpu-x86.64.hpp b/vm/cpu-x86.64.hpp
index 75d432ee13..aa1a77842f 100644
--- a/vm/cpu-x86.64.hpp
+++ b/vm/cpu-x86.64.hpp
@@ -2,9 +2,6 @@ namespace factor
 {
 
 #define FACTOR_CPU_STRING "x86.64"
-
-register cell ds asm("r14");
-register cell rs asm("r15");
-
 #define VM_ASM_API VM_C_API
+
 }
diff --git a/vm/cpu-x86.S b/vm/cpu-x86.S
index c148f7e5eb..013aa481d9 100644
--- a/vm/cpu-x86.S
+++ b/vm/cpu-x86.S
@@ -3,27 +3,30 @@ DEF(F_FASTCALL void,c_to_factor,(CELL quot, void *vm)):
 	mov ARG0,NV0
 	mov ARG1,NV1
 
-    /* Save old stack pointer and align */
-    mov STACK_REG,ARG0
-    and $-16,STACK_REG
-    add $CELL_SIZE,STACK_REG
-    push ARG0
+	push ARG0
+	push ARG1
 
-	/* Create register shadow area for Win64 */
+	/* Save old stack pointer and align */
+	mov STACK_REG,ARG0
+	and $-16,STACK_REG
+	add $CELL_SIZE,STACK_REG
+	push ARG0
+
+	/* Create register shadow area (required for Win64 only) */
 	sub $32,STACK_REG
 
 	/* Load context */
-    mov (NV1),ARG0
+	mov (NV1),ARG0
 
-    /* Save ctx->callstack_bottom */
+	/* Save ctx->callstack_bottom */
 	lea -CELL_SIZE(STACK_REG),ARG1
-    mov ARG1,CELL_SIZE(ARG0)
+	mov ARG1,CELL_SIZE(ARG0)
 
-    /* Load ctx->datastack */
-    mov (CELL_SIZE * 2)(ARG0),DS_REG
+	/* Load ctx->datastack */
+	mov (CELL_SIZE * 2)(ARG0),DS_REG
 
-    /* Load ctx->retainstack */
-    mov (CELL_SIZE * 3)(ARG0),RS_REG
+	/* Load ctx->retainstack */
+	mov (CELL_SIZE * 3)(ARG0),RS_REG
 
 	/* Call quot-xt */
 	mov NV0,ARG0
@@ -33,8 +36,19 @@ DEF(F_FASTCALL void,c_to_factor,(CELL quot, void *vm)):
 	/* Tear down register shadow area */
 	add $32,STACK_REG
 
-    /* Undo stack alignment */
-    mov (STACK_REG),STACK_REG
+	/* Undo stack alignment */
+	mov (STACK_REG),STACK_REG
+
+	/* Load context */
+	pop ARG1
+	pop ARG0
+	mov (ARG1),ARG0
+
+	/* Save ctx->datastack */
+	mov DS_REG,(CELL_SIZE * 2)(ARG0)
+
+	/* Save ctx->retainstack */
+	mov RS_REG,(CELL_SIZE * 3)(ARG0)
 
 	POP_NONVOLATILE
 	ret
diff --git a/vm/data_heap_checker.cpp b/vm/data_heap_checker.cpp
index fb05508e3f..0d79abc15b 100644
--- a/vm/data_heap_checker.cpp
+++ b/vm/data_heap_checker.cpp
@@ -42,16 +42,16 @@ struct slot_checker {
 		char slot_card_value = *(char *)slot_card_pointer;
 		if((slot_card_value & mask) != mask)
 		{
-			printf("card not marked\n");
-			printf("source generation: %d\n",gen);
-			printf("target generation: %d\n",target);
-			printf("object: 0x%lx\n",(cell)obj);
-			printf("object type: %ld\n",obj->type());
-			printf("slot pointer: 0x%lx\n",(cell)slot_ptr);
-			printf("slot value: 0x%lx\n",*slot_ptr);
-			printf("card of object: 0x%lx\n",object_card_pointer);
-			printf("card of slot: 0x%lx\n",slot_card_pointer);
-			printf("\n");
+			std::cout << "card not marked" << std::endl;
+			std::cout << "source generation: " << gen << std::endl;
+			std::cout << "target generation: " << target << std::endl;
+			std::cout << "object: 0x" << std::hex << (cell)obj << std::dec << std::endl;
+			std::cout << "object type: " << obj->type() << std::endl;
+			std::cout << "slot pointer: 0x" << std::hex << (cell)slot_ptr << std::dec << std::endl;
+			std::cout << "slot value: 0x" << std::hex << *slot_ptr << std::dec << std::endl;
+			std::cout << "card of object: 0x" << std::hex << object_card_pointer << std::dec << std::endl;
+			std::cout << "card of slot: 0x" << std::hex << slot_card_pointer << std::dec << std::endl;
+			std::cout << std::endl;
 			parent->factorbug();
 		}
 	}
diff --git a/vm/free_list.hpp b/vm/free_list.hpp
index d934ec34ac..0a0a5c7614 100644
--- a/vm/free_list.hpp
+++ b/vm/free_list.hpp
@@ -15,11 +15,18 @@ struct free_heap_block
 
 	cell size() const
 	{
-		return header & ~7;
+		cell size = header & ~7;
+#ifdef FACTOR_DEBUG
+		assert(size > 0);
+#endif
+		return size;
 	}
 
 	void make_free(cell size)
 	{
+#ifdef FACTOR_DEBUG
+		assert(size > 0);
+#endif
 		header = size | 1;
 	}
 };
diff --git a/vm/vm.hpp b/vm/vm.hpp
index c147ae59b5..ffdb9f2591 100755
--- a/vm/vm.hpp
+++ b/vm/vm.hpp
@@ -591,9 +591,9 @@ struct factor_vm
 	void primitive_vm_ptr();
 	char *alien_offset(cell obj);
 	void to_value_struct(cell src, void *dest, cell size);
-	void box_value_struct(void *src, cell size);
-	void box_small_struct(cell x, cell y, cell size);
-	void box_medium_struct(cell x1, cell x2, cell x3, cell x4, cell size);
+	cell from_value_struct(void *src, cell size);
+	cell from_small_struct(cell x, cell y, cell size);
+	cell from_medium_struct(cell x1, cell x2, cell x3, cell x4, cell size);
 
 	//quotations
 	void primitive_jit_compile();

From 15eb8d1a0a9ca12ef7b1fbef703c8402edeff9f9 Mon Sep 17 00:00:00 2001
From: Slava Pestov <slava@slava-pestovs-macbook-pro.local>
Date: Thu, 24 Dec 2009 01:37:24 +1300
Subject: [PATCH 3/8] vm: remove VM_ASM_API

---
 basis/cpu/x86/32/32.factor        |   7 +-
 basis/cpu/x86/32/bootstrap.factor |  45 ++++++--
 basis/cpu/x86/bootstrap.factor    |  22 +---
 vm/code_block_visitor.hpp         |   2 +-
 vm/cpu-ppc.S                      |   4 +-
 vm/cpu-ppc.hpp                    |   3 +-
 vm/cpu-x86.32.S                   | 168 +++++++++++++++++++++---------
 vm/cpu-x86.32.hpp                 |   1 -
 vm/cpu-x86.64.S                   |  52 ++++++++-
 vm/cpu-x86.64.hpp                 |   1 -
 vm/cpu-x86.S                      |  55 ----------
 vm/cpu-x86.hpp                    |   4 +-
 vm/math.cpp                       |   6 +-
 vm/math.hpp                       |   6 +-
 vm/quotations.cpp                 |   8 +-
 vm/quotations.hpp                 |   2 +-
 vm/vm.hpp                         |   2 +-
 17 files changed, 227 insertions(+), 161 deletions(-)

diff --git a/basis/cpu/x86/32/32.factor b/basis/cpu/x86/32/32.factor
index 6996417edf..f904f7c3fb 100644
--- a/basis/cpu/x86/32/32.factor
+++ b/basis/cpu/x86/32/32.factor
@@ -53,10 +53,6 @@ M:: x86.32 %dispatch ( src temp -- )
     [ align-code ]
     bi ;
 
-! Registers for fastcall
-: param-reg-1 ( -- reg ) EAX ;
-: param-reg-2 ( -- reg ) EDX ;
-
 M: x86.32 pic-tail-reg EBX ;
 
 M: x86.32 reserved-stack-space 4 cells ;
@@ -242,7 +238,8 @@ M: x86.32 %alien-indirect ( -- )
 
 M: x86.32 %alien-callback ( quot -- )
     EAX swap %load-reference
-    EDX %mov-vm-ptr
+    0 stack@ EAX MOV
+    4 save-vm-ptr
     "c_to_factor" f %alien-invoke ;
 
 M: x86.32 %callback-value ( ctype -- )
diff --git a/basis/cpu/x86/32/bootstrap.factor b/basis/cpu/x86/32/bootstrap.factor
index ed5d1950bb..0b7a1fad5c 100644
--- a/basis/cpu/x86/32/bootstrap.factor
+++ b/basis/cpu/x86/32/bootstrap.factor
@@ -1,7 +1,7 @@
 ! Copyright (C) 2007, 2009 Slava Pestov.
 ! See http://factorcode.org/license.txt for BSD license.
-USING: bootstrap.image.private kernel namespaces system
-cpu.x86.assembler cpu.x86.assembler.operands layouts
+USING: bootstrap.image.private kernel kernel.private namespaces
+system cpu.x86.assembler cpu.x86.assembler.operands layouts
 vocabs parser compiler.constants sequences math math.private
 generic.single.private ;
 IN: bootstrap.x86
@@ -12,8 +12,6 @@ IN: bootstrap.x86
 : shift-arg ( -- reg ) ECX ;
 : div-arg ( -- reg ) EAX ;
 : mod-arg ( -- reg ) EDX ;
-: arg1 ( -- reg ) EAX ;
-: arg2 ( -- reg ) EDX ;
 : temp0 ( -- reg ) EAX ;
 : temp1 ( -- reg ) EDX ;
 : temp2 ( -- reg ) ECX ;
@@ -67,6 +65,33 @@ IN: bootstrap.x86
     jit-restore-context
 ] jit-primitive jit-define
 
+[
+    ! load from stack
+    EAX ds-reg [] MOV
+    ! pop stack
+    ds-reg bootstrap-cell SUB
+    ! load VM pointer
+    EDX 0 MOV 0 rc-absolute-cell jit-vm
+]
+[
+    
+    ! pass quotation
+    ESP [] EAX MOV
+    ! pass VM pointer
+    ESP 4 [+] EDX MOV
+    ! call XT
+    EAX quot-xt-offset [+] CALL
+]
+[
+    ! pass quotation
+    ESP 4 [+] EAX MOV
+    ! pass VM pointer
+    ESP 8 [+] EDX MOV
+    ! jump to XT
+    EAX quot-xt-offset [+] JMP
+]
+\ (call) define-sub-primitive*
+
 ! Inline cache miss entry points
 : jit-load-return-address ( -- )
     EBX ESP stack-frame-size bootstrap-cell - [+] MOV ;
@@ -103,7 +128,9 @@ IN: bootstrap.x86
     ds-reg [] ECX MOV
     [ JNO ]
     [
-        ECX EBP MOV
+        ESP [] EAX MOV
+        ESP 4 [+] EDX MOV
+        ESP 8 [+] EBP MOV
         [ 0 CALL ] dip f rc-relative jit-dlsym
     ]
     jit-conditional ;
@@ -124,10 +151,10 @@ IN: bootstrap.x86
     ds-reg [] EAX MOV
     [ JNO ]
     [
-        EAX ECX MOV
-        EAX tag-bits get SAR
-        EDX EBX MOV
-        ECX EBP MOV
+        ECX tag-bits get SAR
+        ESP [] ECX MOV
+        ESP 4 [+] EBX MOV
+        ESP 8 [+] EBP MOV
         0 CALL "overflow_fixnum_multiply" f rc-relative jit-dlsym
     ]
     jit-conditional
diff --git a/basis/cpu/x86/bootstrap.factor b/basis/cpu/x86/bootstrap.factor
index 6eeb3d64ef..d4e50694bf 100644
--- a/basis/cpu/x86/bootstrap.factor
+++ b/basis/cpu/x86/bootstrap.factor
@@ -120,30 +120,18 @@ big-endian off
 
 [
     ! load from stack
-    arg1 ds-reg [] MOV
-    ! pop stack
-    ds-reg bootstrap-cell SUB
-    ! pass vm pointer
-    arg2 0 MOV 0 rc-absolute-cell jit-vm
-]
-[ arg1 quot-xt-offset [+] CALL ]
-[ arg1 quot-xt-offset [+] JMP ]
-\ (call) define-sub-primitive*
-
-[
-    ! load from stack
-    arg1 ds-reg [] MOV
+    temp0 ds-reg [] MOV
     ! pop stack
     ds-reg bootstrap-cell SUB
 ]
-[ arg1 word-xt-offset [+] CALL ]
-[ arg1 word-xt-offset [+] JMP ]
+[ temp0 word-xt-offset [+] CALL ]
+[ temp0 word-xt-offset [+] JMP ]
 \ (execute) define-sub-primitive*
 
 [
-    arg1 ds-reg [] MOV
+    temp0 ds-reg [] MOV
     ds-reg bootstrap-cell SUB
-    arg1 word-xt-offset [+] JMP
+    temp0 word-xt-offset [+] JMP
 ] jit-execute jit-define
 
 [
diff --git a/vm/code_block_visitor.hpp b/vm/code_block_visitor.hpp
index 8819dc8134..0624adb268 100644
--- a/vm/code_block_visitor.hpp
+++ b/vm/code_block_visitor.hpp
@@ -73,7 +73,7 @@ void code_block_visitor<Visitor>::visit_object_code_block(object *obj)
 			if(q->code)
 				parent->set_quot_xt(q,visitor(q->code));
 			else
-				q->xt = (void *)lazy_jit_compile;
+				q->xt = (void *)lazy_jit_compile_impl;
 			break;
 		}
 	case CALLSTACK_TYPE:
diff --git a/vm/cpu-ppc.S b/vm/cpu-ppc.S
index 876b633a02..9517154e1a 100644
--- a/vm/cpu-ppc.S
+++ b/vm/cpu-ppc.S
@@ -225,11 +225,11 @@ DEF(void,throw_impl,(cell quot, F_STACK_FRAME *rewind_to, void *vm)):
 	mtlr r0
 	JUMP_QUOT	   /* call the quotation */
 
-DEF(void,lazy_jit_compile,(cell quot, void *vm)):
+DEF(void,lazy_jit_compile_impl,(cell quot, void *vm)):
 	mr r5,r4	   /* vm ptr is 3rd arg */
 	mr r4,r1	   /* save stack pointer */
 	PROLOGUE
-	bl MANGLE(lazy_jit_compile_impl)
+	bl MANGLE(lazy_jit_compile)
 	EPILOGUE
 	JUMP_QUOT	   /* call the quotation */
 
diff --git a/vm/cpu-ppc.hpp b/vm/cpu-ppc.hpp
index e46db4d1f3..f0f6f80ae3 100644
--- a/vm/cpu-ppc.hpp
+++ b/vm/cpu-ppc.hpp
@@ -2,7 +2,6 @@ namespace factor
 {
 
 #define FACTOR_CPU_STRING "ppc"
-#define VM_ASM_API VM_C_API
 
 /* In the instruction sequence:
 
@@ -80,7 +79,7 @@ inline static unsigned int fpu_status(unsigned int status)
 /* Defined in assembly */
 VM_C_API void c_to_factor(cell quot, void *vm);
 VM_C_API void throw_impl(cell quot, void *new_stack, void *vm);
-VM_C_API void lazy_jit_compile(cell quot, void *vm);
+VM_C_API void lazy_jit_compile_impl(cell quot, void *vm);
 VM_C_API void flush_icache(cell start, cell len);
 
 VM_C_API void set_callstack(
diff --git a/vm/cpu-x86.32.S b/vm/cpu-x86.32.S
index 6c1e0cfc93..ad9f699ede 100644
--- a/vm/cpu-x86.32.S
+++ b/vm/cpu-x86.32.S
@@ -1,50 +1,102 @@
 #include "asm.h"
 
-#define ARG0 %eax
-#define ARG1 %edx
-#define ARG2 %ecx
-#define STACK_REG %esp
 #define DS_REG %esi
 #define RS_REG %edi
 #define RETURN_REG %eax
 
-#define NV0 %ebx
-#define NV1 %ebp
-
-#define CELL_SIZE 4
-#define STACK_PADDING 12
-
-#define PUSH_NONVOLATILE \
-	push %ebx ; \
-	push %ebp ; \
-	push %esi ; \
-	push %edi
-
-#define POP_NONVOLATILE \
-	pop %edi ; \
-	pop %esi ; \
-	pop %ebp ; \
-	pop %ebx
-
 #define QUOT_XT_OFFSET 12
 
-DEF(void,set_callstack,(void *vm, stack_frame *to, stack_frame *from, cell length, void *memcpy)):
-	mov 4(%esp),%ebx                   /* vm */
-	mov 8(%esp),%ebp                   /* to */
-	mov 12(%esp),%edx                  /* from */
-	mov 16(%esp),%ecx                  /* length */
-	mov 20(%esp),%eax                  /* memcpy */
-	sub %ecx,%ebp                      /* compute new stack pointer */
+DEF(void,c_to_factor,(cell quot, void *vm)):
+	/* Load parameters */
+	mov 4(%esp),%eax
+	mov 8(%esp),%edx
+
+	/* Save non-volatile registers */
+	push %ebx
+	push %ebp
+	push %esi
+	push %edi
+
+	/* Save old stack pointer and align */
+	mov %esp,%ebp
+	and $-16,%esp
+	push %ebp
+
+	/* Set up stack frame for the call to the boot quotation */
+	sub $4,%esp
+	push %edx
+	push %eax
+
+	/* Load context */
+	mov (%edx),%ecx
+
+	/* Load ctx->datastack */
+	mov 8(%ecx),DS_REG
+
+	/* Load ctx->retainstack */
+	mov 12(%ecx),RS_REG
+
+	/* Save ctx->callstack_bottom */
+	lea -4(%esp),%ebx
+	mov %ebx,4(%ecx)
+
+	/* Call quot-xt. Parameters are already on the stack */
+	call *QUOT_XT_OFFSET(%eax)
+
+	/* Tear down stack frame for the call to the boot quotation */
+	pop %edx
+	pop %eax
+	add $4,%esp
+
+	/* Undo stack alignment */
+	pop %ebp
 	mov %ebp,%esp
-	push %ecx                          /* pass length */
-	push %edx                          /* pass src */
-	push %ebp                          /* pass dst */
-	call *%eax                         /* call memcpy */
-	add $12,%esp                       /* pop args from the stack */
-	mov (%ebx),%ebx                    /* load context */
-	mov 8(%ebx),DS_REG                 /* load datastack */
-	mov 12(%ebx),RS_REG                /* load retainstack */
-	ret                                /* return _with new stack_ */
+
+	/* Load context */
+	mov (%edx),%ecx
+
+	/* Save ctx->datastack */
+	mov DS_REG,8(%ecx)
+
+	/* Save ctx->retainstack */
+	mov RS_REG,12(%ecx)
+
+	/* Restore non-volatile registers */
+	pop %edi
+	pop %esi
+	pop %ebp
+	pop %ebx
+
+	ret
+
+DEF(void,set_callstack,(void *vm, stack_frame *to, stack_frame *from, cell length, void *memcpy)):
+	/* load arguments */
+	mov 4(%esp),%ebx  /* vm - to non-volatile register */
+	mov 8(%esp),%ebp  /* to */
+	mov 12(%esp),%edx /* from */
+	mov 16(%esp),%ecx /* length */
+	mov 20(%esp),%eax /* memcpy */
+
+	/* compute new stack pointer */
+	sub %ecx,%ebp
+	mov %ebp,%esp
+
+	/* call memcpy */
+	push %ecx /* pass length */
+	push %edx /* pass src */
+	push %ebp /* pass dst */
+	call *%eax
+	add $12,%esp
+
+	/* load context */
+	mov (%ebx),%ecx
+	/* load datastack */
+	mov 8(%ecx),DS_REG
+	/* load retainstack */
+	mov 12(%ecx),RS_REG
+
+	/* return with new stack */
+	ret
 
 DEF(void,throw_impl,(cell quot, void *new_stack, void *vm)):
 	/* clear x87 stack, but preserve rounding mode and exception flags */
@@ -53,36 +105,50 @@ DEF(void,throw_impl,(cell quot, void *new_stack, void *vm)):
 	fninit
 	fldcw (%esp)
 	add $2,%esp
+
 	/* load quotation and vm parameters */
 	mov 4(%esp),%eax
 	mov 12(%esp),%edx
+
 	/* load new stack pointer */
 	mov 8(%esp),%esp
+
 	/* load context */
-	mov (%edx),%ebx
+	mov (%edx),%ecx
 	/* load datastack */
-	mov 8(%ebx),DS_REG
+	mov 8(%ecx),DS_REG
 	/* load retainstack */
-	mov 12(%ebx),RS_REG
+	mov 12(%ecx),RS_REG
+
+	/* pass arguments to error handler */
+	mov %eax,4(%esp)
+	mov %edx,8(%esp)
+
 	/* call the error handler */
 	jmp *QUOT_XT_OFFSET(%eax)
 
-DEF(VM_ASM_API void,lazy_jit_compile,(cell quot, void *vm)):
+DEF(void,lazy_jit_compile_impl,(cell quot, void *vm)):
+	/* load arguments */
+	mov 4(%esp),%eax 
+	mov 8(%esp),%edx
+
 	/* load context */
-	mov (ARG1),%ebx
+	mov (%edx),%ecx
+	/* save datastack */
+	mov DS_REG,8(%ecx)
+	/* save retainstack */
+	mov RS_REG,12(%ecx)
 	/* save callstack */
 	lea -4(%esp),%ebp
-	mov %ebp,(%ebx)
-	/* save datastack */
-	mov DS_REG,8(%ebx)
-	/* save retainstack */
-	mov RS_REG,12(%ebx)
+	mov %ebp,(%ecx)
+
 	/* compile quotation */
 	sub $4,%esp
-	push ARG1
-	push ARG0
-	call MANGLE(lazy_jit_compile_impl)
+	push %edx
+	push %eax
+	call MANGLE(lazy_jit_compile)
 	add $12,%esp
+
 	/* call quotation */
 	jmp *QUOT_XT_OFFSET(%eax)
 
diff --git a/vm/cpu-x86.32.hpp b/vm/cpu-x86.32.hpp
index 24b1761472..6143631abc 100644
--- a/vm/cpu-x86.32.hpp
+++ b/vm/cpu-x86.32.hpp
@@ -2,6 +2,5 @@ namespace factor
 {
 
 #define FACTOR_CPU_STRING "x86.32"
-#define VM_ASM_API VM_C_API __attribute__ ((regparm (3)))
 
 }
diff --git a/vm/cpu-x86.64.S b/vm/cpu-x86.64.S
index 5df375d29f..e8972b533c 100644
--- a/vm/cpu-x86.64.S
+++ b/vm/cpu-x86.64.S
@@ -65,6 +65,52 @@
 
 #define QUOT_XT_OFFSET 28
 
+DEF(F_FASTCALL void,c_to_factor,(CELL quot, void *vm)):
+	PUSH_NONVOLATILE
+	mov ARG0,NV0
+	mov ARG1,NV1
+
+	push ARG0
+	push ARG1
+
+	/* Create register shadow area (required for Win64 only) */
+	sub $32,STACK_REG
+
+	/* Load context */
+	mov (NV1),ARG0
+
+	/* Save ctx->callstack_bottom */
+	lea -CELL_SIZE(STACK_REG),ARG1
+	mov ARG1,CELL_SIZE(ARG0)
+
+	/* Load ctx->datastack */
+	mov (CELL_SIZE * 2)(ARG0),DS_REG
+
+	/* Load ctx->retainstack */
+	mov (CELL_SIZE * 3)(ARG0),RS_REG
+
+	/* Call quot-xt */
+	mov NV0,ARG0
+	mov NV1,ARG1
+	call *QUOT_XT_OFFSET(ARG0)
+
+	/* Tear down register shadow area */
+	add $32,STACK_REG
+
+	/* Load context */
+	pop ARG1
+	pop ARG0
+	mov (ARG1),ARG0
+
+	/* Save ctx->datastack */
+	mov DS_REG,(CELL_SIZE * 2)(ARG0)
+
+	/* Save ctx->retainstack */
+	mov RS_REG,(CELL_SIZE * 3)(ARG0)
+
+	POP_NONVOLATILE
+	ret
+
 /* We pass a function pointer to memcpy to work around a Mac OS X
 ABI limitation which would otherwise require us to do a bizzaro PC-relative
 trampoline to retrieve the function address */
@@ -82,14 +128,14 @@ DEF(F_FASTCALL void,throw_impl,(CELL quot, F_STACK_FRAME *rewind_to, void *vm)):
 	fldcw (STACK_REG)
 	/* rewind_to */
 	mov ARG1,STACK_REG
-	mov ARG2,ARG1  /* make vm ptr 2nd arg in case quot_xt = lazy_jit_compile */
+	mov ARG2,ARG1  /* make vm ptr 2nd arg in case quot_xt = lazy_jit_compile_impl */
 	jmp *QUOT_XT_OFFSET(ARG0)
 
-DEF(F_FASTCALL void,lazy_jit_compile,(CELL quot, void *vm)):
+DEF(F_FASTCALL void,lazy_jit_compile_impl,(CELL quot, void *vm)):
 	mov ARG1,ARG2                /* vm is 3rd arg */
 	mov STACK_REG,ARG1           /* Save stack pointer */
 	sub $STACK_PADDING,STACK_REG
-	call MANGLE(lazy_jit_compile_impl)
+	call MANGLE(lazy_jit_compile)
 	mov RETURN_REG,ARG0          /* No-op on 32-bit */
 	add $STACK_PADDING,STACK_REG
         jmp *QUOT_XT_OFFSET(ARG0)    /* Call the quotation */
diff --git a/vm/cpu-x86.64.hpp b/vm/cpu-x86.64.hpp
index aa1a77842f..e6a759d006 100644
--- a/vm/cpu-x86.64.hpp
+++ b/vm/cpu-x86.64.hpp
@@ -2,6 +2,5 @@ namespace factor
 {
 
 #define FACTOR_CPU_STRING "x86.64"
-#define VM_ASM_API VM_C_API
 
 }
diff --git a/vm/cpu-x86.S b/vm/cpu-x86.S
index 013aa481d9..d59d0df7fb 100644
--- a/vm/cpu-x86.S
+++ b/vm/cpu-x86.S
@@ -1,58 +1,3 @@
-DEF(F_FASTCALL void,c_to_factor,(CELL quot, void *vm)):
-	PUSH_NONVOLATILE
-	mov ARG0,NV0
-	mov ARG1,NV1
-
-	push ARG0
-	push ARG1
-
-	/* Save old stack pointer and align */
-	mov STACK_REG,ARG0
-	and $-16,STACK_REG
-	add $CELL_SIZE,STACK_REG
-	push ARG0
-
-	/* Create register shadow area (required for Win64 only) */
-	sub $32,STACK_REG
-
-	/* Load context */
-	mov (NV1),ARG0
-
-	/* Save ctx->callstack_bottom */
-	lea -CELL_SIZE(STACK_REG),ARG1
-	mov ARG1,CELL_SIZE(ARG0)
-
-	/* Load ctx->datastack */
-	mov (CELL_SIZE * 2)(ARG0),DS_REG
-
-	/* Load ctx->retainstack */
-	mov (CELL_SIZE * 3)(ARG0),RS_REG
-
-	/* Call quot-xt */
-	mov NV0,ARG0
-	mov NV1,ARG1
-	call *QUOT_XT_OFFSET(ARG0)
-
-	/* Tear down register shadow area */
-	add $32,STACK_REG
-
-	/* Undo stack alignment */
-	mov (STACK_REG),STACK_REG
-
-	/* Load context */
-	pop ARG1
-	pop ARG0
-	mov (ARG1),ARG0
-
-	/* Save ctx->datastack */
-	mov DS_REG,(CELL_SIZE * 2)(ARG0)
-
-	/* Save ctx->retainstack */
-	mov RS_REG,(CELL_SIZE * 3)(ARG0)
-
-	POP_NONVOLATILE
-	ret
-
 /* cpu.x86.features calls this */
 DEF(bool,sse_version,(void)):
 	mov $0x1,RETURN_REG
diff --git a/vm/cpu-x86.hpp b/vm/cpu-x86.hpp
index 6c63edf212..35327b2940 100644
--- a/vm/cpu-x86.hpp
+++ b/vm/cpu-x86.hpp
@@ -74,9 +74,9 @@ inline static unsigned int fpu_status(unsigned int status)
 }
 
 /* Defined in assembly */
-VM_ASM_API void c_to_factor(cell quot, void *vm);
+VM_C_API void c_to_factor(cell quot, void *vm);
 VM_C_API void throw_impl(cell quot, void *new_stack, void *vm);
-VM_ASM_API void lazy_jit_compile(cell quot, void *vm);
+VM_C_API void lazy_jit_compile_impl(cell quot, void *vm);
 
 VM_C_API void set_callstack(
 	void *vm,
diff --git a/vm/math.cpp b/vm/math.cpp
index 4deb3b3f08..f2056ee32e 100755
--- a/vm/math.cpp
+++ b/vm/math.cpp
@@ -581,7 +581,7 @@ inline void factor_vm::overflow_fixnum_add(fixnum x, fixnum y)
 		untag_fixnum(x) + untag_fixnum(y))));
 }
 
-VM_ASM_API void overflow_fixnum_add(fixnum x, fixnum y, factor_vm *parent)
+VM_C_API void overflow_fixnum_add(fixnum x, fixnum y, factor_vm *parent)
 {
 	parent->overflow_fixnum_add(x,y);
 }
@@ -592,7 +592,7 @@ inline void factor_vm::overflow_fixnum_subtract(fixnum x, fixnum y)
 		untag_fixnum(x) - untag_fixnum(y))));
 }
 
-VM_ASM_API void overflow_fixnum_subtract(fixnum x, fixnum y, factor_vm *parent)
+VM_C_API void overflow_fixnum_subtract(fixnum x, fixnum y, factor_vm *parent)
 {
 	parent->overflow_fixnum_subtract(x,y);
 }
@@ -606,7 +606,7 @@ inline void factor_vm::overflow_fixnum_multiply(fixnum x, fixnum y)
 	ctx->replace(tag<bignum>(bignum_multiply(bx,by)));
 }
 
-VM_ASM_API void overflow_fixnum_multiply(fixnum x, fixnum y, factor_vm *parent)
+VM_C_API void overflow_fixnum_multiply(fixnum x, fixnum y, factor_vm *parent)
 {
 	parent->overflow_fixnum_multiply(x,y);
 }
diff --git a/vm/math.hpp b/vm/math.hpp
index 9f677d254b..d78ae54010 100644
--- a/vm/math.hpp
+++ b/vm/math.hpp
@@ -96,8 +96,8 @@ VM_C_API u64 to_unsigned_8(cell obj, factor_vm *vm);
 VM_C_API fixnum to_fixnum(cell tagged, factor_vm *vm);
 VM_C_API cell to_cell(cell tagged, factor_vm *vm);
 
-VM_ASM_API void overflow_fixnum_add(fixnum x, fixnum y, factor_vm *parent);
-VM_ASM_API void overflow_fixnum_subtract(fixnum x, fixnum y, factor_vm *parent);
-VM_ASM_API void overflow_fixnum_multiply(fixnum x, fixnum y, factor_vm *parent);
+VM_C_API void overflow_fixnum_add(fixnum x, fixnum y, factor_vm *parent);
+VM_C_API void overflow_fixnum_subtract(fixnum x, fixnum y, factor_vm *parent);
+VM_C_API void overflow_fixnum_multiply(fixnum x, fixnum y, factor_vm *parent);
 
 }
diff --git a/vm/quotations.cpp b/vm/quotations.cpp
index 05411aab7a..89665c551a 100755
--- a/vm/quotations.cpp
+++ b/vm/quotations.cpp
@@ -307,7 +307,7 @@ void factor_vm::primitive_array_to_quotation()
 	quot->array = ctx->peek();
 	quot->cached_effect = false_object;
 	quot->cache_counter = false_object;
-	quot->xt = (void *)lazy_jit_compile;
+	quot->xt = (void *)lazy_jit_compile_impl;
 	quot->code = NULL;
 	ctx->replace(tag<quotation>(quot));
 }
@@ -332,16 +332,16 @@ fixnum factor_vm::quot_code_offset_to_scan(cell quot_, cell offset)
 	return compiler.get_position();
 }
 
-cell factor_vm::lazy_jit_compile_impl(cell quot_)
+cell factor_vm::lazy_jit_compile(cell quot_)
 {
 	data_root<quotation> quot(quot_,this);
 	jit_compile_quot(quot.value(),true);
 	return quot.value();
 }
 
-VM_C_API cell lazy_jit_compile_impl(cell quot, factor_vm *parent)
+VM_C_API cell lazy_jit_compile(cell quot, factor_vm *parent)
 {
-	return parent->lazy_jit_compile_impl(quot);
+	return parent->lazy_jit_compile(quot);
 }
 
 void factor_vm::primitive_quot_compiled_p()
diff --git a/vm/quotations.hpp b/vm/quotations.hpp
index cd867dc22f..69755302ea 100755
--- a/vm/quotations.hpp
+++ b/vm/quotations.hpp
@@ -27,6 +27,6 @@ struct quotation_jit : public jit {
 	void iterate_quotation();
 };
 
-VM_C_API cell lazy_jit_compile_impl(cell quot, factor_vm *parent);
+VM_C_API cell lazy_jit_compile(cell quot, factor_vm *parent);
 
 }
diff --git a/vm/vm.hpp b/vm/vm.hpp
index ffdb9f2591..101b47c298 100755
--- a/vm/vm.hpp
+++ b/vm/vm.hpp
@@ -603,7 +603,7 @@ struct factor_vm
 	code_block *jit_compile_quot(cell owner_, cell quot_, bool relocating);
 	void jit_compile_quot(cell quot_, bool relocating);
 	fixnum quot_code_offset_to_scan(cell quot_, cell offset);
-	cell lazy_jit_compile_impl(cell quot);
+	cell lazy_jit_compile(cell quot);
 	void primitive_quot_compiled_p();
 
 	//dispatch

From 7271900e6f40debf90ef00d54a82d1c87cf20224 Mon Sep 17 00:00:00 2001
From: Slava Pestov <slava@slava-pestovs-macbook-pro.local>
Date: Sat, 26 Dec 2009 15:24:21 +1300
Subject: [PATCH 4/8] vm: code cleanup

---
 vm/compaction.cpp     | 28 ++++++++++++++++++++++++++++
 vm/full_collector.cpp | 28 ----------------------------
 vm/words.cpp          |  1 -
 3 files changed, 28 insertions(+), 29 deletions(-)

diff --git a/vm/compaction.cpp b/vm/compaction.cpp
index 3cdf005e03..7a062998a7 100644
--- a/vm/compaction.cpp
+++ b/vm/compaction.cpp
@@ -155,6 +155,34 @@ struct code_block_compaction_updater {
 	}
 };
 
+/* After a compaction, invalidate any code heap roots which are not
+marked, and also slide the valid roots up so that call sites can be updated
+correctly in case an inline cache compilation triggered compaction. */
+void factor_vm::update_code_roots_for_compaction()
+{
+	std::vector<code_root *>::const_iterator iter = code_roots.begin();
+	std::vector<code_root *>::const_iterator end = code_roots.end();
+
+	mark_bits<code_block> *state = &code->allocator->state;
+
+	for(; iter < end; iter++)
+	{
+		code_root *root = *iter;
+		code_block *block = (code_block *)(root->value & -data_alignment);
+
+		/* Offset of return address within 16-byte allocation line */
+		cell offset = root->value - (cell)block;
+
+		if(root->valid && state->marked_p(block))
+		{
+			block = state->forward_block(block);
+			root->value = (cell)block + offset;
+		}
+		else
+			root->valid = false;
+	}
+}
+
 /* Compact data and code heaps */
 void factor_vm::collect_compact_impl(bool trace_contexts_p)
 {
diff --git a/vm/full_collector.cpp b/vm/full_collector.cpp
index 7ee4f41413..4de2814f1d 100644
--- a/vm/full_collector.cpp
+++ b/vm/full_collector.cpp
@@ -57,34 +57,6 @@ void factor_vm::update_code_roots_for_sweep()
 	}
 }
 
-/* After a compaction, invalidate any code heap roots which are not
-marked as above, and also slide the valid roots up so that call sites
-can be updated correctly. */
-void factor_vm::update_code_roots_for_compaction()
-{
-	std::vector<code_root *>::const_iterator iter = code_roots.begin();
-	std::vector<code_root *>::const_iterator end = code_roots.end();
-
-	mark_bits<code_block> *state = &code->allocator->state;
-
-	for(; iter < end; iter++)
-	{
-		code_root *root = *iter;
-		code_block *block = (code_block *)(root->value & -data_alignment);
-
-		/* Offset of return address within 16-byte allocation line */
-		cell offset = root->value - (cell)block;
-
-		if(root->valid && state->marked_p(block))
-		{
-			block = state->forward_block(block);
-			root->value = (cell)block + offset;
-		}
-		else
-			root->valid = false;
-	}
-}
-
 void factor_vm::collect_mark_impl(bool trace_contexts_p)
 {
 	full_collector collector(this);
diff --git a/vm/words.cpp b/vm/words.cpp
index ec5db15095..4b3dad71df 100644
--- a/vm/words.cpp
+++ b/vm/words.cpp
@@ -34,7 +34,6 @@ void factor_vm::compile_all_words()
 			jit_compile_word(word.value(),word->def,false);
 
 		update_word_xt(word.untagged());
-
 	}
 }
 

From 67153bf4bba4cae80ab1c53a4d3fc583f850b0dc Mon Sep 17 00:00:00 2001
From: Slava Pestov <slava@slava-pestovs-macbook-pro.local>
Date: Sat, 26 Dec 2009 15:24:46 +1300
Subject: [PATCH 5/8] Fix bug in c_to_factor

---
 basis/cpu/x86/32/32.factor        |  3 ++-
 basis/cpu/x86/32/bootstrap.factor | 19 ++-----------------
 vm/cpu-x86.32.S                   | 12 ++----------
 3 files changed, 6 insertions(+), 28 deletions(-)

diff --git a/basis/cpu/x86/32/32.factor b/basis/cpu/x86/32/32.factor
index f904f7c3fb..70c7b9d513 100644
--- a/basis/cpu/x86/32/32.factor
+++ b/basis/cpu/x86/32/32.factor
@@ -215,7 +215,8 @@ M:: x86.32 %unbox-large-struct ( n c-type -- )
     "to_value_struct" f %alien-invoke ;
 
 M: x86.32 %nest-stacks ( -- )
-    ! Save current frame. See comment in vm/contexts.hpp
+    ! Save current frame to ctx->magic_frame.
+    ! See comment in vm/contexts.hpp.
     EAX stack-reg stack-frame get total-size>> 3 cells - [+] LEA
     4 save-vm-ptr
     0 stack@ EAX MOV
diff --git a/basis/cpu/x86/32/bootstrap.factor b/basis/cpu/x86/32/bootstrap.factor
index 0b7a1fad5c..bb289851ea 100644
--- a/basis/cpu/x86/32/bootstrap.factor
+++ b/basis/cpu/x86/32/bootstrap.factor
@@ -73,23 +73,8 @@ IN: bootstrap.x86
     ! load VM pointer
     EDX 0 MOV 0 rc-absolute-cell jit-vm
 ]
-[
-    
-    ! pass quotation
-    ESP [] EAX MOV
-    ! pass VM pointer
-    ESP 4 [+] EDX MOV
-    ! call XT
-    EAX quot-xt-offset [+] CALL
-]
-[
-    ! pass quotation
-    ESP 4 [+] EAX MOV
-    ! pass VM pointer
-    ESP 8 [+] EDX MOV
-    ! jump to XT
-    EAX quot-xt-offset [+] JMP
-]
+[ EAX quot-xt-offset [+] CALL ]
+[ EAX quot-xt-offset [+] JMP ]
 \ (call) define-sub-primitive*
 
 ! Inline cache miss entry points
diff --git a/vm/cpu-x86.32.S b/vm/cpu-x86.32.S
index ad9f699ede..04df3db89d 100644
--- a/vm/cpu-x86.32.S
+++ b/vm/cpu-x86.32.S
@@ -40,12 +40,12 @@ DEF(void,c_to_factor,(cell quot, void *vm)):
 	lea -4(%esp),%ebx
 	mov %ebx,4(%ecx)
 
-	/* Call quot-xt. Parameters are already on the stack */
+	/* Call quot-xt */
 	call *QUOT_XT_OFFSET(%eax)
 
 	/* Tear down stack frame for the call to the boot quotation */
-	pop %edx
 	pop %eax
+	pop %edx
 	add $4,%esp
 
 	/* Undo stack alignment */
@@ -120,18 +120,10 @@ DEF(void,throw_impl,(cell quot, void *new_stack, void *vm)):
 	/* load retainstack */
 	mov 12(%ecx),RS_REG
 
-	/* pass arguments to error handler */
-	mov %eax,4(%esp)
-	mov %edx,8(%esp)
-
 	/* call the error handler */
 	jmp *QUOT_XT_OFFSET(%eax)
 
 DEF(void,lazy_jit_compile_impl,(cell quot, void *vm)):
-	/* load arguments */
-	mov 4(%esp),%eax 
-	mov 8(%esp),%edx
-
 	/* load context */
 	mov (%edx),%ecx
 	/* save datastack */

From 08cd0df788511fda949d377fd7d6289820421c8a Mon Sep 17 00:00:00 2001
From: Slava Pestov <slava@slava-pestovs-macbook-pro.local>
Date: Sat, 26 Dec 2009 15:27:39 +1300
Subject: [PATCH 6/8] primitives.factor: move call-clear into kernel.private
 since its unsafe

---
 core/bootstrap/primitives.factor | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/core/bootstrap/primitives.factor b/core/bootstrap/primitives.factor
index 78c219ae3c..72348ffa03 100644
--- a/core/bootstrap/primitives.factor
+++ b/core/bootstrap/primitives.factor
@@ -503,7 +503,7 @@ tuple
     { "innermost-frame-executing" "kernel.private" (( callstack -- obj )) }
     { "innermost-frame-scan" "kernel.private" (( callstack -- n )) }
     { "set-innermost-frame-quot" "kernel.private" (( n callstack -- )) }
-    { "call-clear" "kernel" (( quot -- )) }
+    { "call-clear" "kernel.private" (( quot -- )) }
     { "resize-byte-array" "byte-arrays" (( n byte-array -- newbyte-array )) }
     { "dll-valid?" "alien.libraries" (( dll -- ? )) }
     { "unimplemented" "kernel.private" (( -- * )) }

From 819e46f42b7518321e40f1f24793f78c6c6b3e02 Mon Sep 17 00:00:00 2001
From: Slava Pestov <slava@slava-pestovs-macbook-pro.local>
Date: Sat, 26 Dec 2009 15:38:01 +1300
Subject: [PATCH 7/8] kernel: fix stack effect declarations of call-clear and
 set-callstack, and add a unit test for call-clear

---
 core/bootstrap/primitives.factor | 4 ++--
 core/kernel/kernel-docs.factor   | 4 ++--
 core/kernel/kernel-tests.factor  | 5 +++++
 3 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/core/bootstrap/primitives.factor b/core/bootstrap/primitives.factor
index 72348ffa03..4a4d178f9e 100644
--- a/core/bootstrap/primitives.factor
+++ b/core/bootstrap/primitives.factor
@@ -430,7 +430,7 @@ tuple
     { "callstack" "kernel" (( -- cs )) }
     { "set-datastack" "kernel" (( ds -- )) }
     { "set-retainstack" "kernel" (( rs -- )) }
-    { "set-callstack" "kernel" (( cs -- )) }
+    { "set-callstack" "kernel" (( cs -- * )) }
     { "(exit)" "system" (( n -- )) }
     { "data-room" "memory" (( -- data-room )) }
     { "code-room" "memory" (( -- code-room )) }
@@ -503,7 +503,7 @@ tuple
     { "innermost-frame-executing" "kernel.private" (( callstack -- obj )) }
     { "innermost-frame-scan" "kernel.private" (( callstack -- n )) }
     { "set-innermost-frame-quot" "kernel.private" (( n callstack -- )) }
-    { "call-clear" "kernel.private" (( quot -- )) }
+    { "call-clear" "kernel.private" (( quot -- * )) }
     { "resize-byte-array" "byte-arrays" (( n byte-array -- newbyte-array )) }
     { "dll-valid?" "alien.libraries" (( dll -- ? )) }
     { "unimplemented" "kernel.private" (( -- * )) }
diff --git a/core/kernel/kernel-docs.factor b/core/kernel/kernel-docs.factor
index 0e8c3368ff..87cb1a4709 100644
--- a/core/kernel/kernel-docs.factor
+++ b/core/kernel/kernel-docs.factor
@@ -46,7 +46,7 @@ HELP: callstack ( -- cs )
 { $values { "cs" callstack } }
 { $description "Outputs a copy of the call stack contents, with the top of the stack at the end of the vector. The stack frame of the caller word is " { $emphasis "not" } " included." } ;
 
-HELP: set-callstack ( cs -- )
+HELP: set-callstack ( cs -- * )
 { $values { "cs" callstack } }
 { $description "Replaces the call stack contents. The end of the vector becomes the top of the stack. Control flow is transferred immediately to the new call stack." } ;
 
@@ -208,7 +208,7 @@ HELP: call
 
 { call POSTPONE: call( } related-words
 
-HELP: call-clear ( quot -- )
+HELP: call-clear ( quot -- * )
 { $values { "quot" callable } }
 { $description "Calls a quotation with an empty call stack. If the quotation returns, Factor will exit.." }
 { $notes "Used to implement " { $link "threads" } "." } ;
diff --git a/core/kernel/kernel-tests.factor b/core/kernel/kernel-tests.factor
index ded2ee9702..bd21490542 100644
--- a/core/kernel/kernel-tests.factor
+++ b/core/kernel/kernel-tests.factor
@@ -164,6 +164,11 @@ IN: kernel.tests
     last-frame
 ] unit-test
 
+: throw-frame-test ( c -- * ) [ gc gc continue ] call-clear ;
+: throw-frame-test' ( -- ) [ throw-frame-test ] callcc0 ;
+
+[ ] [ throw-frame-test' ] unit-test
+
 [ 10 2 3 4 5 ] [ 1 2 3 4 5 [ 10 * ] 4dip ] unit-test
 
 [ 3 -1 5/6 ] [ 1 2 3 4 5 6 [ + ] [ - ] [ / ] 2tri* ] unit-test

From a77f48fe6b273b5cfff51f489abf9666d2b0ea8d Mon Sep 17 00:00:00 2001
From: Slava Pestov <slava@slava-pestovs-macbook-pro.local>
Date: Sun, 27 Dec 2009 01:17:42 +1300
Subject: [PATCH 8/8] Updating x86-64 port for global register variable removal

---
 basis/cpu/x86/64/64.factor        |  18 +++--
 basis/cpu/x86/64/bootstrap.factor |  90 ++++++++++++++-------
 vm/cpu-x86.64.S                   | 127 +++++++++++++++++++-----------
 3 files changed, 157 insertions(+), 78 deletions(-)

diff --git a/basis/cpu/x86/64/64.factor b/basis/cpu/x86/64/64.factor
index 06a348b4e6..f78fe366d9 100644
--- a/basis/cpu/x86/64/64.factor
+++ b/basis/cpu/x86/64/64.factor
@@ -77,9 +77,9 @@ M: stack-params copy-register*
         { [ over integer? ] [ R11 swap MOV              param@ R11 MOV ] }
     } cond ;
 
-M: x86 %save-param-reg [ param@ ] 2dip %copy ;
+M: x86.64 %save-param-reg [ param@ ] 2dip %copy ;
 
-M: x86 %load-param-reg [ swap param@ ] dip %copy ;
+M: x86.64 %load-param-reg [ swap param@ ] dip %copy ;
 
 : with-return-regs ( quot -- )
     [
@@ -91,6 +91,12 @@ M: x86 %load-param-reg [ swap param@ ] dip %copy ;
 M: x86.64 %pop-stack ( n -- )
     param-reg-1 swap ds-reg reg-stack MOV ;
 
+M: x86.64 %pop-context-stack ( -- )
+    temp-reg %load-context-datastack
+    param-reg-1 temp-reg [] MOV
+    param-reg-1 param-reg-1 [] MOV
+    temp-reg [] bootstrap-cell SUB ;
+
 M:: x86.64 %unbox ( n rep func -- )
     param-reg-2 %mov-vm-ptr
     ! Call the unboxer
@@ -206,8 +212,10 @@ M: x86.64 %unnest-stacks ( -- )
     "unnest_stacks" f %alien-invoke ;
 
 M: x86.64 %prepare-alien-indirect ( -- )
-    param-reg-1 %mov-vm-ptr
-    "unbox_alien" f %alien-invoke
+    param-reg-1 ds-reg [] MOV
+    ds-reg 8 SUB
+    param-reg-2 %mov-vm-ptr
+    "pinned_alien_offset" f %alien-invoke
     RBP RAX MOV ;
 
 M: x86.64 %alien-indirect ( -- )
@@ -219,7 +227,7 @@ M: x86.64 %alien-callback ( quot -- )
     "c_to_factor" f %alien-invoke ;
 
 M: x86.64 %callback-value ( ctype -- )
-    0 %pop-stack
+    %pop-context-stack
     RSP 8 SUB
     param-reg-1 PUSH
     param-reg-1 %mov-vm-ptr
diff --git a/basis/cpu/x86/64/bootstrap.factor b/basis/cpu/x86/64/bootstrap.factor
index b77b08abe9..dbc0178ce1 100644
--- a/basis/cpu/x86/64/bootstrap.factor
+++ b/basis/cpu/x86/64/bootstrap.factor
@@ -1,9 +1,9 @@
 ! Copyright (C) 2007, 2009 Slava Pestov.
 ! See http://factorcode.org/license.txt for BSD license.
-USING: bootstrap.image.private kernel namespaces system
-layouts vocabs parser compiler.constants math math.private
-cpu.x86.assembler cpu.x86.assembler.operands sequences
-generic.single.private ;
+USING: bootstrap.image.private kernel kernel.private namespaces
+system layouts vocabs parser compiler.constants math
+math.private cpu.x86.assembler cpu.x86.assembler.operands
+sequences generic.single.private ;
 IN: bootstrap.x86
 
 8 \ cell set
@@ -33,23 +33,52 @@ IN: bootstrap.x86
     RSP stack-frame-size 3 bootstrap-cells - SUB
 ] jit-prolog jit-define
 
+: jit-load-vm ( -- )
+    RBP 0 MOV 0 rc-absolute-cell jit-vm ;
+
 : jit-save-context ( -- )
-    temp0 0 MOV rc-absolute-cell rt-context jit-rel
-    temp0 temp0 [] MOV
-    ! save stack pointer
-    temp1 stack-reg bootstrap-cell neg [+] LEA
-    temp0 [] temp1 MOV ;
+    ! VM pointer must be in RBP already
+    RCX RBP [] MOV
+    ! save ctx->callstack_top
+    RAX RSP -8 [+] LEA
+    RCX [] RAX MOV
+    ! save ctx->datastack
+    RCX 16 [+] ds-reg MOV
+    ! save ctx->retainstack
+    RCX 24 [+] rs-reg MOV ;
+
+: jit-restore-context ( -- )
+    ! VM pointer must be in EBP already
+    RCX RBP [] MOV
+    ! restore ctx->datastack
+    ds-reg RCX 16 [+] MOV
+    ! restore ctx->retainstack
+    rs-reg RCX 24 [+] MOV ;
 
 [
+    jit-load-vm
+    ! save ds, rs registers
     jit-save-context
-    ! load vm ptr
-    arg1 0 MOV rc-absolute-cell rt-vm jit-rel
-    ! load XT
-    temp1 0 MOV rc-absolute-cell rt-primitive jit-rel
-    ! go
-    temp1 CALL
+    ! call the primitive
+    arg1 RBP MOV
+    RAX 0 MOV rc-absolute-cell rt-primitive jit-rel
+    RAX CALL
+    ! restore ds, rs registers
+    jit-restore-context
 ] jit-primitive jit-define
 
+[
+    ! load from stack
+    arg1 ds-reg [] MOV
+    ! pop stack
+    ds-reg bootstrap-cell SUB
+    ! load VM pointer
+    arg2 0 MOV 0 rc-absolute-cell jit-vm
+]
+[ arg1 quot-xt-offset [+] CALL ]
+[ arg1 quot-xt-offset [+] JMP ]
+\ (call) define-sub-primitive*
+
 ! Inline cache miss entry points
 : jit-load-return-address ( -- )
     RBX RSP stack-frame-size bootstrap-cell - [+] MOV ;
@@ -57,10 +86,13 @@ IN: bootstrap.x86
 ! These are always in tail position with an existing stack
 ! frame, and the stack. The frame setup takes this into account.
 : jit-inline-cache-miss ( -- )
+    jit-load-vm
     jit-save-context
     arg1 RBX MOV
-    arg2 0 MOV 0 rc-absolute-cell jit-vm
-    0 CALL "inline_cache_miss" f rc-relative jit-dlsym ;
+    arg2 RBP MOV
+    RAX 0 MOV "inline_cache_miss" f rc-absolute-cell jit-dlsym
+    RAX CALL
+    jit-restore-context ;
 
 [ jit-load-return-address jit-inline-cache-miss ]
 [ RAX CALL ]
@@ -74,17 +106,19 @@ IN: bootstrap.x86
 
 ! Overflowing fixnum arithmetic
 : jit-overflow ( insn func -- )
+    ds-reg 8 SUB
+    jit-load-vm
     jit-save-context
-    arg1 ds-reg bootstrap-cell neg [+] MOV
-    arg2 ds-reg [] MOV
-    ds-reg bootstrap-cell SUB
+    arg1 ds-reg [] MOV
+    arg2 ds-reg 8 [+] MOV
     arg3 arg1 MOV
     [ [ arg3 arg2 ] dip call ] dip
     ds-reg [] arg3 MOV
     [ JNO ]
     [
-        arg3 0 MOV 0 rc-absolute-cell jit-vm
-        [ 0 CALL ] dip f rc-relative jit-dlsym
+        arg3 RBP MOV
+        RAX 0 MOV f rc-absolute-cell jit-dlsym
+        RAX CALL
     ]
     jit-conditional ; inline
 
@@ -93,11 +127,12 @@ IN: bootstrap.x86
 [ [ SUB ] "overflow_fixnum_subtract" jit-overflow ] \ fixnum- define-sub-primitive
 
 [
+    ds-reg 8 SUB
+    jit-load-vm
     jit-save-context
-    RCX ds-reg bootstrap-cell neg [+] MOV
-    RBX ds-reg [] MOV
+    RCX ds-reg [] MOV
+    RBX ds-reg 8 [+] MOV
     RBX tag-bits get SAR
-    ds-reg bootstrap-cell SUB
     RAX RCX MOV
     RBX IMUL
     ds-reg [] RAX MOV
@@ -106,8 +141,9 @@ IN: bootstrap.x86
         arg1 RCX MOV
         arg1 tag-bits get SAR
         arg2 RBX MOV
-        arg3 0 MOV 0 rc-absolute-cell jit-vm
-        0 CALL "overflow_fixnum_multiply" f rc-relative jit-dlsym
+        arg3 RBP MOV
+        RAX 0 MOV "overflow_fixnum_multiply" f rc-absolute-cell jit-dlsym
+        RAX CALL
     ]
     jit-conditional
 ] \ fixnum* define-sub-primitive
diff --git a/vm/cpu-x86.64.S b/vm/cpu-x86.64.S
index e8972b533c..37a6507206 100644
--- a/vm/cpu-x86.64.S
+++ b/vm/cpu-x86.64.S
@@ -1,15 +1,10 @@
 #include "asm.h"
 
-#define STACK_REG %rsp
 #define DS_REG %r14
 #define RS_REG %r15
 #define RETURN_REG %rax
 
-#define CELL_SIZE 8
-#define STACK_PADDING 56
-
-#define NV0 %rbp
-#define NV1 %r12
+#define QUOT_XT_OFFSET 28
 
 #ifdef WINDOWS
 
@@ -63,82 +58,122 @@
 
 #endif
 
-#define QUOT_XT_OFFSET 28
-
-DEF(F_FASTCALL void,c_to_factor,(CELL quot, void *vm)):
+DEF(void,c_to_factor,(cell quot, void *vm)):
 	PUSH_NONVOLATILE
-	mov ARG0,NV0
-	mov ARG1,NV1
 
+	/* Save old stack pointer and align */
+	mov %rsp,%rbp
+	and $-16,%rsp
+	push %rbp
+
+	/* Set up stack frame for the call to the boot quotation */
 	push ARG0
 	push ARG1
 
 	/* Create register shadow area (required for Win64 only) */
-	sub $32,STACK_REG
+	sub $40,%rsp
 
 	/* Load context */
-	mov (NV1),ARG0
+	mov (ARG1),ARG2
 
 	/* Save ctx->callstack_bottom */
-	lea -CELL_SIZE(STACK_REG),ARG1
-	mov ARG1,CELL_SIZE(ARG0)
+	lea -8(%rsp),ARG3
+	mov ARG3,8(ARG2)
 
 	/* Load ctx->datastack */
-	mov (CELL_SIZE * 2)(ARG0),DS_REG
+	mov 16(ARG2),DS_REG
 
 	/* Load ctx->retainstack */
-	mov (CELL_SIZE * 3)(ARG0),RS_REG
+	mov 24(ARG2),RS_REG
 
 	/* Call quot-xt */
-	mov NV0,ARG0
-	mov NV1,ARG1
 	call *QUOT_XT_OFFSET(ARG0)
 
 	/* Tear down register shadow area */
-	add $32,STACK_REG
+	add $40,%rsp
 
-	/* Load context */
+	/* Tear down stack frame for the call to the boot quotation */
 	pop ARG1
 	pop ARG0
-	mov (ARG1),ARG0
+
+	/* Undo stack alignment */
+	pop %rbp
+	mov %rbp,%rsp
+
+	/* Load context */
+	mov (ARG1),ARG2
 
 	/* Save ctx->datastack */
-	mov DS_REG,(CELL_SIZE * 2)(ARG0)
+	mov DS_REG,16(ARG2)
 
 	/* Save ctx->retainstack */
-	mov RS_REG,(CELL_SIZE * 3)(ARG0)
+	mov RS_REG,24(ARG2)
 
 	POP_NONVOLATILE
 	ret
 
-/* We pass a function pointer to memcpy to work around a Mac OS X
-ABI limitation which would otherwise require us to do a bizzaro PC-relative
-trampoline to retrieve the function address */
-DEF(void,set_callstack,(F_STACK_FRAME *to, F_STACK_FRAME *from, CELL length, void *memcpy)):
-	sub ARG2,ARG0                      /* compute new stack pointer */
-	mov ARG0,%rsp
-	call *ARG3                         /* call memcpy */
-	ret                                /* return _with new stack_ */
+DEF(void,set_callstack,(void *vm, stack_frame *to, stack_frame *from, cell length)):
+	/* save VM pointer in non-volatile register */
+	mov ARG0,%rbp
 
-DEF(F_FASTCALL void,throw_impl,(CELL quot, F_STACK_FRAME *rewind_to, void *vm)):
+	/* compute new stack pointer */	
+	sub ARG3,ARG1
+	mov ARG1,%rsp
+
+	/* call memcpy */
+	mov ARG1,ARG0
+	mov ARG2,ARG1
+	mov ARG3,ARG2
+	call MANGLE(memcpy)
+
+	/* load context */
+	mov (%rbp),ARG2
+	/* load datastack */
+	mov 16(ARG2),DS_REG
+	/* load retainstack */
+	mov 24(ARG2),RS_REG
+
+	/* return with new stack */
+	ret
+
+DEF(void,throw_impl,(cell quot, void *new_stack, void *vm)):
 	/* clear x87 stack, but preserve rounding mode and exception flags */
-	sub $2,STACK_REG
-	fnstcw (STACK_REG)
+	sub $2,%rsp
+	fnstcw (%rsp)
 	fninit
-	fldcw (STACK_REG)
-	/* rewind_to */
-	mov ARG1,STACK_REG
-	mov ARG2,ARG1  /* make vm ptr 2nd arg in case quot_xt = lazy_jit_compile_impl */
+	fldcw (%rsp)
+
+	/* shuffle args */
+	mov ARG1,%rsp
+	mov ARG2,ARG1
+
+	/* load context */
+	mov (ARG1),ARG2
+	/* load datastack */
+	mov 16(ARG2),DS_REG
+	/* load retainstack */
+	mov 24(ARG2),RS_REG
+
 	jmp *QUOT_XT_OFFSET(ARG0)
 
-DEF(F_FASTCALL void,lazy_jit_compile_impl,(CELL quot, void *vm)):
-	mov ARG1,ARG2                /* vm is 3rd arg */
-	mov STACK_REG,ARG1           /* Save stack pointer */
-	sub $STACK_PADDING,STACK_REG
+DEF(void,lazy_jit_compile_impl,(cell quot, void *vm)):
+	/* load context */
+	mov (ARG1),ARG2
+	/* save datastack */
+	mov DS_REG,16(ARG2)
+	/* save retainstack */
+	mov RS_REG,24(ARG2)
+	/* save callstack */
+	lea -8(%rsp),%rbp
+	mov %rbp,(ARG2)
+
+	/* compile quotation */
+	sub $8,%rsp
 	call MANGLE(lazy_jit_compile)
-	mov RETURN_REG,ARG0          /* No-op on 32-bit */
-	add $STACK_PADDING,STACK_REG
-        jmp *QUOT_XT_OFFSET(ARG0)    /* Call the quotation */
+	add $8,%rsp
+
+	/* call quotation */
+	jmp *QUOT_XT_OFFSET(RETURN_REG)
 
 DEF(long long,read_timestamp_counter,(void)):
 	mov $0,%rax