Align stack pointer on non-Mac OS X x86-32 platforms, and use aligned loads/stores for SIMD values

db4
Slava Pestov 2009-11-03 23:51:44 -06:00
parent 3c4c05e915
commit 2b1a26228b
7 changed files with 39 additions and 24 deletions

View File

@ -11,9 +11,6 @@ cpu.x86.assembler cpu.x86.assembler.operands cpu.x86
cpu.architecture ;
IN: cpu.x86.32
! We implement the FFI for Linux, OS X and Windows all at once.
! OS X requires that the stack be 16-byte aligned.
M: x86.32 machine-registers
{
{ int-regs { EAX ECX EDX EBP EBX } }

View File

@ -45,8 +45,7 @@ HOOK: extra-stack-space cpu ( stack-frame -- n )
: incr-stack-reg ( n -- )
dup 0 = [ drop ] [ stack-reg swap ADD ] if ;
: align-stack ( n -- n' )
os macosx? cpu x86.64? or [ 16 align ] when ;
: align-stack ( n -- n' ) 16 align ;
M: x86 stack-frame-size ( stack-frame -- i )
[ (stack-frame-size) ]
@ -141,8 +140,10 @@ M: x86 %not int-rep one-operand NOT ;
M: x86 %neg int-rep one-operand NEG ;
M: x86 %log2 BSR ;
! A bit of logic to avoid using MOVSS/MOVSD for reg-reg moves
! since this induces partial register stalls
GENERIC: copy-register* ( dst src rep -- )
GENERIC: copy-unaligned* ( dst src rep -- )
GENERIC: copy-memory* ( dst src rep -- )
M: int-rep copy-register* drop MOV ;
M: tagged-rep copy-register* drop MOV ;
@ -152,17 +153,14 @@ M: float-4-rep copy-register* drop MOVAPS ;
M: double-2-rep copy-register* drop MOVAPS ;
M: vector-rep copy-register* drop MOVDQA ;
M: object copy-unaligned* copy-register* ;
M: float-rep copy-unaligned* drop MOVSS ;
M: double-rep copy-unaligned* drop MOVSD ;
M: float-4-rep copy-unaligned* drop MOVUPS ;
M: double-2-rep copy-unaligned* drop MOVUPS ;
M: vector-rep copy-unaligned* drop MOVDQU ;
M: object copy-memory* copy-register* ;
M: float-rep copy-memory* drop MOVSS ;
M: double-rep copy-memory* drop MOVSD ;
M: x86 %copy ( dst src rep -- )
2over eq? [ 3drop ] [
[ [ dup spill-slot? [ n>> spill@ ] when ] bi@ ] dip
2over [ register? ] both? [ copy-register* ] [ copy-unaligned* ] if
2over [ register? ] both? [ copy-register* ] [ copy-memory* ] if
] if ;
M: x86 %fixnum-add ( label dst src1 src2 -- )

View File

@ -146,7 +146,7 @@ TUPLE: simd class elt-class ops special-wrappers schema-wrappers ctor rep ;
[ rep alien-vector class boa ] >>getter
[ [ underlying>> ] 2dip rep set-alien-vector ] >>setter
16 >>size
8 >>align
16 >>align
rep >>rep
class c:typedef ;
@ -315,7 +315,7 @@ SLOT: underlying2
3bi
] >>setter
32 >>size
8 >>align
16 >>align
rep >>rep
class c:typedef ;

View File

@ -582,3 +582,20 @@ STRUCT: simd-struct
float-4{ 1.0 0.0 1.0 0.0 } pi [ broken 3array ]
[ compile-call ] [ call ] 3bi =
] unit-test
! Spilling SIMD values -- this basically just tests that the
! stack was aligned properly by the runtime
: simd-spill-test-1 ( a b c -- v )
{ float-4 float-4 float } declare
[ v+ ] dip sin v*n ;
[ float-4{ 0 0 0 0 } ]
[ float-4{ 1 2 3 4 } float-4{ 4 5 6 7 } 0.0 simd-spill-test-1 ] unit-test
: simd-spill-test-2 ( a b d c -- v )
{ float float-4 float-4 float } declare
[ [ 3.0 + ] 2dip v+ ] dip sin v*n n*v ;
[ float-4{ 0 0 0 0 } ]
[ 5.0 float-4{ 1 2 3 4 } float-4{ 4 5 6 7 } 0.0 simd-spill-test-2 ] unit-test

View File

@ -19,11 +19,9 @@
#define PUSH_NONVOLATILE \
push %ebx ; \
push %ebp ; \
push %ebp
#define POP_NONVOLATILE \
pop %ebp ; \
pop %ebp ; \
pop %ebx

View File

@ -27,11 +27,9 @@
push %rdi ; \
push %rsi ; \
push %rbx ; \
push %rbp ; \
push %rbp
#define POP_NONVOLATILE \
pop %rbp ; \
pop %rbp ; \
pop %rbx ; \
pop %rsi ; \
@ -50,11 +48,9 @@
push %rbx ; \
push %rbp ; \
push %r12 ; \
push %r13 ; \
push %r13
#define POP_NONVOLATILE \
pop %r13 ; \
pop %r13 ; \
pop %r12 ; \
pop %rbp ; \

View File

@ -43,14 +43,20 @@ DEF(F_FASTCALL void,c_to_factor,(CELL quot, void *vm)):
PUSH_NONVOLATILE
mov ARG0,NV0
mov ARG1,NV1
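/* Save old stack pointer and align: after the push below, STACK_REG is the
   old value rounded down to a 16-byte boundary and (STACK_REG) holds the old
   stack pointer.
   NOTE(review): if STACK_REG is already 16-byte aligned at this point, the
   add/push pair stores the old stack pointer into the slot at the old top of
   stack, which appears to be the last register saved by PUSH_NONVOLATILE --
   confirm this case cannot occur on every supported ABI. */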
mov STACK_REG,ARG0
and $-16,STACK_REG
add $CELL_SIZE,STACK_REG
push ARG0
/* Create register shadow area for Win64 */
sub $32,STACK_REG
/* Save stack pointer */
lea -CELL_SIZE(STACK_REG),ARG0
call MANGLE(save_callstack_bottom)
/* Call quot-xt */
mov NV0,ARG0
mov NV1,ARG1
@ -59,6 +65,9 @@ DEF(F_FASTCALL void,c_to_factor,(CELL quot, void *vm)):
/* Tear down register shadow area */
add $32,STACK_REG
/* Undo stack alignment */
mov (STACK_REG),STACK_REG
POP_NONVOLATILE
ret