Align stack pointer on non-Mac OS X x86-32 platforms, and use aligned loads/stores for SIMD values
parent
3c4c05e915
commit
2b1a26228b
|
@ -11,9 +11,6 @@ cpu.x86.assembler cpu.x86.assembler.operands cpu.x86
|
|||
cpu.architecture ;
|
||||
IN: cpu.x86.32
|
||||
|
||||
! We implement the FFI for Linux, OS X and Windows all at once.
|
||||
! OS X requires that the stack be 16-byte aligned.
|
||||
|
||||
M: x86.32 machine-registers
|
||||
{
|
||||
{ int-regs { EAX ECX EDX EBP EBX } }
|
||||
|
|
|
@ -45,8 +45,7 @@ HOOK: extra-stack-space cpu ( stack-frame -- n )
|
|||
: incr-stack-reg ( n -- )
|
||||
dup 0 = [ drop ] [ stack-reg swap ADD ] if ;
|
||||
|
||||
: align-stack ( n -- n' )
|
||||
os macosx? cpu x86.64? or [ 16 align ] when ;
|
||||
: align-stack ( n -- n' ) 16 align ;
|
||||
|
||||
M: x86 stack-frame-size ( stack-frame -- i )
|
||||
[ (stack-frame-size) ]
|
||||
|
@ -141,8 +140,10 @@ M: x86 %not int-rep one-operand NOT ;
|
|||
M: x86 %neg int-rep one-operand NEG ;
|
||||
M: x86 %log2 BSR ;
|
||||
|
||||
! A bit of logic to avoid using MOVSS/MOVSD for reg-reg moves
|
||||
! since this induces partial register stalls
|
||||
GENERIC: copy-register* ( dst src rep -- )
|
||||
GENERIC: copy-unaligned* ( dst src rep -- )
|
||||
GENERIC: copy-memory* ( dst src rep -- )
|
||||
|
||||
M: int-rep copy-register* drop MOV ;
|
||||
M: tagged-rep copy-register* drop MOV ;
|
||||
|
@ -152,17 +153,14 @@ M: float-4-rep copy-register* drop MOVAPS ;
|
|||
M: double-2-rep copy-register* drop MOVAPS ;
|
||||
M: vector-rep copy-register* drop MOVDQA ;
|
||||
|
||||
M: object copy-unaligned* copy-register* ;
|
||||
M: float-rep copy-unaligned* drop MOVSS ;
|
||||
M: double-rep copy-unaligned* drop MOVSD ;
|
||||
M: float-4-rep copy-unaligned* drop MOVUPS ;
|
||||
M: double-2-rep copy-unaligned* drop MOVUPS ;
|
||||
M: vector-rep copy-unaligned* drop MOVDQU ;
|
||||
M: object copy-memory* copy-register* ;
|
||||
M: float-rep copy-memory* drop MOVSS ;
|
||||
M: double-rep copy-memory* drop MOVSD ;
|
||||
|
||||
M: x86 %copy ( dst src rep -- )
|
||||
2over eq? [ 3drop ] [
|
||||
[ [ dup spill-slot? [ n>> spill@ ] when ] bi@ ] dip
|
||||
2over [ register? ] both? [ copy-register* ] [ copy-unaligned* ] if
|
||||
2over [ register? ] both? [ copy-register* ] [ copy-memory* ] if
|
||||
] if ;
|
||||
|
||||
M: x86 %fixnum-add ( label dst src1 src2 -- )
|
||||
|
|
|
@ -146,7 +146,7 @@ TUPLE: simd class elt-class ops special-wrappers schema-wrappers ctor rep ;
|
|||
[ rep alien-vector class boa ] >>getter
|
||||
[ [ underlying>> ] 2dip rep set-alien-vector ] >>setter
|
||||
16 >>size
|
||||
8 >>align
|
||||
16 >>align
|
||||
rep >>rep
|
||||
class c:typedef ;
|
||||
|
||||
|
@ -315,7 +315,7 @@ SLOT: underlying2
|
|||
3bi
|
||||
] >>setter
|
||||
32 >>size
|
||||
8 >>align
|
||||
16 >>align
|
||||
rep >>rep
|
||||
class c:typedef ;
|
||||
|
||||
|
|
|
@ -582,3 +582,20 @@ STRUCT: simd-struct
|
|||
float-4{ 1.0 0.0 1.0 0.0 } pi [ broken 3array ]
|
||||
[ compile-call ] [ call ] 3bi =
|
||||
] unit-test
|
||||
|
||||
! Spilling SIMD values -- this basically just tests that the
|
||||
! stack was aligned properly by the runtime
|
||||
|
||||
: simd-spill-test-1 ( a b c -- v )
|
||||
{ float-4 float-4 float } declare
|
||||
[ v+ ] dip sin v*n ;
|
||||
|
||||
[ float-4{ 0 0 0 0 } ]
|
||||
[ float-4{ 1 2 3 4 } float-4{ 4 5 6 7 } 0.0 simd-spill-test-1 ] unit-test
|
||||
|
||||
: simd-spill-test-2 ( a b d c -- v )
|
||||
{ float float-4 float-4 float } declare
|
||||
[ [ 3.0 + ] 2dip v+ ] dip sin v*n n*v ;
|
||||
|
||||
[ float-4{ 0 0 0 0 } ]
|
||||
[ 5.0 float-4{ 1 2 3 4 } float-4{ 4 5 6 7 } 0.0 simd-spill-test-2 ] unit-test
|
||||
|
|
|
@ -19,11 +19,9 @@
|
|||
|
||||
#define PUSH_NONVOLATILE \
|
||||
push %ebx ; \
|
||||
push %ebp ; \
|
||||
push %ebp
|
||||
|
||||
#define POP_NONVOLATILE \
|
||||
pop %ebp ; \
|
||||
pop %ebp ; \
|
||||
pop %ebx
|
||||
|
||||
|
|
|
@ -27,11 +27,9 @@
|
|||
push %rdi ; \
|
||||
push %rsi ; \
|
||||
push %rbx ; \
|
||||
push %rbp ; \
|
||||
push %rbp
|
||||
|
||||
#define POP_NONVOLATILE \
|
||||
pop %rbp ; \
|
||||
pop %rbp ; \
|
||||
pop %rbx ; \
|
||||
pop %rsi ; \
|
||||
|
@ -50,11 +48,9 @@
|
|||
push %rbx ; \
|
||||
push %rbp ; \
|
||||
push %r12 ; \
|
||||
push %r13 ; \
|
||||
push %r13
|
||||
|
||||
#define POP_NONVOLATILE \
|
||||
pop %r13 ; \
|
||||
pop %r13 ; \
|
||||
pop %r12 ; \
|
||||
pop %rbp ; \
|
||||
|
|
15
vm/cpu-x86.S
15
vm/cpu-x86.S
|
@ -43,14 +43,20 @@ DEF(F_FASTCALL void,c_to_factor,(CELL quot, void *vm)):
|
|||
PUSH_NONVOLATILE
|
||||
mov ARG0,NV0
|
||||
mov ARG1,NV1
|
||||
|
||||
|
||||
/* Save the old stack pointer in ARG0, align STACK_REG to a 16-byte boundary, then push the old value so it can be restored later */
|
||||
mov STACK_REG,ARG0
|
||||
and $-16,STACK_REG
|
||||
add $CELL_SIZE,STACK_REG
|
||||
push ARG0
|
||||
|
||||
/* Create register shadow area for Win64 */
|
||||
sub $32,STACK_REG
|
||||
|
||||
|
||||
/* Pass the address one cell below the stack pointer to save_callstack_bottom */
|
||||
lea -CELL_SIZE(STACK_REG),ARG0
|
||||
call MANGLE(save_callstack_bottom)
|
||||
|
||||
|
||||
/* Call quot-xt */
|
||||
mov NV0,ARG0
|
||||
mov NV1,ARG1
|
||||
|
@ -59,6 +65,9 @@ DEF(F_FASTCALL void,c_to_factor,(CELL quot, void *vm)):
|
|||
/* Tear down register shadow area */
|
||||
add $32,STACK_REG
|
||||
|
||||
/* Undo stack alignment by reloading the old stack pointer saved at the top of the stack */
|
||||
mov (STACK_REG),STACK_REG
|
||||
|
||||
POP_NONVOLATILE
|
||||
ret
|
||||
|
||||
|
|
Loading…
Reference in New Issue