Align stack pointer on non-Mac OS X x86-32 platforms, and use aligned loads/stores for SIMD values
parent
3c4c05e915
commit
2b1a26228b
|
@ -11,9 +11,6 @@ cpu.x86.assembler cpu.x86.assembler.operands cpu.x86
|
||||||
cpu.architecture ;
|
cpu.architecture ;
|
||||||
IN: cpu.x86.32
|
IN: cpu.x86.32
|
||||||
|
|
||||||
! We implement the FFI for Linux, OS X and Windows all at once.
|
|
||||||
! OS X requires that the stack be 16-byte aligned.
|
|
||||||
|
|
||||||
M: x86.32 machine-registers
|
M: x86.32 machine-registers
|
||||||
{
|
{
|
||||||
{ int-regs { EAX ECX EDX EBP EBX } }
|
{ int-regs { EAX ECX EDX EBP EBX } }
|
||||||
|
|
|
@ -45,8 +45,7 @@ HOOK: extra-stack-space cpu ( stack-frame -- n )
|
||||||
: incr-stack-reg ( n -- )
|
: incr-stack-reg ( n -- )
|
||||||
dup 0 = [ drop ] [ stack-reg swap ADD ] if ;
|
dup 0 = [ drop ] [ stack-reg swap ADD ] if ;
|
||||||
|
|
||||||
: align-stack ( n -- n' )
|
: align-stack ( n -- n' ) 16 align ;
|
||||||
os macosx? cpu x86.64? or [ 16 align ] when ;
|
|
||||||
|
|
||||||
M: x86 stack-frame-size ( stack-frame -- i )
|
M: x86 stack-frame-size ( stack-frame -- i )
|
||||||
[ (stack-frame-size) ]
|
[ (stack-frame-size) ]
|
||||||
|
@ -141,8 +140,10 @@ M: x86 %not int-rep one-operand NOT ;
|
||||||
M: x86 %neg int-rep one-operand NEG ;
|
M: x86 %neg int-rep one-operand NEG ;
|
||||||
M: x86 %log2 BSR ;
|
M: x86 %log2 BSR ;
|
||||||
|
|
||||||
|
! A bit of logic to avoid using MOVSS/MOVSD for reg-reg moves
|
||||||
|
! since this induces partial register stalls
|
||||||
GENERIC: copy-register* ( dst src rep -- )
|
GENERIC: copy-register* ( dst src rep -- )
|
||||||
GENERIC: copy-unaligned* ( dst src rep -- )
|
GENERIC: copy-memory* ( dst src rep -- )
|
||||||
|
|
||||||
M: int-rep copy-register* drop MOV ;
|
M: int-rep copy-register* drop MOV ;
|
||||||
M: tagged-rep copy-register* drop MOV ;
|
M: tagged-rep copy-register* drop MOV ;
|
||||||
|
@ -152,17 +153,14 @@ M: float-4-rep copy-register* drop MOVAPS ;
|
||||||
M: double-2-rep copy-register* drop MOVAPS ;
|
M: double-2-rep copy-register* drop MOVAPS ;
|
||||||
M: vector-rep copy-register* drop MOVDQA ;
|
M: vector-rep copy-register* drop MOVDQA ;
|
||||||
|
|
||||||
M: object copy-unaligned* copy-register* ;
|
M: object copy-memory* copy-register* ;
|
||||||
M: float-rep copy-unaligned* drop MOVSS ;
|
M: float-rep copy-memory* drop MOVSS ;
|
||||||
M: double-rep copy-unaligned* drop MOVSD ;
|
M: double-rep copy-memory* drop MOVSD ;
|
||||||
M: float-4-rep copy-unaligned* drop MOVUPS ;
|
|
||||||
M: double-2-rep copy-unaligned* drop MOVUPS ;
|
|
||||||
M: vector-rep copy-unaligned* drop MOVDQU ;
|
|
||||||
|
|
||||||
M: x86 %copy ( dst src rep -- )
|
M: x86 %copy ( dst src rep -- )
|
||||||
2over eq? [ 3drop ] [
|
2over eq? [ 3drop ] [
|
||||||
[ [ dup spill-slot? [ n>> spill@ ] when ] bi@ ] dip
|
[ [ dup spill-slot? [ n>> spill@ ] when ] bi@ ] dip
|
||||||
2over [ register? ] both? [ copy-register* ] [ copy-unaligned* ] if
|
2over [ register? ] both? [ copy-register* ] [ copy-memory* ] if
|
||||||
] if ;
|
] if ;
|
||||||
|
|
||||||
M: x86 %fixnum-add ( label dst src1 src2 -- )
|
M: x86 %fixnum-add ( label dst src1 src2 -- )
|
||||||
|
|
|
@ -146,7 +146,7 @@ TUPLE: simd class elt-class ops special-wrappers schema-wrappers ctor rep ;
|
||||||
[ rep alien-vector class boa ] >>getter
|
[ rep alien-vector class boa ] >>getter
|
||||||
[ [ underlying>> ] 2dip rep set-alien-vector ] >>setter
|
[ [ underlying>> ] 2dip rep set-alien-vector ] >>setter
|
||||||
16 >>size
|
16 >>size
|
||||||
8 >>align
|
16 >>align
|
||||||
rep >>rep
|
rep >>rep
|
||||||
class c:typedef ;
|
class c:typedef ;
|
||||||
|
|
||||||
|
@ -315,7 +315,7 @@ SLOT: underlying2
|
||||||
3bi
|
3bi
|
||||||
] >>setter
|
] >>setter
|
||||||
32 >>size
|
32 >>size
|
||||||
8 >>align
|
16 >>align
|
||||||
rep >>rep
|
rep >>rep
|
||||||
class c:typedef ;
|
class c:typedef ;
|
||||||
|
|
||||||
|
|
|
@ -582,3 +582,20 @@ STRUCT: simd-struct
|
||||||
float-4{ 1.0 0.0 1.0 0.0 } pi [ broken 3array ]
|
float-4{ 1.0 0.0 1.0 0.0 } pi [ broken 3array ]
|
||||||
[ compile-call ] [ call ] 3bi =
|
[ compile-call ] [ call ] 3bi =
|
||||||
] unit-test
|
] unit-test
|
||||||
|
|
||||||
|
! Spilling SIMD values -- this basically just tests that the
|
||||||
|
! stack was aligned properly by the runtime
|
||||||
|
|
||||||
|
: simd-spill-test-1 ( a b c -- v )
|
||||||
|
{ float-4 float-4 float } declare
|
||||||
|
[ v+ ] dip sin v*n ;
|
||||||
|
|
||||||
|
[ float-4{ 0 0 0 0 } ]
|
||||||
|
[ float-4{ 1 2 3 4 } float-4{ 4 5 6 7 } 0.0 simd-spill-test-1 ] unit-test
|
||||||
|
|
||||||
|
: simd-spill-test-2 ( a b d c -- v )
|
||||||
|
{ float float-4 float-4 float } declare
|
||||||
|
[ [ 3.0 + ] 2dip v+ ] dip sin v*n n*v ;
|
||||||
|
|
||||||
|
[ float-4{ 0 0 0 0 } ]
|
||||||
|
[ 5.0 float-4{ 1 2 3 4 } float-4{ 4 5 6 7 } 0.0 simd-spill-test-2 ] unit-test
|
||||||
|
|
|
@ -19,11 +19,9 @@
|
||||||
|
|
||||||
#define PUSH_NONVOLATILE \
|
#define PUSH_NONVOLATILE \
|
||||||
push %ebx ; \
|
push %ebx ; \
|
||||||
push %ebp ; \
|
|
||||||
push %ebp
|
push %ebp
|
||||||
|
|
||||||
#define POP_NONVOLATILE \
|
#define POP_NONVOLATILE \
|
||||||
pop %ebp ; \
|
|
||||||
pop %ebp ; \
|
pop %ebp ; \
|
||||||
pop %ebx
|
pop %ebx
|
||||||
|
|
||||||
|
|
|
@ -27,11 +27,9 @@
|
||||||
push %rdi ; \
|
push %rdi ; \
|
||||||
push %rsi ; \
|
push %rsi ; \
|
||||||
push %rbx ; \
|
push %rbx ; \
|
||||||
push %rbp ; \
|
|
||||||
push %rbp
|
push %rbp
|
||||||
|
|
||||||
#define POP_NONVOLATILE \
|
#define POP_NONVOLATILE \
|
||||||
pop %rbp ; \
|
|
||||||
pop %rbp ; \
|
pop %rbp ; \
|
||||||
pop %rbx ; \
|
pop %rbx ; \
|
||||||
pop %rsi ; \
|
pop %rsi ; \
|
||||||
|
@ -50,11 +48,9 @@
|
||||||
push %rbx ; \
|
push %rbx ; \
|
||||||
push %rbp ; \
|
push %rbp ; \
|
||||||
push %r12 ; \
|
push %r12 ; \
|
||||||
push %r13 ; \
|
|
||||||
push %r13
|
push %r13
|
||||||
|
|
||||||
#define POP_NONVOLATILE \
|
#define POP_NONVOLATILE \
|
||||||
pop %r13 ; \
|
|
||||||
pop %r13 ; \
|
pop %r13 ; \
|
||||||
pop %r12 ; \
|
pop %r12 ; \
|
||||||
pop %rbp ; \
|
pop %rbp ; \
|
||||||
|
|
15
vm/cpu-x86.S
15
vm/cpu-x86.S
|
@ -43,14 +43,20 @@ DEF(F_FASTCALL void,c_to_factor,(CELL quot, void *vm)):
|
||||||
PUSH_NONVOLATILE
|
PUSH_NONVOLATILE
|
||||||
mov ARG0,NV0
|
mov ARG0,NV0
|
||||||
mov ARG1,NV1
|
mov ARG1,NV1
|
||||||
|
|
||||||
|
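/* Save the old stack pointer in ARG0, round STACK_REG down to a 16-byte
   boundary, then push the old value so that it sits at the aligned top of
   the stack. NOTE(review): if STACK_REG is already 16-byte aligned here,
   the add/push pair writes the old pointer over the slot last pushed by
   PUSH_NONVOLATILE -- confirm this cannot happen on any supported ABI. */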
|
||||||
|
mov STACK_REG,ARG0
|
||||||
|
and $-16,STACK_REG
|
||||||
|
add $CELL_SIZE,STACK_REG
|
||||||
|
push ARG0
|
||||||
|
|
||||||
/* Create register shadow area for Win64 */
|
/* Create register shadow area for Win64 */
|
||||||
sub $32,STACK_REG
|
sub $32,STACK_REG
|
||||||
|
|
||||||
/* Save stack pointer */
|
/* Save stack pointer */
|
||||||
lea -CELL_SIZE(STACK_REG),ARG0
|
lea -CELL_SIZE(STACK_REG),ARG0
|
||||||
call MANGLE(save_callstack_bottom)
|
call MANGLE(save_callstack_bottom)
|
||||||
|
|
||||||
/* Call quot-xt */
|
/* Call quot-xt */
|
||||||
mov NV0,ARG0
|
mov NV0,ARG0
|
||||||
mov NV1,ARG1
|
mov NV1,ARG1
|
||||||
|
@ -59,6 +65,9 @@ DEF(F_FASTCALL void,c_to_factor,(CELL quot, void *vm)):
|
||||||
/* Tear down register shadow area */
|
/* Tear down register shadow area */
|
||||||
add $32,STACK_REG
|
add $32,STACK_REG
|
||||||
|
|
||||||
|
/* Undo stack alignment: reload the original stack pointer that was pushed
   after aligning */
|
||||||
|
mov (STACK_REG),STACK_REG
|
||||||
|
|
||||||
POP_NONVOLATILE
|
POP_NONVOLATILE
|
||||||
ret
|
ret
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue