More work on PowerPC backend; change register usage, free up some more integer and float regs (untested)

db4
Slava Pestov 2008-11-05 04:20:35 -06:00
parent 7aa530c64e
commit 2239f4fb99
4 changed files with 256 additions and 176 deletions

View File

@ -11,8 +11,8 @@ big-endian on
4 jit-code-format set
: ds-reg 14 ;
: rs-reg 15 ;
: ds-reg 30 ;
: rs-reg 31 ;
: factor-area-size ( -- n ) 4 bootstrap-cells ;

View File

@ -1,13 +1,30 @@
! Copyright (C) 2005, 2008 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
IN: cpu.ppc.architecture
USING: alien.c-types
accessors
cpu.architecture
compiler.cfg.registers
cpu.ppc.assembler
kernel
locals
layouts
combinators
make
compiler.cfg.instructions
math.order
system
math
compiler.constants
namespaces compiler.codegen.fixup ;
IN: cpu.ppc
! PowerPC register assignments
! r3-r11, r16-r31: integer vregs
! f0-f13: float vregs
! r12: scratch
! r14: data stack
! r15: retain stack
! PowerPC register assignments:
! r2-r28: integer vregs
! r29: integer scratch
! r30: data stack
! r31: retain stack
! f0-f29: float vregs
! f30, f31: float scratch
<< {
{ [ os macosx? ] [
@ -23,13 +40,15 @@ IN: cpu.ppc.architecture
M: ppc machine-registers
{
{ int-regs { 3 4 5 6 7 8 9 10 11 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 } }
{ double-float-regs { 0 1 2 3 4 5 6 7 8 9 10 11 12 13 } }
{ int-regs T{ range f 2 27 1 } }
{ double-float-regs T{ range f 0 28 1 } }
} ;
: scratch-reg 12 ; inline
: scratch-reg 29 ; inline
: fp-scratch-reg-1 30 ; inline
: fp-scratch-reg-2 31 ; inline
M: ppc two-operand? t ;
M: ppc two-operand? f ;
M: ppc %load-immediate ( reg n -- ) swap LOAD ;
@ -38,8 +57,8 @@ M:: ppc %load-indirect ( reg obj -- )
obj rc-absolute-ppc-2/2 rel-literal
reg reg 0 LWZ ;
: ds-reg 14 ; inline
: rs-reg 15 ; inline
: ds-reg 30 ; inline
: rs-reg 31 ; inline
GENERIC: loc-reg ( loc -- reg )
@ -83,15 +102,14 @@ M: ppc %inc-r ( n -- ) rs-reg (%inc) ;
: xt-save ( n -- i ) 2 cells - ;
M: ppc stack-frame-size ( stack-frame -- i )
local@ factor-area-size + 4 cells align ;
! M: x86 stack-frame-size ( stack-frame -- i )
! [ spill-counts>> [ swap reg-size * ] { } assoc>map sum ]
! [ params>> ]
! [ return>> ]
! tri + +
! 3 cells +
! align-stack ;
[ spill-counts>> [ swap reg-size * ] { } assoc>map sum ]
[ params>> ]
[ return>> ]
tri + +
reserved-area-size +
param-save-size +
factor-area-size +
4 cells align ;
M: ppc %call ( label -- ) BL ;
M: ppc %jump-label ( label -- ) B ;
@ -136,62 +154,57 @@ M: ppc %shr-imm swapd SRWI ;
M: ppc %sar-imm SRAWI ;
M: ppc %not NOT ;
: bignum@ ( n -- offset ) cells bignum tag-number - ; inline
M: ppc %integer>bignum ( dst src temp -- )
[
{ "end" "non-zero" "pos" "store" } [ define-label ] each
! is it zero?
0 over v>operand 0 CMPI
"non-zero" get BNE
dup 0 >bignum %load-literal
"end" get B
! it is non-zero
"non-zero" resolve-label
1 bignum over 3 + cells %allot
1+ v>operand 12 LI ! compute the length
12 11 cell STW ! store the length
! is the fixnum negative?
0 over v>operand 0 CMPI
"pos" get BGE
1 12 LI
! store negative sign
12 11 2 cells STW
! negate fixnum
dup v>operand dup -1 MULI
"store" get B
"pos" resolve-label
0 12 LI
! store positive sign
12 11 2 cells STW
"store" resolve-label
! store the number
dup v>operand 11 3 cells STW
! tag the bignum, store it in reg
bignum %store-tagged
dst 0 >bignum %load-immediate
! Is it zero? Then just go to the end and return this zero
0 src 0 CMPI
"end" get BEQ
! Allocate a bignum
dst 4 cells bignum temp %allot
! Write length
2 temp LI
dst 1 bignum@ temp STW
! Store value
dst 3 bignum@ src STW
! Compute sign
temp src MR
temp cell-bits 1- SRAWI
temp temp 1 ANDI
! Store sign
dst 2 bignum@ temp STW
! Make negative value positive
temp temp temp ADD
temp temp NEG
temp temp 1 ADDI
temp src temp MULLW
! Store the bignum
dst 3 bignum@ temp STW
"end" resolve-label
] with-scope ;
: bignum@ ( n -- offset ) cells bignum tag-number - ; inline
M:: %bignum>integer ( dst src -- )
M:: %bignum>integer ( dst src temp -- )
[
"end" define-label
scratch-reg src 1 bignum@ LWZ
temp src 1 bignum@ LWZ
! if the length is 1, its just the sign and nothing else,
! so output 0
0 dst LI
0 scratch-reg 1 v>operand CMPI
0 temp 1 v>operand CMPI
"end" get BEQ
! load the value
dst src 3 bignum@ LWZ
! load the sign
scratch-reg src 2 bignum@ LWZ
temp src 2 bignum@ LWZ
! branchless arithmetic: we want to turn 0 into 1,
! and 1 into -1
scratch-reg scratch-reg 1 SLWI
scratch-reg scratch-reg NEG
scratch-reg scratch-reg 1 ADDI
temp temp temp ADD
temp temp 1 SUBI
! multiply value by sign
dst dst scratch-reg MULLW
dst dst temp MULLW
"end" resolve-label
] with-scope ;
@ -200,20 +213,21 @@ M: ppc %sub-float FSUB ;
M: ppc %mul-float FMUL ;
M: ppc %div-float FDIV ;
M: ppc %integer>float
HEX: 4330 "scratch" operand LIS
"scratch" operand 1 0 param@ STW
"in" operand dup HEX: 8000 XORIS
"in" operand 1 cell param@ STW
"f1" operand 1 0 param@ LFD
4503601774854144.0 "in" operand load-indirect
"f2" operand "in" operand float-offset LFD
"f1" operand "f1" operand "f2" operand FSUB ;
M: ppc %integer>float ( dst src -- )
HEX: 4330 scratch-reg LIS
scratch-reg 1 0 param@ STW
scratch-reg src MR
scratch-reg dup HEX: 8000 XORIS
scratch-reg 1 cell param@ STW
fp-scratch-reg-2 1 0 param@ LFD
4503601774854144.0 scratch-reg load-indirect
fp-scratch-reg-2 scratch-reg float-offset LFD
fp-scratch-reg-2 fp-scratch-reg-2 fp-scratch-reg-2 FSUB ;
M: ppc %float>integer
"scratch" operand "in" operand FCTIWZ
"scratch" operand 1 0 param@ STFD
"out" operand 1 cell param@ LWZ ;
M:: ppc %float>integer ( dst src -- )
fp-scratch-reg-1 src FCTIWZ
fp-scratch-reg-2 1 0 param@ STFD
dst 1 4 param@ LWZ ;
M: ppc %copy ( dst src -- ) MR ;
@ -407,11 +421,32 @@ M: ppc %compare-branch (%compare) %branch ;
M: ppc %compare-imm-branch (%compare-imm) %branch ;
M: ppc %compare-float-branch (%compare-float) %branch ;
! M: ppc %spill-integer ( src n -- ) spill-integer@ swap MOV ;
! M: ppc %reload-integer ( dst n -- ) spill-integer@ MOV ;
!
! M: ppc %spill-float ( src n -- ) spill-float@ swap MOVSD ;
! M: ppc %reload-float ( dst n -- ) spill-float@ MOVSD ;
: spill-integer-base ( stack-frame -- n )
[ params>> ] [ return>> ] bi + ;
: stack@ 1 swap ; inline
: spill-integer@ ( n -- op )
cells
stack-frame get spill-integer-base
+ stack@ ;
: spill-float-base ( stack-frame -- n )
[ spill-counts>> int-regs swap at int-regs reg-size * ]
[ params>> ]
[ return>> ]
tri + + ;
: spill-float@ ( n -- op )
double-float-regs reg-size *
stack-frame get spill-float-base
+ stack@ ;
M: ppc %spill-integer ( src n -- ) spill-integer@ STW ;
M: ppc %reload-integer ( dst n -- ) spill-integer@ LWZ ;
M: ppc %spill-float ( src n -- ) spill-float@ STFD ;
M: ppc %reload-float ( dst n -- ) spill-float@ LFD ;
M: ppc %loop-entry ;

View File

@ -4,30 +4,32 @@ in the public domain. */
/* Note that the XT is passed to the quotation in r11 */
#define CALL_OR_JUMP_QUOT \
lwz r11,9(r3) /* load quotation-xt slot */ XX \
lwz r11,9(r3) /* load quotation-xt slot */ XX \
#define CALL_QUOT \
CALL_OR_JUMP_QUOT XX \
mtlr r11 /* prepare to call XT with quotation in r3 */ XX \
blrl /* go */
CALL_OR_JUMP_QUOT XX \
mtlr r11 /* prepare to call XT with quotation in r3 */ XX \
blrl /* go */
#define JUMP_QUOT \
CALL_OR_JUMP_QUOT XX \
mtctr r11 /* prepare to call XT with quotation in r3 */ XX \
bctr /* go */
CALL_OR_JUMP_QUOT XX \
mtctr r11 /* prepare to call XT with quotation in r3 */ XX \
bctr /* go */
#define PARAM_SIZE 32
#define SAVED_REGS_SIZE 96
#define SAVED_INT_REGS_SIZE 96
#define FRAME (RESERVED_SIZE + PARAM_SIZE + SAVED_REGS_SIZE + 8)
#define SAVED_FP_REGS_SIZE 144
#define FRAME (RESERVED_SIZE + PARAM_SIZE + SAVED_INT_REGS_SIZE + SAVED_FP_REGS_SIZE + 8)
#if defined( __APPLE__)
#define LR_SAVE 8
#define RESERVED_SIZE 24
#define LR_SAVE 8
#define RESERVED_SIZE 24
#else
#define LR_SAVE 4
#define RESERVED_SIZE 8
#define LR_SAVE 4
#define RESERVED_SIZE 8
#endif
#define SAVE_LR(reg) stw reg,(LR_SAVE + FRAME)(r1)
@ -36,99 +38,142 @@ in the public domain. */
#define SAVE_AT(offset) (RESERVED_SIZE + PARAM_SIZE + 4 * offset)
#define SAVE(register,offset) stw register,SAVE_AT(offset)(r1)
#define SAVE_INT(register,offset) stw register,SAVE_AT(offset)(r1)
#define RESTORE_INT(register,offset) lwz register,SAVE_AT(offset)(r1)
#define RESTORE(register,offset) lwz register,SAVE_AT(offset)(r1)
#define SAVE_FP(register,offset) stfd register,SAVE_AT(offset)(r1)
#define RESTORE_FP(register,offset) lfd register,SAVE_AT(offset)(r1)
#define PROLOGUE \
mflr r0 XX /* get caller's return address */ \
stwu r1,-FRAME(r1) XX /* create a stack frame to hold non-volatile registers */ \
SAVE_LR(r0)
mflr r0 XX /* get caller's return address */ \
stwu r1,-FRAME(r1) XX /* create a stack frame to hold non-volatile registers */ \
SAVE_LR(r0)
#define EPILOGUE \
LOAD_LR(r0) XX \
lwz r1,0(r1) XX /* destroy the stack frame */ \
mtlr r0 /* get ready to return */
lwz r1,0(r1) XX /* destroy the stack frame */ \
mtlr r0 /* get ready to return */
/* We have to save and restore nonvolatile registers because
the Factor compiler treats the entire register file as volatile. */
DEF(void,c_to_factor,(CELL quot)):
PROLOGUE
PROLOGUE
SAVE(r13,0) /* save GPRs */
/* don't save ds pointer */
/* don't save rs pointer */
SAVE(r16,3)
SAVE(r17,4)
SAVE(r18,5)
SAVE(r19,6)
SAVE(r20,7)
SAVE(r21,8)
SAVE(r22,9)
SAVE(r23,10)
SAVE(r24,11)
SAVE(r25,12)
SAVE(r26,13)
SAVE(r27,14)
SAVE(r28,15)
SAVE(r29,16)
SAVE(r30,17)
SAVE(r31,18)
SAVE(r3,19) /* save quotation since we're about to mangle it */
SAVE_INT(r13,0) /* save GPRs */
/* don't save ds pointer */
/* don't save rs pointer */
SAVE_INT(r16,3)
SAVE_INT(r17,4)
SAVE_INT(r18,5)
SAVE_INT(r19,6)
SAVE_INT(r20,7)
SAVE_INT(r21,8)
SAVE_INT(r22,9)
SAVE_INT(r23,10)
SAVE_INT(r24,11)
SAVE_INT(r25,12)
SAVE_INT(r26,13)
SAVE_INT(r27,14)
SAVE_INT(r28,15)
SAVE_INT(r29,16)
SAVE_INT(r30,17)
SAVE_INT(r31,18)
mr r3,r1 /* pass call stack pointer as an argument */
SAVE_FP(f14,20) /* save FPRs */
SAVE_FP(f15,22)
SAVE_FP(f16,24)
SAVE_FP(f17,26)
SAVE_FP(f18,28)
SAVE_FP(f19,30)
SAVE_FP(f20,32)
SAVE_FP(f21,34)
SAVE_FP(f22,36)
SAVE_FP(f23,38)
SAVE_FP(f24,40)
SAVE_FP(f25,42)
SAVE_FP(f26,44)
SAVE_FP(f27,46)
SAVE_FP(f28,48)
SAVE_FP(f29,50)
SAVE_FP(f30,52)
SAVE_FP(f31,54)
SAVE_INT(r3,19) /* save quotation since we're about to mangle it */
mr r3,r1 /* pass call stack pointer as an argument */
bl MANGLE(save_callstack_bottom)
RESTORE(r3,19) /* restore quotation */
CALL_QUOT
RESTORE_INT(r3,19) /* restore quotation */
CALL_QUOT
RESTORE(r31,18) /* restore GPRs */
RESTORE(r30,17)
RESTORE(r29,16)
RESTORE(r28,15)
RESTORE(r27,14)
RESTORE(r26,13)
RESTORE(r25,12)
RESTORE(r24,11)
RESTORE(r23,10)
RESTORE(r22,9)
RESTORE(r21,8)
RESTORE(r20,7)
RESTORE(r19,6)
RESTORE(r18,5)
RESTORE(r17,4)
RESTORE(r16,3)
/* don't restore rs pointer */
/* don't restore ds pointer */
RESTORE(r13,0)
RESTORE_FP(f31,54)
RESTORE_FP(f30,52)
RESTORE_FP(f29,50)
RESTORE_FP(f28,48)
RESTORE_FP(f27,46)
RESTORE_FP(f26,44)
RESTORE_FP(f25,42)
RESTORE_FP(f24,40)
RESTORE_FP(f23,38)
RESTORE_FP(f22,36)
RESTORE_FP(f21,34)
RESTORE_FP(f20,32)
RESTORE_FP(f19,30)
RESTORE_FP(f18,28)
RESTORE_FP(f17,26)
RESTORE_FP(f16,24)
RESTORE_FP(f15,22)
RESTORE_FP(f14,20) /* save FPRs */
EPILOGUE
blr
RESTORE_INT(r31,18) /* restore GPRs */
RESTORE_INT(r30,17)
RESTORE_INT(r29,16)
RESTORE_INT(r28,15)
RESTORE_INT(r27,14)
RESTORE_INT(r26,13)
RESTORE_INT(r25,12)
RESTORE_INT(r24,11)
RESTORE_INT(r23,10)
RESTORE_INT(r22,9)
RESTORE_INT(r21,8)
RESTORE_INT(r20,7)
RESTORE_INT(r19,6)
RESTORE_INT(r18,5)
RESTORE_INT(r17,4)
RESTORE_INT(r16,3)
/* don't restore rs pointer */
/* don't restore ds pointer */
RESTORE_INT(r13,0)
EPILOGUE
blr
/* We pass a function pointer to memcpy in r6 to work around a Mac OS X ABI
limitation which would otherwise require us to do a bizzaro PC-relative
trampoline to retrieve the function address */
DEF(void,set_callstack,(F_STACK_FRAME *to, F_STACK_FRAME *from, CELL length, void *memcpy)):
sub r1,r3,r5 /* compute new stack pointer */
mr r3,r1 /* start of destination of memcpy() */
stwu r1,-64(r1) /* setup fake stack frame for memcpy() */
mtlr r6 /* prepare to call memcpy() */
blrl /* go */
lwz r1,0(r1) /* tear down fake stack frame */
lwz r0,LR_SAVE(r1) /* we have restored the stack; load return address */
mtlr r0 /* prepare to return to restored callstack */
blr /* go */
sub r1,r3,r5 /* compute new stack pointer */
mr r3,r1 /* start of destination of memcpy() */
stwu r1,-64(r1) /* setup fake stack frame for memcpy() */
mtlr r6 /* prepare to call memcpy() */
blrl /* go */
lwz r1,0(r1) /* tear down fake stack frame */
lwz r0,LR_SAVE(r1) /* we have restored the stack; load return address */
mtlr r0 /* prepare to return to restored callstack */
blr /* go */
DEF(void,throw_impl,(CELL quot, F_STACK_FRAME *rewind_to)):
mr r1,r4 /* compute new stack pointer */
mr r1,r4 /* compute new stack pointer */
lwz r0,LR_SAVE(r1) /* we have rewound the stack; load return address */
mtlr r0
JUMP_QUOT /* call the quotation */
JUMP_QUOT /* call the quotation */
DEF(void,lazy_jit_compile,(CELL quot)):
mr r4,r1 /* save stack pointer */
mr r4,r1 /* save stack pointer */
PROLOGUE
bl MANGLE(primitive_jit_compile)
EPILOGUE
JUMP_QUOT /* call the quotation */
JUMP_QUOT /* call the quotation */
/* Thanks to Joshua Grams for this code.
@ -136,19 +181,19 @@ On PowerPC processors, we must flush the instruction cache manually
after writing to the code heap. */
DEF(void,flush_icache,(void *start, int len)):
/* compute number of cache lines to flush */
add r4,r4,r3
clrrwi r3,r3,5 /* align addr to next lower cache line boundary */
sub r4,r4,r3 /* then n_lines = (len + 0x1f) / 0x20 */
addi r4,r4,0x1f
srwi. r4,r4,5 /* note '.' suffix */
beqlr /* if n_lines == 0, just return. */
mtctr r4 /* flush cache lines */
0: dcbf 0,r3 /* for each line... */
sync
icbi 0,r3
addi r3,r3,0x20
bdnz 0b
sync /* finish up */
isync
blr
/* compute number of cache lines to flush */
add r4,r4,r3
clrrwi r3,r3,5 /* align addr to next lower cache line boundary */
sub r4,r4,r3 /* then n_lines = (len + 0x1f) / 0x20 */
addi r4,r4,0x1f
srwi. r4,r4,5 /* note '.' suffix */
beqlr /* if n_lines == 0, just return. */
mtctr r4 /* flush cache lines */
0: dcbf 0,r3 /* for each line... */
sync
icbi 0,r3
addi r3,r3,0x20
bdnz 0b
sync /* finish up */
isync
blr

View File

@ -1,8 +1,8 @@
#define FACTOR_CPU_STRING "ppc"
#define F_FASTCALL
register CELL ds asm("r14");
register CELL rs asm("r15");
register CELL ds asm("r30");
register CELL rs asm("r31");
void c_to_factor(CELL quot);
void undefined(CELL word);