More work on PowerPC backend; change register usage, free up some more integer and float regs (untested)

db4
Slava Pestov 2008-11-05 04:20:35 -06:00
parent 7aa530c64e
commit 2239f4fb99
4 changed files with 256 additions and 176 deletions

View File

@ -11,8 +11,8 @@ big-endian on
4 jit-code-format set 4 jit-code-format set
: ds-reg 14 ; : ds-reg 30 ;
: rs-reg 15 ; : rs-reg 31 ;
: factor-area-size ( -- n ) 4 bootstrap-cells ; : factor-area-size ( -- n ) 4 bootstrap-cells ;

View File

@ -1,13 +1,30 @@
! Copyright (C) 2005, 2008 Slava Pestov. ! Copyright (C) 2005, 2008 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license. ! See http://factorcode.org/license.txt for BSD license.
IN: cpu.ppc.architecture USING: alien.c-types
accessors
cpu.architecture
compiler.cfg.registers
cpu.ppc.assembler
kernel
locals
layouts
combinators
make
compiler.cfg.instructions
math.order
system
math
compiler.constants
namespaces compiler.codegen.fixup ;
IN: cpu.ppc
! PowerPC register assignments ! PowerPC register assignments:
! r3-r11, r16-r31: integer vregs ! r2-r28: integer vregs
! f0-f13: float vregs ! r29: integer scratch
! r12: scratch ! r30: data stack
! r14: data stack ! r31: retain stack
! r15: retain stack ! f0-f29: float vregs
! f30, f31: float scratch
<< { << {
{ [ os macosx? ] [ { [ os macosx? ] [
@ -23,13 +40,15 @@ IN: cpu.ppc.architecture
M: ppc machine-registers M: ppc machine-registers
{ {
{ int-regs { 3 4 5 6 7 8 9 10 11 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 } } { int-regs T{ range f 2 27 1 } }
{ double-float-regs { 0 1 2 3 4 5 6 7 8 9 10 11 12 13 } } { double-float-regs T{ range f 0 28 1 } }
} ; } ;
: scratch-reg 12 ; inline : scratch-reg 29 ; inline
: fp-scratch-reg-1 30 ; inline
: fp-scratch-reg-2 31 ; inline
M: ppc two-operand? t ; M: ppc two-operand? f ;
M: ppc %load-immediate ( reg n -- ) swap LOAD ; M: ppc %load-immediate ( reg n -- ) swap LOAD ;
@ -38,8 +57,8 @@ M:: ppc %load-indirect ( reg obj -- )
obj rc-absolute-ppc-2/2 rel-literal obj rc-absolute-ppc-2/2 rel-literal
reg reg 0 LWZ ; reg reg 0 LWZ ;
: ds-reg 14 ; inline : ds-reg 30 ; inline
: rs-reg 15 ; inline : rs-reg 31 ; inline
GENERIC: loc-reg ( loc -- reg ) GENERIC: loc-reg ( loc -- reg )
@ -83,15 +102,14 @@ M: ppc %inc-r ( n -- ) rs-reg (%inc) ;
: xt-save ( n -- i ) 2 cells - ; : xt-save ( n -- i ) 2 cells - ;
M: ppc stack-frame-size ( stack-frame -- i ) M: ppc stack-frame-size ( stack-frame -- i )
local@ factor-area-size + 4 cells align ; [ spill-counts>> [ swap reg-size * ] { } assoc>map sum ]
[ params>> ]
! M: x86 stack-frame-size ( stack-frame -- i ) [ return>> ]
! [ spill-counts>> [ swap reg-size * ] { } assoc>map sum ] tri + +
! [ params>> ] reserved-area-size +
! [ return>> ] param-save-size +
! tri + + factor-area-size +
! 3 cells + 4 cells align ;
! align-stack ;
M: ppc %call ( label -- ) BL ; M: ppc %call ( label -- ) BL ;
M: ppc %jump-label ( label -- ) B ; M: ppc %jump-label ( label -- ) B ;
@ -136,62 +154,57 @@ M: ppc %shr-imm swapd SRWI ;
M: ppc %sar-imm SRAWI ; M: ppc %sar-imm SRAWI ;
M: ppc %not NOT ; M: ppc %not NOT ;
: bignum@ ( n -- offset ) cells bignum tag-number - ; inline
M: ppc %integer>bignum ( dst src temp -- ) M: ppc %integer>bignum ( dst src temp -- )
[ [
{ "end" "non-zero" "pos" "store" } [ define-label ] each { "end" "non-zero" "pos" "store" } [ define-label ] each
! is it zero? dst 0 >bignum %load-immediate
0 over v>operand 0 CMPI ! Is it zero? Then just go to the end and return this zero
"non-zero" get BNE 0 src 0 CMPI
dup 0 >bignum %load-literal "end" get BEQ
"end" get B ! Allocate a bignum
! it is non-zero dst 4 cells bignum temp %allot
"non-zero" resolve-label ! Write length
1 bignum over 3 + cells %allot 2 temp LI
1+ v>operand 12 LI ! compute the length dst 1 bignum@ temp STW
12 11 cell STW ! store the length ! Store value
! is the fixnum negative? dst 3 bignum@ src STW
0 over v>operand 0 CMPI ! Compute sign
"pos" get BGE temp src MR
1 12 LI temp cell-bits 1- SRAWI
! store negative sign temp temp 1 ANDI
12 11 2 cells STW ! Store sign
! negate fixnum dst 2 bignum@ temp STW
dup v>operand dup -1 MULI ! Make negative value positive
"store" get B temp temp temp ADD
"pos" resolve-label temp temp NEG
0 12 LI temp temp 1 ADDI
! store positive sign temp src temp MULLW
12 11 2 cells STW ! Store the bignum
"store" resolve-label dst 3 bignum@ temp STW
! store the number
dup v>operand 11 3 cells STW
! tag the bignum, store it in reg
bignum %store-tagged
"end" resolve-label "end" resolve-label
] with-scope ; ] with-scope ;
: bignum@ ( n -- offset ) cells bignum tag-number - ; inline M:: %bignum>integer ( dst src temp -- )
M:: %bignum>integer ( dst src -- )
[ [
"end" define-label "end" define-label
scratch-reg src 1 bignum@ LWZ temp src 1 bignum@ LWZ
! if the length is 1, its just the sign and nothing else, ! if the length is 1, its just the sign and nothing else,
! so output 0 ! so output 0
0 dst LI 0 dst LI
0 scratch-reg 1 v>operand CMPI 0 temp 1 v>operand CMPI
"end" get BEQ "end" get BEQ
! load the value ! load the value
dst src 3 bignum@ LWZ dst src 3 bignum@ LWZ
! load the sign ! load the sign
scratch-reg src 2 bignum@ LWZ temp src 2 bignum@ LWZ
! branchless arithmetic: we want to turn 0 into 1, ! branchless arithmetic: we want to turn 0 into 1,
! and 1 into -1 ! and 1 into -1
scratch-reg scratch-reg 1 SLWI temp temp temp ADD
scratch-reg scratch-reg NEG temp temp 1 SUBI
scratch-reg scratch-reg 1 ADDI
! multiply value by sign ! multiply value by sign
dst dst scratch-reg MULLW dst dst temp MULLW
"end" resolve-label "end" resolve-label
] with-scope ; ] with-scope ;
@ -200,20 +213,21 @@ M: ppc %sub-float FSUB ;
M: ppc %mul-float FMUL ; M: ppc %mul-float FMUL ;
M: ppc %div-float FDIV ; M: ppc %div-float FDIV ;
M: ppc %integer>float M: ppc %integer>float ( dst src -- )
HEX: 4330 "scratch" operand LIS HEX: 4330 scratch-reg LIS
"scratch" operand 1 0 param@ STW scratch-reg 1 0 param@ STW
"in" operand dup HEX: 8000 XORIS scratch-reg src MR
"in" operand 1 cell param@ STW scratch-reg dup HEX: 8000 XORIS
"f1" operand 1 0 param@ LFD scratch-reg 1 cell param@ STW
4503601774854144.0 "in" operand load-indirect fp-scratch-reg-2 1 0 param@ LFD
"f2" operand "in" operand float-offset LFD 4503601774854144.0 scratch-reg load-indirect
"f1" operand "f1" operand "f2" operand FSUB ; fp-scratch-reg-2 scratch-reg float-offset LFD
fp-scratch-reg-2 fp-scratch-reg-2 fp-scratch-reg-2 FSUB ;
M: ppc %float>integer M:: ppc %float>integer ( dst src -- )
"scratch" operand "in" operand FCTIWZ fp-scratch-reg-1 src FCTIWZ
"scratch" operand 1 0 param@ STFD fp-scratch-reg-2 1 0 param@ STFD
"out" operand 1 cell param@ LWZ ; dst 1 4 param@ LWZ ;
M: ppc %copy ( dst src -- ) MR ; M: ppc %copy ( dst src -- ) MR ;
@ -407,11 +421,32 @@ M: ppc %compare-branch (%compare) %branch ;
M: ppc %compare-imm-branch (%compare-imm) %branch ; M: ppc %compare-imm-branch (%compare-imm) %branch ;
M: ppc %compare-float-branch (%compare-float) %branch ; M: ppc %compare-float-branch (%compare-float) %branch ;
! M: ppc %spill-integer ( src n -- ) spill-integer@ swap MOV ; : spill-integer-base ( stack-frame -- n )
! M: ppc %reload-integer ( dst n -- ) spill-integer@ MOV ; [ params>> ] [ return>> ] bi + ;
!
! M: ppc %spill-float ( src n -- ) spill-float@ swap MOVSD ; : stack@ 1 swap ; inline
! M: ppc %reload-float ( dst n -- ) spill-float@ MOVSD ;
: spill-integer@ ( n -- op )
cells
stack-frame get spill-integer-base
+ stack@ ;
: spill-float-base ( stack-frame -- n )
[ spill-counts>> int-regs swap at int-regs reg-size * ]
[ params>> ]
[ return>> ]
tri + + ;
: spill-float@ ( n -- op )
double-float-regs reg-size *
stack-frame get spill-float-base
+ stack@ ;
M: ppc %spill-integer ( src n -- ) spill-integer@ STW ;
M: ppc %reload-integer ( dst n -- ) spill-integer@ LWZ ;
M: ppc %spill-float ( src n -- ) spill-float@ STFD ;
M: ppc %reload-float ( dst n -- ) spill-float@ LFD ;
M: ppc %loop-entry ; M: ppc %loop-entry ;

View File

@ -4,30 +4,32 @@ in the public domain. */
/* Note that the XT is passed to the quotation in r11 */ /* Note that the XT is passed to the quotation in r11 */
#define CALL_OR_JUMP_QUOT \ #define CALL_OR_JUMP_QUOT \
lwz r11,9(r3) /* load quotation-xt slot */ XX \ lwz r11,9(r3) /* load quotation-xt slot */ XX \
#define CALL_QUOT \ #define CALL_QUOT \
CALL_OR_JUMP_QUOT XX \ CALL_OR_JUMP_QUOT XX \
mtlr r11 /* prepare to call XT with quotation in r3 */ XX \ mtlr r11 /* prepare to call XT with quotation in r3 */ XX \
blrl /* go */ blrl /* go */
#define JUMP_QUOT \ #define JUMP_QUOT \
CALL_OR_JUMP_QUOT XX \ CALL_OR_JUMP_QUOT XX \
mtctr r11 /* prepare to call XT with quotation in r3 */ XX \ mtctr r11 /* prepare to call XT with quotation in r3 */ XX \
bctr /* go */ bctr /* go */
#define PARAM_SIZE 32 #define PARAM_SIZE 32
#define SAVED_REGS_SIZE 96 #define SAVED_INT_REGS_SIZE 96
#define FRAME (RESERVED_SIZE + PARAM_SIZE + SAVED_REGS_SIZE + 8) #define SAVED_FP_REGS_SIZE 144
#define FRAME (RESERVED_SIZE + PARAM_SIZE + SAVED_INT_REGS_SIZE + SAVED_FP_REGS_SIZE + 8)
#if defined( __APPLE__) #if defined( __APPLE__)
#define LR_SAVE 8 #define LR_SAVE 8
#define RESERVED_SIZE 24 #define RESERVED_SIZE 24
#else #else
#define LR_SAVE 4 #define LR_SAVE 4
#define RESERVED_SIZE 8 #define RESERVED_SIZE 8
#endif #endif
#define SAVE_LR(reg) stw reg,(LR_SAVE + FRAME)(r1) #define SAVE_LR(reg) stw reg,(LR_SAVE + FRAME)(r1)
@ -36,99 +38,142 @@ in the public domain. */
#define SAVE_AT(offset) (RESERVED_SIZE + PARAM_SIZE + 4 * offset) #define SAVE_AT(offset) (RESERVED_SIZE + PARAM_SIZE + 4 * offset)
#define SAVE(register,offset) stw register,SAVE_AT(offset)(r1) #define SAVE_INT(register,offset) stw register,SAVE_AT(offset)(r1)
#define RESTORE_INT(register,offset) lwz register,SAVE_AT(offset)(r1)
#define RESTORE(register,offset) lwz register,SAVE_AT(offset)(r1) #define SAVE_FP(register,offset) stfd register,SAVE_AT(offset)(r1)
#define RESTORE_FP(register,offset) lfd register,SAVE_AT(offset)(r1)
#define PROLOGUE \ #define PROLOGUE \
mflr r0 XX /* get caller's return address */ \ mflr r0 XX /* get caller's return address */ \
stwu r1,-FRAME(r1) XX /* create a stack frame to hold non-volatile registers */ \ stwu r1,-FRAME(r1) XX /* create a stack frame to hold non-volatile registers */ \
SAVE_LR(r0) SAVE_LR(r0)
#define EPILOGUE \ #define EPILOGUE \
LOAD_LR(r0) XX \ LOAD_LR(r0) XX \
lwz r1,0(r1) XX /* destroy the stack frame */ \ lwz r1,0(r1) XX /* destroy the stack frame */ \
mtlr r0 /* get ready to return */ mtlr r0 /* get ready to return */
/* We have to save and restore nonvolatile registers because
the Factor compiler treats the entire register file as volatile. */
DEF(void,c_to_factor,(CELL quot)): DEF(void,c_to_factor,(CELL quot)):
PROLOGUE PROLOGUE
SAVE(r13,0) /* save GPRs */ SAVE_INT(r13,0) /* save GPRs */
/* don't save ds pointer */ /* don't save ds pointer */
/* don't save rs pointer */ /* don't save rs pointer */
SAVE(r16,3) SAVE_INT(r16,3)
SAVE(r17,4) SAVE_INT(r17,4)
SAVE(r18,5) SAVE_INT(r18,5)
SAVE(r19,6) SAVE_INT(r19,6)
SAVE(r20,7) SAVE_INT(r20,7)
SAVE(r21,8) SAVE_INT(r21,8)
SAVE(r22,9) SAVE_INT(r22,9)
SAVE(r23,10) SAVE_INT(r23,10)
SAVE(r24,11) SAVE_INT(r24,11)
SAVE(r25,12) SAVE_INT(r25,12)
SAVE(r26,13) SAVE_INT(r26,13)
SAVE(r27,14) SAVE_INT(r27,14)
SAVE(r28,15) SAVE_INT(r28,15)
SAVE(r29,16) SAVE_INT(r29,16)
SAVE(r30,17) SAVE_INT(r30,17)
SAVE(r31,18) SAVE_INT(r31,18)
SAVE(r3,19) /* save quotation since we're about to mangle it */
mr r3,r1 /* pass call stack pointer as an argument */ SAVE_FP(f14,20) /* save FPRs */
SAVE_FP(f15,22)
SAVE_FP(f16,24)
SAVE_FP(f17,26)
SAVE_FP(f18,28)
SAVE_FP(f19,30)
SAVE_FP(f20,32)
SAVE_FP(f21,34)
SAVE_FP(f22,36)
SAVE_FP(f23,38)
SAVE_FP(f24,40)
SAVE_FP(f25,42)
SAVE_FP(f26,44)
SAVE_FP(f27,46)
SAVE_FP(f28,48)
SAVE_FP(f29,50)
SAVE_FP(f30,52)
SAVE_FP(f31,54)
SAVE_INT(r3,19) /* save quotation since we're about to mangle it */
mr r3,r1 /* pass call stack pointer as an argument */
bl MANGLE(save_callstack_bottom) bl MANGLE(save_callstack_bottom)
RESTORE(r3,19) /* restore quotation */ RESTORE_INT(r3,19) /* restore quotation */
CALL_QUOT CALL_QUOT
RESTORE(r31,18) /* restore GPRs */ RESTORE_FP(f31,54)
RESTORE(r30,17) RESTORE_FP(f30,52)
RESTORE(r29,16) RESTORE_FP(f29,50)
RESTORE(r28,15) RESTORE_FP(f28,48)
RESTORE(r27,14) RESTORE_FP(f27,46)
RESTORE(r26,13) RESTORE_FP(f26,44)
RESTORE(r25,12) RESTORE_FP(f25,42)
RESTORE(r24,11) RESTORE_FP(f24,40)
RESTORE(r23,10) RESTORE_FP(f23,38)
RESTORE(r22,9) RESTORE_FP(f22,36)
RESTORE(r21,8) RESTORE_FP(f21,34)
RESTORE(r20,7) RESTORE_FP(f20,32)
RESTORE(r19,6) RESTORE_FP(f19,30)
RESTORE(r18,5) RESTORE_FP(f18,28)
RESTORE(r17,4) RESTORE_FP(f17,26)
RESTORE(r16,3) RESTORE_FP(f16,24)
/* don't restore rs pointer */ RESTORE_FP(f15,22)
/* don't restore ds pointer */ RESTORE_FP(f14,20) /* save FPRs */
RESTORE(r13,0)
EPILOGUE RESTORE_INT(r31,18) /* restore GPRs */
blr RESTORE_INT(r30,17)
RESTORE_INT(r29,16)
RESTORE_INT(r28,15)
RESTORE_INT(r27,14)
RESTORE_INT(r26,13)
RESTORE_INT(r25,12)
RESTORE_INT(r24,11)
RESTORE_INT(r23,10)
RESTORE_INT(r22,9)
RESTORE_INT(r21,8)
RESTORE_INT(r20,7)
RESTORE_INT(r19,6)
RESTORE_INT(r18,5)
RESTORE_INT(r17,4)
RESTORE_INT(r16,3)
/* don't restore rs pointer */
/* don't restore ds pointer */
RESTORE_INT(r13,0)
EPILOGUE
blr
/* We pass a function pointer to memcpy in r6 to work around a Mac OS X ABI /* We pass a function pointer to memcpy in r6 to work around a Mac OS X ABI
limitation which would otherwise require us to do a bizzaro PC-relative limitation which would otherwise require us to do a bizzaro PC-relative
trampoline to retrieve the function address */ trampoline to retrieve the function address */
DEF(void,set_callstack,(F_STACK_FRAME *to, F_STACK_FRAME *from, CELL length, void *memcpy)): DEF(void,set_callstack,(F_STACK_FRAME *to, F_STACK_FRAME *from, CELL length, void *memcpy)):
sub r1,r3,r5 /* compute new stack pointer */ sub r1,r3,r5 /* compute new stack pointer */
mr r3,r1 /* start of destination of memcpy() */ mr r3,r1 /* start of destination of memcpy() */
stwu r1,-64(r1) /* setup fake stack frame for memcpy() */ stwu r1,-64(r1) /* setup fake stack frame for memcpy() */
mtlr r6 /* prepare to call memcpy() */ mtlr r6 /* prepare to call memcpy() */
blrl /* go */ blrl /* go */
lwz r1,0(r1) /* tear down fake stack frame */ lwz r1,0(r1) /* tear down fake stack frame */
lwz r0,LR_SAVE(r1) /* we have restored the stack; load return address */ lwz r0,LR_SAVE(r1) /* we have restored the stack; load return address */
mtlr r0 /* prepare to return to restored callstack */ mtlr r0 /* prepare to return to restored callstack */
blr /* go */ blr /* go */
DEF(void,throw_impl,(CELL quot, F_STACK_FRAME *rewind_to)): DEF(void,throw_impl,(CELL quot, F_STACK_FRAME *rewind_to)):
mr r1,r4 /* compute new stack pointer */ mr r1,r4 /* compute new stack pointer */
lwz r0,LR_SAVE(r1) /* we have rewound the stack; load return address */ lwz r0,LR_SAVE(r1) /* we have rewound the stack; load return address */
mtlr r0 mtlr r0
JUMP_QUOT /* call the quotation */ JUMP_QUOT /* call the quotation */
DEF(void,lazy_jit_compile,(CELL quot)): DEF(void,lazy_jit_compile,(CELL quot)):
mr r4,r1 /* save stack pointer */ mr r4,r1 /* save stack pointer */
PROLOGUE PROLOGUE
bl MANGLE(primitive_jit_compile) bl MANGLE(primitive_jit_compile)
EPILOGUE EPILOGUE
JUMP_QUOT /* call the quotation */ JUMP_QUOT /* call the quotation */
/* Thanks to Joshua Grams for this code. /* Thanks to Joshua Grams for this code.
@ -136,19 +181,19 @@ On PowerPC processors, we must flush the instruction cache manually
after writing to the code heap. */ after writing to the code heap. */
DEF(void,flush_icache,(void *start, int len)): DEF(void,flush_icache,(void *start, int len)):
/* compute number of cache lines to flush */ /* compute number of cache lines to flush */
add r4,r4,r3 add r4,r4,r3
clrrwi r3,r3,5 /* align addr to next lower cache line boundary */ clrrwi r3,r3,5 /* align addr to next lower cache line boundary */
sub r4,r4,r3 /* then n_lines = (len + 0x1f) / 0x20 */ sub r4,r4,r3 /* then n_lines = (len + 0x1f) / 0x20 */
addi r4,r4,0x1f addi r4,r4,0x1f
srwi. r4,r4,5 /* note '.' suffix */ srwi. r4,r4,5 /* note '.' suffix */
beqlr /* if n_lines == 0, just return. */ beqlr /* if n_lines == 0, just return. */
mtctr r4 /* flush cache lines */ mtctr r4 /* flush cache lines */
0: dcbf 0,r3 /* for each line... */ 0: dcbf 0,r3 /* for each line... */
sync sync
icbi 0,r3 icbi 0,r3
addi r3,r3,0x20 addi r3,r3,0x20
bdnz 0b bdnz 0b
sync /* finish up */ sync /* finish up */
isync isync
blr blr

View File

@ -1,8 +1,8 @@
#define FACTOR_CPU_STRING "ppc" #define FACTOR_CPU_STRING "ppc"
#define F_FASTCALL #define F_FASTCALL
register CELL ds asm("r14"); register CELL ds asm("r30");
register CELL rs asm("r15"); register CELL rs asm("r31");
void c_to_factor(CELL quot); void c_to_factor(CELL quot);
void undefined(CELL word); void undefined(CELL word);