Starting to update PowerPC backend for recent VM changes (untested)

db4
Slava Pestov 2010-01-08 19:46:27 +13:00
parent 0fd3c78157
commit 6266b41325
6 changed files with 213 additions and 381 deletions

View File

@ -212,6 +212,9 @@ vm/ffi_test.o: vm/ffi_test.c
.cpp.o:
$(TOOLCHAIN_PREFIX)$(CPP) -c $(CFLAGS) -o $@ $<
.S.o:
$(TOOLCHAIN_PREFIX)$(CC) -x assembler-with-cpp -c $(CFLAGS) -o $@ $<
.mm.o:
$(TOOLCHAIN_PREFIX)$(CPP) -c $(CFLAGS) -o $@ $<

View File

@ -1,9 +1,10 @@
! Copyright (C) 2007, 2009 Slava Pestov.
! Copyright (C) 2007, 2010 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: bootstrap.image.private kernel kernel.private namespaces
system cpu.ppc.assembler compiler.codegen.fixup compiler.units
compiler.constants math math.private layouts words vocabs
slots.private locals locals.backend generic.single.private fry ;
compiler.constants math math.private math.ranges layouts words vocabs
slots.private locals locals.backend generic.single.private fry
sequences ;
FROM: cpu.ppc.assembler => B ;
IN: bootstrap.ppc
@ -13,28 +14,88 @@ big-endian on
CONSTANT: ds-reg 13
CONSTANT: rs-reg 14
CONSTANT: vm-reg 15
CONSTANT: ctx-reg 16
: factor-area-size ( -- n ) 4 bootstrap-cells ;
: factor-area-size ( -- n ) 16 ;
: stack-frame ( -- n )
factor-area-size c-area-size + 4 bootstrap-cells align ;
reserved-size
factor-area-size +
16 align ;
: next-save ( -- n ) stack-frame bootstrap-cell - ;
: xt-save ( -- n ) stack-frame 2 bootstrap-cells - ;
: next-save ( -- n ) stack-frame 4 - ;
: xt-save ( -- n ) stack-frame 8 - ;
: param-size ( -- n ) 32 ;
: save-at ( m -- n ) reserved-size + param-size + ;
: save-int ( register offset -- ) [ 1 ] dip save-at STW ;
: restore-int ( register offset -- ) [ 1 ] dip save-at LWZ ;
: save-fp ( register offset -- ) [ 1 ] dip save-at STFD ;
: restore-fp ( register offset -- ) [ 1 ] dip save-at LFD ;
: save-vec ( register offset -- ) save-at 2 LI 2 1 STVXL ;
: restore-vec ( register offset -- ) save-at 2 LI 2 1 LVXL ;
: nv-int-regs ( -- seq ) 13 31 [a,b] ;
: nv-fp-regs ( -- seq ) 14 31 [a,b] ;
: nv-vec-regs ( -- seq ) 20 31 [a,b] ;
: saved-int-regs-size ( -- n ) 96 ;
: saved-fp-regs-size ( -- n ) 144 ;
: saved-vec-regs-size ( -- n ) 208 ;
: callback-frame-size ( -- n )
reserved-size
param-size +
saved-int-regs-size +
saved-fp-regs-size +
saved-vec-regs-size +
16 align ;
[
! Prologue: fetch the caller's return address, allocate a frame big enough
! for every non-volatile register, and stash the return address in it.
0 MFLR
1 1 callback-frame-size neg STWU
0 1 callback-frame-size lr-save + STW
! Save non-volatile registers. Integer slots are 4 bytes apart (the FP area
! begins at offset 80 and the vector area at 224), so scale by a literal 4
! rather than by the host's cell size.
nv-int-regs [ 4 * save-int ] each-index
nv-fp-regs [ 8 * 80 + save-fp ] each-index
nv-vec-regs [ 16 * 224 + save-vec ] each-index
! Load the VM pointer. jit-rel takes the relocation class first and the
! relocation type second, exactly like the rt-xt relocation just below.
0 vm-reg LOAD32 rc-absolute-ppc-2/2 rt-vm jit-rel
! Load the target address (patched in through the rt-xt relocation) and call it
0 2 LOAD32 rc-absolute-ppc-2/2 rt-xt jit-rel
2 MTLR
BLRL
! Restore non-volatile registers in the reverse order of the saves
nv-vec-regs [ 16 * 224 + restore-vec ] each-index
nv-fp-regs [ 8 * 80 + restore-fp ] each-index
nv-int-regs [ 4 * restore-int ] each-index
! Epilogue: reload the return address, pop the frame, and return
0 1 callback-frame-size lr-save + LWZ
1 1 0 LWZ
0 MTLR
BLR
] callback-stub jit-define
: jit-conditional* ( test-quot false-quot -- )
[ '[ bootstrap-cell /i 1 + @ ] ] dip jit-conditional ; inline
[ '[ 4 /i 1 + @ ] ] dip jit-conditional ; inline
: jit-load-context ( -- )
ctx-reg vm-reg vm-context-offset LWZ ;
: jit-save-context ( -- )
4 vm-reg 0 LWZ
1 4 0 STW
ds-reg 4 8 STW
rs-reg 4 12 STW ;
jit-load-context
! jit-load-context leaves the context pointer in ctx-reg, so the stack
! pointer (r1) must be stored relative to ctx-reg like the two stores below.
1 ctx-reg context-callstack-top-offset STW
ds-reg ctx-reg context-datastack-offset STW
rs-reg ctx-reg context-retainstack-offset STW ;
: jit-restore-context ( -- )
4 vm-reg 0 LWZ
ds-reg 4 8 LWZ
rs-reg 4 12 LWZ ;
jit-load-context
ds-reg ctx-reg context-datastack-offset LWZ
rs-reg ctx-reg context-retainstack-offset LWZ ;
[
0 3 LOAD32 rc-absolute-ppc-2/2 rt-literal jit-rel
@ -181,7 +242,7 @@ CONSTANT: vm-reg 15
load-tag
0 4 tuple type-number tag-fixnum CMPI
[ BNE ]
[ 4 3 tuple type-number neg bootstrap-cell + LWZ ]
[ 4 3 tuple type-number neg 4 + LWZ ]
jit-conditional*
] pic-tuple jit-define
@ -230,7 +291,7 @@ CONSTANT: vm-reg 15
! key = hashcode(class)
5 4 1 SRAWI
! key &= cache.length - 1
5 5 mega-cache-size get 1 - bootstrap-cell * ANDI
5 5 mega-cache-size get 1 - 4 * ANDI
! cache += array-start-offset
3 3 array-start-offset ADDI
! cache += key
@ -245,7 +306,7 @@ CONSTANT: vm-reg 15
5 4 0 LWZ
5 5 1 ADDI
5 4 0 STW
! ... goto get(cache + bootstrap-cell)
! ... goto get(cache + 4)
3 3 4 LWZ
3 3 word-xt-offset LWZ
3 MTCTR
@ -255,19 +316,12 @@ CONSTANT: vm-reg 15
! fall-through on miss
] mega-lookup jit-define
[
0 2 LOAD32 rc-absolute-ppc-2/2 rt-xt jit-rel
2 MTCTR
BCTR
] callback-stub jit-define
! ! ! Sub-primitives
! Quotations and words
[
3 ds-reg 0 LWZ
ds-reg dup 4 SUBI
4 vm-reg MR
5 3 quot-xt-offset LWZ
]
[ 5 MTLR BLRL ]
@ -288,6 +342,75 @@ CONSTANT: vm-reg 15
4 MTCTR BCTR
] jit-execute jit-define
! Special primitives
[
jit-restore-context
! Save ctx->callstack_bottom
1 ctx-reg context-callstack-bottom-offset STW
! Call quotation
5 3 quot-xt-offset LWZ
5 MTLR
BLRL
jit-save-context
] \ c-to-factor define-sub-primitive
[
! Unwind stack frames
1 4 MR
! Load ds and rs registers
jit-restore-context
! We have changed the stack; load return address again
0 1 stack-frame lr-save + LWZ
0 MTLR
! Call quotation
4 3 quot-xt-offset LWZ
4 MTCTR
BCTR
] \ unwind-native-frames define-sub-primitive
[
! Load callstack object
6 ds-reg 0 LWZ
ds-reg ds-reg 4 SUBI
! Get ctx->callstack_bottom
jit-load-context
3 ctx-reg context-callstack-bottom-offset LWZ
! Get top of callstack object -- 'src' for memcpy
4 6 callstack-top-offset ADDI
! Get callstack length, in bytes --- 'len' for memcpy
5 6 callstack-length-offset LWZ
5 5 tag-bits get SRAWI
! Compute new stack pointer -- 'dst' for memcpy.
! subf computes rB - rA, so the operands are ordered to give
! r3 = callstack_bottom (r3) - length (r5).
3 5 3 SUBF
! Install new stack pointer
1 3 MR
! Call memcpy; arguments are now in the correct registers
1 1 -64 STWU
0 2 LOAD32 "factor_memcpy" f rc-absolute-ppc-2/2 jit-dlsym
2 MTLR
BLRL
1 1 0 LWZ
! Return with new callstack
0 1 lr-save stack-frame + LWZ
0 MTLR
BLR
] \ set-callstack define-sub-primitive
[
! Save the stack pointers into the context before calling into the VM
jit-save-context
! Pass the VM pointer in r4
4 vm-reg MR
! Load the address of lazy_jit_compile into r2. The immediate 0 is a
! placeholder filled in by the dlsym relocation; the destination register
! comes second, as in every other LOAD32 in this file.
0 2 LOAD32 "lazy_jit_compile" f rc-absolute-ppc-2/2 jit-dlsym
2 MTLR
BLRL
! Fetch the entry point from the quotation in r3
5 3 quot-xt-offset LWZ
]
! Non-tail-call variant: call through the link register
[ 5 MTLR BLRL ]
! Tail-call variant: jump through the count register
[ 5 MTCTR BCTR ]
\ lazy-jit-compile define-combinator-primitive
! Objects
[
3 ds-reg 0 LWZ

View File

@ -1,10 +1,10 @@
! Copyright (C) 2007, 2008 Slava Pestov.
! Copyright (C) 2007, 2010 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: parser layouts system kernel sequences ;
USING: parser system kernel sequences ;
IN: bootstrap.ppc
: c-area-size ( -- n ) 10 bootstrap-cells ;
: lr-save ( -- n ) bootstrap-cell ;
: reserved-size ( -- n ) 24 ;
: lr-save ( -- n ) 4 ;
<< "vocab:cpu/ppc/bootstrap.factor" parse-file suffix! >>
call

View File

@ -1,10 +1,10 @@
! Copyright (C) 2007, 2008 Slava Pestov.
! Copyright (C) 2007, 2010 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: parser layouts system kernel sequences ;
USING: parser system kernel sequences ;
IN: bootstrap.ppc
: c-area-size ( -- n ) 14 bootstrap-cells ;
: lr-save ( -- n ) 2 bootstrap-cells ;
: reserved-size ( -- n ) 24 ;
: lr-save ( -- n ) 8 ;
<< "vocab:cpu/ppc/bootstrap.factor" parse-file suffix! >>
call

View File

@ -1 +1 @@
PLAF_DLL_OBJS += vm/cpu-ppc.o

View File

@ -1,15 +1,8 @@
/* Parts of this file were snarfed from SBCL src/runtime/ppc-assem.S, which is
in the public domain. */
#if defined(__APPLE__) || (defined(WINDOWS) && !defined(__arm__))
#if defined(__APPLE__)
#define MANGLE(sym) _##sym
#else
#define MANGLE(sym) sym
#endif
/* Apple's PPC assembler is out of date? */
#if defined(__APPLE__) && defined(__ppc__)
#define XX @
#else
#define MANGLE(sym) sym
#define XX ;
#endif
@ -17,320 +10,33 @@ in the public domain. */
#define DEF(returns,symbol,args) .globl MANGLE(symbol) XX \
MANGLE(symbol)
#define DS_REG r13
#define RS_REG r14
#define VM_REG r15
#define CALL_OR_JUMP_QUOT \
lwz r11,12(r3) /* load quotation-xt slot */ XX \
#define CALL_QUOT \
CALL_OR_JUMP_QUOT XX \
mtlr r11 /* prepare to call XT with quotation in r3 */ XX \
blrl /* go */
#define JUMP_QUOT \
CALL_OR_JUMP_QUOT XX \
mtctr r11 /* prepare to call XT with quotation in r3 */ XX \
bctr /* go */
#define PARAM_SIZE 32
#define SAVED_INT_REGS_SIZE 96
#define SAVED_FP_REGS_SIZE 144
#define SAVED_V_REGS_SIZE 208
#define FRAME (RESERVED_SIZE + PARAM_SIZE + SAVED_INT_REGS_SIZE + SAVED_FP_REGS_SIZE + SAVED_V_REGS_SIZE + 8)
#if defined( __APPLE__)
#define LR_SAVE 8
#define RESERVED_SIZE 24
#else
#define LR_SAVE 4
#define RESERVED_SIZE 8
#endif
#define SAVE_LR(reg) stw reg,(LR_SAVE + FRAME)(r1)
#define LOAD_LR(reg) lwz reg,(LR_SAVE + FRAME)(r1)
#define SAVE_AT(offset) (RESERVED_SIZE + PARAM_SIZE + 4 * offset)
#define SAVE_INT(register,offset) stw register,SAVE_AT(offset)(r1)
#define RESTORE_INT(register,offset) lwz register,SAVE_AT(offset)(r1)
#define SAVE_FP(register,offset) stfd register,SAVE_AT(offset)(r1)
#define RESTORE_FP(register,offset) lfd register,SAVE_AT(offset)(r1)
#define SAVE_V(register,offset) \
li r2,SAVE_AT(offset) XX \
stvxl register,r2,r1
#define RESTORE_V(register,offset) \
li r2,SAVE_AT(offset) XX \
lvxl register,r2,r1
#define PROLOGUE \
mflr r0 XX /* get caller's return address */ \
stwu r1,-FRAME(r1) XX /* create a stack frame to hold non-volatile registers */ \
SAVE_LR(r0)
#define EPILOGUE \
LOAD_LR(r0) XX \
lwz r1,0(r1) XX /* destroy the stack frame */ \
mtlr r0 /* get ready to return */
/* We have to save and restore nonvolatile registers because
the Factor compiler treats the entire register file as volatile. */
DEF(void,c_to_factor,(cell quot, void *vm)):
PROLOGUE
SAVE_INT(r13,0)
SAVE_INT(r14,1)
SAVE_INT(VM_REG,2)
SAVE_INT(r16,3)
SAVE_INT(r17,4)
SAVE_INT(r18,5)
SAVE_INT(r19,6)
SAVE_INT(r20,7)
SAVE_INT(r21,8)
SAVE_INT(r22,9)
SAVE_INT(r23,10)
SAVE_INT(r24,11)
SAVE_INT(r25,12)
SAVE_INT(r26,13)
SAVE_INT(r27,14)
SAVE_INT(r28,15)
SAVE_INT(r29,16)
SAVE_INT(r30,17)
SAVE_INT(r31,18)
SAVE_FP(f14,20)
SAVE_FP(f15,22)
SAVE_FP(f16,24)
SAVE_FP(f17,26)
SAVE_FP(f18,28)
SAVE_FP(f19,30)
SAVE_FP(f20,32)
SAVE_FP(f21,34)
SAVE_FP(f22,36)
SAVE_FP(f23,38)
SAVE_FP(f24,40)
SAVE_FP(f25,42)
SAVE_FP(f26,44)
SAVE_FP(f27,46)
SAVE_FP(f28,48)
SAVE_FP(f29,50)
SAVE_FP(f30,52)
SAVE_FP(f31,54)
SAVE_V(v20,56)
SAVE_V(v21,60)
SAVE_V(v22,64)
SAVE_V(v23,68)
SAVE_V(v24,72)
SAVE_V(v25,76)
SAVE_V(v26,80)
SAVE_V(v27,84)
SAVE_V(v28,88)
SAVE_V(v29,92)
SAVE_V(v30,96)
SAVE_V(v31,100)
/* r4 vm ptr preserved */
mfvscr v0
li r2,SAVE_AT(104)
stvxl v0,r2,r1
addi r2,r2,0xc
lwzx r5,r2,r1
lis r6,0x1
andc r5,r5,r6
stwx r5,r2,r1
subi r2,r2,0xc
lvxl v0,r2,r1
mtvscr v0
/* Load context */
mr VM_REG,r4
lwz r16,0(VM_REG)
/* Load ctx->datastack */
lwz DS_REG,8(r16)
/* Load ctx->retainstack */
lwz RS_REG,12(r16)
/* Save ctx->callstack_bottom */
stw r1,4(r16)
CALL_QUOT
/* Load context */
lwz r16,0(VM_REG)
/* Save ctx->datastack */
stw DS_REG,8(r16)
/* Save ctx->retainstack */
stw RS_REG,12(r16)
RESTORE_V(v0,104)
mtvscr v0
RESTORE_V(v31,100)
RESTORE_V(v30,96)
RESTORE_V(v29,92)
RESTORE_V(v28,88)
RESTORE_V(v27,84)
RESTORE_V(v26,80)
RESTORE_V(v25,76)
RESTORE_V(v24,72)
RESTORE_V(v23,68)
RESTORE_V(v22,64)
RESTORE_V(v21,60)
RESTORE_V(v20,56)
RESTORE_FP(f31,54)
RESTORE_FP(f30,52)
RESTORE_FP(f29,50)
RESTORE_FP(f28,48)
RESTORE_FP(f27,46)
RESTORE_FP(f26,44)
RESTORE_FP(f25,42)
RESTORE_FP(f24,40)
RESTORE_FP(f23,38)
RESTORE_FP(f22,36)
RESTORE_FP(f21,34)
RESTORE_FP(f20,32)
RESTORE_FP(f19,30)
RESTORE_FP(f18,28)
RESTORE_FP(f17,26)
RESTORE_FP(f16,24)
RESTORE_FP(f15,22)
RESTORE_FP(f14,20)
RESTORE_INT(r31,18)
RESTORE_INT(r30,17)
RESTORE_INT(r29,16)
RESTORE_INT(r28,15)
RESTORE_INT(r27,14)
RESTORE_INT(r26,13)
RESTORE_INT(r25,12)
RESTORE_INT(r24,11)
RESTORE_INT(r23,10)
RESTORE_INT(r22,9)
RESTORE_INT(r21,8)
RESTORE_INT(r20,7)
RESTORE_INT(r19,6)
RESTORE_INT(r18,5)
RESTORE_INT(r17,4)
RESTORE_INT(r16,3)
RESTORE_INT(VM_REG,2)
RESTORE_INT(r14,1)
RESTORE_INT(r13,0)
EPILOGUE
blr
DEF(void,set_callstack,(void *vm, stack_frame *to, stack_frame *from, cell length, void *memcpy)):
/* Save VM pointer in non-volatile register */
mr VM_REG,r3
/* Compute new stack pointer */
sub r1,r4,r6
/* Call memcpy() */
mr r3,r1
mr r4,r5
mr r5,r6
stwu r1,-64(r1)
mtlr r7
blrl
lwz r1,0(r1)
/* Load context */
lwz r16,0(VM_REG)
/* Load ctx->datastack */
lwz DS_REG,8(r16)
/* Load ctx->retainstack */
lwz RS_REG,12(r16)
/* We have changed the stack; load return address again */
lwz r0,LR_SAVE(r1)
mtlr r0
blr
DEF(void,throw_impl,(cell quot, void *new_stack, void *vm)):
/* compute new stack pointer */
mr r1,r4
/* make vm ptr 2nd arg in case quot->xt == lazy_jit_compile */
mr r4,r5
/* Load context */
mr VM_REG,r5
lwz r16,0(VM_REG)
/* Load ctx->datastack */
lwz DS_REG,8(r16)
/* Load ctx->retainstack */
lwz RS_REG,12(r16)
/* We have changed the stack; load return address again */
lwz r0,LR_SAVE(r1)
mtlr r0
/* Call the quotation */
JUMP_QUOT
DEF(void,lazy_jit_compile_impl,(cell quot, void *vm)):
/* Load context */
mr VM_REG,r4
lwz r16,0(VM_REG)
/* Save ctx->datastack */
stw DS_REG,8(r16)
/* Save ctx->retainstack */
stw RS_REG,12(r16)
/* Save ctx->callstack_top */
stw r1,0(r16)
/* Compile quotation */
PROLOGUE
bl MANGLE(lazy_jit_compile)
EPILOGUE
/* Call the quotation */
JUMP_QUOT
/* Thanks to Joshua Grams for this code.
On PowerPC processors, we must flush the instruction cache manually
after writing to the code heap. */
DEF(void,flush_icache,(void *start, int len)):
DEF(void,flush_icache,(void*, int)):
/* compute number of cache lines to flush */
add r4,r4,r3
clrrwi r3,r3,5 /* align addr to next lower cache line boundary */
sub r4,r4,r3 /* then n_lines = (len + 0x1f) / 0x20 */
/* align addr to next lower cache line boundary */
clrrwi r3,r3,5
/* then n_lines = (len + 0x1f) / 0x20 */
sub r4,r4,r3
addi r4,r4,0x1f
srwi. r4,r4,5 /* note '.' suffix */
beqlr /* if n_lines == 0, just return. */
mtctr r4 /* flush cache lines */
0: dcbf 0,r3 /* for each line... */
/* note '.' suffix */
srwi. r4,r4,5
/* if n_lines == 0, just return. */
beqlr
/* flush cache lines */
mtctr r4
/* for each line... */
0: dcbf 0,r3
sync
icbi 0,r3
addi r3,r3,0x20
bdnz 0b
sync /* finish up */
/* finish up */
sync
isync
blr