diff --git a/Makefile b/Makefile index 57b7ef0848..772f3f9875 100755 --- a/Makefile +++ b/Makefile @@ -212,6 +212,9 @@ vm/ffi_test.o: vm/ffi_test.c .cpp.o: $(TOOLCHAIN_PREFIX)$(CPP) -c $(CFLAGS) -o $@ $< +.S.o: + $(TOOLCHAIN_PREFIX)$(CC) -x assembler-with-cpp -c $(CFLAGS) -o $@ $< + .mm.o: $(TOOLCHAIN_PREFIX)$(CPP) -c $(CFLAGS) -o $@ $< diff --git a/basis/cpu/ppc/bootstrap.factor b/basis/cpu/ppc/bootstrap.factor index 837acd0ea1..ba2b404a06 100644 --- a/basis/cpu/ppc/bootstrap.factor +++ b/basis/cpu/ppc/bootstrap.factor @@ -1,9 +1,10 @@ -! Copyright (C) 2007, 2009 Slava Pestov. +! Copyright (C) 2007, 2010 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. USING: bootstrap.image.private kernel kernel.private namespaces system cpu.ppc.assembler compiler.codegen.fixup compiler.units -compiler.constants math math.private layouts words vocabs -slots.private locals locals.backend generic.single.private fry ; +compiler.constants math math.private math.ranges layouts words vocabs +slots.private locals locals.backend generic.single.private fry +sequences ; FROM: cpu.ppc.assembler => B ; IN: bootstrap.ppc @@ -13,28 +14,88 @@ big-endian on CONSTANT: ds-reg 13 CONSTANT: rs-reg 14 CONSTANT: vm-reg 15 +CONSTANT: ctx-reg 16 -: factor-area-size ( -- n ) 4 bootstrap-cells ; +: factor-area-size ( -- n ) 16 ; : stack-frame ( -- n ) - factor-area-size c-area-size + 4 bootstrap-cells align ; + reserved-size + factor-area-size + + 16 align ; -: next-save ( -- n ) stack-frame bootstrap-cell - ; -: xt-save ( -- n ) stack-frame 2 bootstrap-cells - ; +: next-save ( -- n ) stack-frame 4 - ; +: xt-save ( -- n ) stack-frame 8 - ; + +: param-size ( -- n ) 32 ; + +: save-at ( m -- n ) reserved-size + param-size + ; + +: save-int ( register offset -- ) [ 1 ] dip save-at STW ; +: restore-int ( register offset -- ) [ 1 ] dip save-at LWZ ; + +: save-fp ( register offset -- ) [ 1 ] dip save-at STFD ; +: restore-fp ( register offset -- ) [ 1 ] dip save-at LFD ; + +: save-vec ( 
register offset -- ) save-at 2 LI 2 1 STVXL ; +: restore-vec ( register offset -- ) save-at 2 LI 2 1 LVXL ; + +: nv-int-regs ( -- seq ) 13 31 [a,b] ; +: nv-fp-regs ( -- seq ) 14 31 [a,b] ; +: nv-vec-regs ( -- seq ) 20 31 [a,b] ; + +: saved-int-regs-size ( -- n ) 96 ; +: saved-fp-regs-size ( -- n ) 144 ; +: saved-vec-regs-size ( -- n ) 208 ; + +: callback-frame-size ( -- n ) + reserved-size + param-size + + saved-int-regs-size + + saved-fp-regs-size + + saved-vec-regs-size + + 16 align ; + +[ + 0 MFLR + 1 1 callback-frame-size neg STWU + 0 1 callback-frame-size lr-save + STW + + nv-int-regs [ cells save-int ] each-index + nv-fp-regs [ 8 * 80 + save-fp ] each-index + nv-vec-regs [ 16 * 224 + save-vec ] each-index + + 0 vm-reg LOAD32 rc-absolute-ppc-2/2 rt-vm jit-rel + + 0 2 LOAD32 rc-absolute-ppc-2/2 rt-xt jit-rel + 2 MTLR + BLRL + + nv-vec-regs [ 16 * 224 + restore-vec ] each-index + nv-fp-regs [ 8 * 80 + restore-fp ] each-index + nv-int-regs [ cells restore-int ] each-index + + 0 1 callback-frame-size lr-save + LWZ + 1 1 0 LWZ + 0 MTLR + BLR +] callback-stub jit-define : jit-conditional* ( test-quot false-quot -- ) - [ '[ bootstrap-cell /i 1 + @ ] ] dip jit-conditional ; inline + [ '[ 4 /i 1 + @ ] ] dip jit-conditional ; inline + +: jit-load-context ( -- ) + ctx-reg vm-reg vm-context-offset LWZ ; : jit-save-context ( -- ) - 4 vm-reg 0 LWZ - 1 4 0 STW - ds-reg 4 8 STW - rs-reg 4 12 STW ; + jit-load-context + 1 ctx-reg context-callstack-top-offset STW + ds-reg ctx-reg context-datastack-offset STW + rs-reg ctx-reg context-retainstack-offset STW ; : jit-restore-context ( -- ) - 4 vm-reg 0 LWZ - ds-reg 4 8 LWZ - rs-reg 4 12 LWZ ; + jit-load-context + ds-reg ctx-reg context-datastack-offset LWZ + rs-reg ctx-reg context-retainstack-offset LWZ ; [ 0 3 LOAD32 rc-absolute-ppc-2/2 rt-literal jit-rel @@ -181,7 +242,7 @@ CONSTANT: vm-reg 15 load-tag 0 4 tuple type-number tag-fixnum CMPI [ BNE ] - [ 4 3 tuple type-number neg bootstrap-cell + LWZ ] + [ 4 3 tuple type-number neg 4 + LWZ 
] jit-conditional* ] pic-tuple jit-define @@ -230,7 +291,7 @@ CONSTANT: vm-reg 15 ! key = hashcode(class) 5 4 1 SRAWI ! key &= cache.length - 1 - 5 5 mega-cache-size get 1 - bootstrap-cell * ANDI + 5 5 mega-cache-size get 1 - 4 * ANDI ! cache += array-start-offset 3 3 array-start-offset ADDI ! cache += key @@ -245,7 +306,7 @@ CONSTANT: vm-reg 15 5 4 0 LWZ 5 5 1 ADDI 5 4 0 STW - ! ... goto get(cache + bootstrap-cell) + ! ... goto get(cache + 4) 3 3 4 LWZ 3 3 word-xt-offset LWZ 3 MTCTR @@ -255,19 +316,12 @@ CONSTANT: vm-reg 15 ! fall-through on miss ] mega-lookup jit-define -[ - 0 2 LOAD32 rc-absolute-ppc-2/2 rt-xt jit-rel - 2 MTCTR - BCTR -] callback-stub jit-define - ! ! ! Sub-primitives ! Quotations and words [ 3 ds-reg 0 LWZ ds-reg dup 4 SUBI - 4 vm-reg MR 5 3 quot-xt-offset LWZ ] [ 5 MTLR BLRL ] @@ -288,6 +342,75 @@ CONSTANT: vm-reg 15 4 MTCTR BCTR ] jit-execute jit-define +! Special primitives +[ + jit-restore-context + ! Save ctx->callstack_bottom + 1 ctx-reg context-callstack-bottom-offset STW + ! Call quotation + 5 3 quot-xt-offset LWZ + 5 MTLR + BLRL + jit-save-context +] \ c-to-factor define-sub-primitive + +[ + ! Unwind stack frames + 1 4 MR + + ! Load ds and rs registers + jit-restore-context + + ! We have changed the stack; load return address again + 0 1 stack-frame lr-save + LWZ + 0 MTLR + + ! Call quotation + 4 3 quot-xt-offset LWZ + 4 MTCTR + BCTR +] \ unwind-native-frames define-sub-primitive + +[ + ! Load callstack object + 6 ds-reg 0 LWZ + ds-reg ds-reg 4 SUBI + ! Get ctx->callstack_bottom + jit-load-context + 3 ctx-reg context-callstack-bottom-offset LWZ + ! Get top of callstack object -- 'src' for memcpy + 4 6 callstack-top-offset ADDI + ! Get callstack length, in bytes --- 'len' for memcpy + 5 6 callstack-length-offset LWZ + 5 5 tag-bits get SRAWI + ! Compute new stack pointer -- 'dst' for memcpy + 3 3 5 SUBF + ! Install new stack pointer + 1 3 MR + ! 
Call memcpy; arguments are now in the correct registers + 1 1 -64 STWU + 0 2 LOAD32 "factor_memcpy" f rc-absolute-ppc-2/2 jit-dlsym + 2 MTLR + BLRL + 1 1 0 LWZ + ! Return with new callstack + 0 1 lr-save stack-frame + LWZ + 0 MTLR + BLR +] \ set-callstack define-sub-primitive + +[ + jit-save-context + 4 vm-reg MR + 0 2 LOAD32 "lazy_jit_compile" f rc-absolute-ppc-2/2 jit-dlsym + 2 MTLR + BLRL + 5 3 quot-xt-offset LWZ +] +[ 5 MTLR BLRL ] +[ 5 MTCTR BCTR ] +\ lazy-jit-compile define-combinator-primitive + ! Objects [ 3 ds-reg 0 LWZ diff --git a/basis/cpu/ppc/linux/bootstrap.factor b/basis/cpu/ppc/linux/bootstrap.factor index a5250414ab..2f463dea00 100644 --- a/basis/cpu/ppc/linux/bootstrap.factor +++ b/basis/cpu/ppc/linux/bootstrap.factor @@ -1,10 +1,10 @@ -! Copyright (C) 2007, 2008 Slava Pestov. +! Copyright (C) 2007, 2010 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: parser layouts system kernel sequences ; +USING: parser system kernel sequences ; IN: bootstrap.ppc -: c-area-size ( -- n ) 10 bootstrap-cells ; -: lr-save ( -- n ) bootstrap-cell ; +: reserved-size ( -- n ) 24 ; +: lr-save ( -- n ) 4 ; << "vocab:cpu/ppc/bootstrap.factor" parse-file suffix! >> call diff --git a/basis/cpu/ppc/macosx/bootstrap.factor b/basis/cpu/ppc/macosx/bootstrap.factor index 2aa0ddc4a2..0960011c70 100644 --- a/basis/cpu/ppc/macosx/bootstrap.factor +++ b/basis/cpu/ppc/macosx/bootstrap.factor @@ -1,10 +1,10 @@ -! Copyright (C) 2007, 2008 Slava Pestov. +! Copyright (C) 2007, 2010 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: parser layouts system kernel sequences ; +USING: parser system kernel sequences ; IN: bootstrap.ppc -: c-area-size ( -- n ) 14 bootstrap-cells ; -: lr-save ( -- n ) 2 bootstrap-cells ; +: reserved-size ( -- n ) 24 ; +: lr-save ( -- n ) 8 ; << "vocab:cpu/ppc/bootstrap.factor" parse-file suffix! 
>> call diff --git a/vm/Config.ppc b/vm/Config.ppc index 8b13789179..1ded04dda1 100644 --- a/vm/Config.ppc +++ b/vm/Config.ppc @@ -1 +1 @@ - +PLAF_DLL_OBJS += vm/cpu-ppc.o diff --git a/vm/cpu-ppc.S b/vm/cpu-ppc.S index b387bafbbf..835ed14cc2 100644 --- a/vm/cpu-ppc.S +++ b/vm/cpu-ppc.S @@ -1,367 +1,73 @@ -/* Parts of this file were snarfed from SBCL src/runtime/ppc-assem.S, which is -in the public domain. */ -#if defined(__APPLE__) || (defined(WINDOWS) && !defined(__arm__)) - #define MANGLE(sym) _##sym +#if defined(__APPLE__) + #define MANGLE(sym) _##sym + #define XX @ #else - #define MANGLE(sym) sym -#endif - -/* Apple's PPC assembler is out of date? */ -#if defined(__APPLE__) && defined(__ppc__) - #define XX @ -#else - #define XX ; + #define MANGLE(sym) sym + #define XX ; #endif /* The returns and args are just for documentation */ #define DEF(returns,symbol,args) .globl MANGLE(symbol) XX \ MANGLE(symbol) -#define DS_REG r13 -#define RS_REG r14 -#define VM_REG r15 - -#define CALL_OR_JUMP_QUOT \ - lwz r11,12(r3) /* load quotation-xt slot */ XX \ - -#define CALL_QUOT \ - CALL_OR_JUMP_QUOT XX \ - mtlr r11 /* prepare to call XT with quotation in r3 */ XX \ - blrl /* go */ - -#define JUMP_QUOT \ - CALL_OR_JUMP_QUOT XX \ - mtctr r11 /* prepare to call XT with quotation in r3 */ XX \ - bctr /* go */ - -#define PARAM_SIZE 32 - -#define SAVED_INT_REGS_SIZE 96 - -#define SAVED_FP_REGS_SIZE 144 - -#define SAVED_V_REGS_SIZE 208 - -#define FRAME (RESERVED_SIZE + PARAM_SIZE + SAVED_INT_REGS_SIZE + SAVED_FP_REGS_SIZE + SAVED_V_REGS_SIZE + 8) - -#if defined( __APPLE__) - #define LR_SAVE 8 - #define RESERVED_SIZE 24 -#else - #define LR_SAVE 4 - #define RESERVED_SIZE 8 -#endif - -#define SAVE_LR(reg) stw reg,(LR_SAVE + FRAME)(r1) - -#define LOAD_LR(reg) lwz reg,(LR_SAVE + FRAME)(r1) - -#define SAVE_AT(offset) (RESERVED_SIZE + PARAM_SIZE + 4 * offset) - -#define SAVE_INT(register,offset) stw register,SAVE_AT(offset)(r1) -#define RESTORE_INT(register,offset) lwz 
register,SAVE_AT(offset)(r1) - -#define SAVE_FP(register,offset) stfd register,SAVE_AT(offset)(r1) -#define RESTORE_FP(register,offset) lfd register,SAVE_AT(offset)(r1) - -#define SAVE_V(register,offset) \ - li r2,SAVE_AT(offset) XX \ - stvxl register,r2,r1 - -#define RESTORE_V(register,offset) \ - li r2,SAVE_AT(offset) XX \ - lvxl register,r2,r1 - -#define PROLOGUE \ - mflr r0 XX /* get caller's return address */ \ - stwu r1,-FRAME(r1) XX /* create a stack frame to hold non-volatile registers */ \ - SAVE_LR(r0) - -#define EPILOGUE \ - LOAD_LR(r0) XX \ - lwz r1,0(r1) XX /* destroy the stack frame */ \ - mtlr r0 /* get ready to return */ - -/* We have to save and restore nonvolatile registers because -the Factor compiler treats the entire register file as volatile. */ -DEF(void,c_to_factor,(cell quot, void *vm)): - PROLOGUE - - SAVE_INT(r13,0) - SAVE_INT(r14,1) - SAVE_INT(VM_REG,2) - SAVE_INT(r16,3) - SAVE_INT(r17,4) - SAVE_INT(r18,5) - SAVE_INT(r19,6) - SAVE_INT(r20,7) - SAVE_INT(r21,8) - SAVE_INT(r22,9) - SAVE_INT(r23,10) - SAVE_INT(r24,11) - SAVE_INT(r25,12) - SAVE_INT(r26,13) - SAVE_INT(r27,14) - SAVE_INT(r28,15) - SAVE_INT(r29,16) - SAVE_INT(r30,17) - SAVE_INT(r31,18) - - SAVE_FP(f14,20) - SAVE_FP(f15,22) - SAVE_FP(f16,24) - SAVE_FP(f17,26) - SAVE_FP(f18,28) - SAVE_FP(f19,30) - SAVE_FP(f20,32) - SAVE_FP(f21,34) - SAVE_FP(f22,36) - SAVE_FP(f23,38) - SAVE_FP(f24,40) - SAVE_FP(f25,42) - SAVE_FP(f26,44) - SAVE_FP(f27,46) - SAVE_FP(f28,48) - SAVE_FP(f29,50) - SAVE_FP(f30,52) - SAVE_FP(f31,54) - - SAVE_V(v20,56) - SAVE_V(v21,60) - SAVE_V(v22,64) - SAVE_V(v23,68) - SAVE_V(v24,72) - SAVE_V(v25,76) - SAVE_V(v26,80) - SAVE_V(v27,84) - SAVE_V(v28,88) - SAVE_V(v29,92) - SAVE_V(v30,96) - SAVE_V(v31,100) - - /* r4 vm ptr preserved */ - mfvscr v0 - li r2,SAVE_AT(104) - stvxl v0,r2,r1 - addi r2,r2,0xc - lwzx r5,r2,r1 - lis r6,0x1 - andc r5,r5,r6 - stwx r5,r2,r1 - subi r2,r2,0xc - lvxl v0,r2,r1 - mtvscr v0 - - - /* Load context */ - mr VM_REG,r4 - lwz r16,0(VM_REG) - - /* Load 
ctx->datastack */ - lwz DS_REG,8(r16) - - /* Load ctx->retainstack */ - lwz RS_REG,12(r16) - - /* Save ctx->callstack_bottom */ - stw r1,4(r16) - - CALL_QUOT - - /* Load context */ - lwz r16,0(VM_REG) - - /* Save ctx->datastack */ - stw DS_REG,8(r16) - - /* Save ctx->retainstack */ - stw RS_REG,12(r16) - - RESTORE_V(v0,104) - mtvscr v0 - - RESTORE_V(v31,100) - RESTORE_V(v30,96) - RESTORE_V(v29,92) - RESTORE_V(v28,88) - RESTORE_V(v27,84) - RESTORE_V(v26,80) - RESTORE_V(v25,76) - RESTORE_V(v24,72) - RESTORE_V(v23,68) - RESTORE_V(v22,64) - RESTORE_V(v21,60) - RESTORE_V(v20,56) - - RESTORE_FP(f31,54) - RESTORE_FP(f30,52) - RESTORE_FP(f29,50) - RESTORE_FP(f28,48) - RESTORE_FP(f27,46) - RESTORE_FP(f26,44) - RESTORE_FP(f25,42) - RESTORE_FP(f24,40) - RESTORE_FP(f23,38) - RESTORE_FP(f22,36) - RESTORE_FP(f21,34) - RESTORE_FP(f20,32) - RESTORE_FP(f19,30) - RESTORE_FP(f18,28) - RESTORE_FP(f17,26) - RESTORE_FP(f16,24) - RESTORE_FP(f15,22) - RESTORE_FP(f14,20) - - RESTORE_INT(r31,18) - RESTORE_INT(r30,17) - RESTORE_INT(r29,16) - RESTORE_INT(r28,15) - RESTORE_INT(r27,14) - RESTORE_INT(r26,13) - RESTORE_INT(r25,12) - RESTORE_INT(r24,11) - RESTORE_INT(r23,10) - RESTORE_INT(r22,9) - RESTORE_INT(r21,8) - RESTORE_INT(r20,7) - RESTORE_INT(r19,6) - RESTORE_INT(r18,5) - RESTORE_INT(r17,4) - RESTORE_INT(r16,3) - RESTORE_INT(VM_REG,2) - RESTORE_INT(r14,1) - RESTORE_INT(r13,0) - - EPILOGUE - blr - -DEF(void,set_callstack,(void *vm, stack_frame *to, stack_frame *from, cell length, void *memcpy)): - /* Save VM pointer in non-volatile register */ - mr VM_REG,r3 - - /* Compute new stack pointer */ - sub r1,r4,r6 - - /* Call memcpy() */ - mr r3,r1 - mr r4,r5 - mr r5,r6 - stwu r1,-64(r1) - mtlr r7 - blrl - lwz r1,0(r1) - - /* Load context */ - lwz r16,0(VM_REG) - - /* Load ctx->datastack */ - lwz DS_REG,8(r16) - - /* Load ctx->retainstack */ - lwz RS_REG,12(r16) - - /* We have changed the stack; load return address again */ - lwz r0,LR_SAVE(r1) - mtlr r0 - blr - -DEF(void,throw_impl,(cell quot, 
void *new_stack, void *vm)): - /* compute new stack pointer */ - mr r1,r4 - - /* make vm ptr 2nd arg in case quot->xt == lazy_jit_compile */ - mr r4,r5 - - /* Load context */ - mr VM_REG,r5 - lwz r16,0(VM_REG) - - /* Load ctx->datastack */ - lwz DS_REG,8(r16) - - /* Load ctx->retainstack */ - lwz RS_REG,12(r16) - - /* We have changed the stack; load return address again */ - lwz r0,LR_SAVE(r1) - mtlr r0 - - /* Call the quotation */ - JUMP_QUOT - -DEF(void,lazy_jit_compile_impl,(cell quot, void *vm)): - /* Load context */ - mr VM_REG,r4 - lwz r16,0(VM_REG) - - /* Save ctx->datastack */ - stw DS_REG,8(r16) - - /* Save ctx->retainstack */ - stw RS_REG,12(r16) - - /* Save ctx->callstack_top */ - stw r1,0(r16) - - /* Compile quotation */ - PROLOGUE - bl MANGLE(lazy_jit_compile) - EPILOGUE - - /* Call the quotation */ - JUMP_QUOT - /* Thanks to Joshua Grams for this code. On PowerPC processors, we must flush the instruction cache manually after writing to the code heap. */ -DEF(void,flush_icache,(void *start, int len)): - /* compute number of cache lines to flush */ - add r4,r4,r3 - clrrwi r3,r3,5 /* align addr to next lower cache line boundary */ - sub r4,r4,r3 /* then n_lines = (len + 0x1f) / 0x20 */ - addi r4,r4,0x1f - srwi. r4,r4,5 /* note '.' suffix */ - beqlr /* if n_lines == 0, just return. */ - mtctr r4 /* flush cache lines */ -0: dcbf 0,r3 /* for each line... */ - sync - icbi 0,r3 - addi r3,r3,0x20 - bdnz 0b - sync /* finish up */ - isync - blr +DEF(void,flush_icache,(void*, int)): + /* compute number of cache lines to flush */ + add r4,r4,r3 + /* align addr to next lower cache line boundary */ + clrrwi r3,r3,5 + /* then n_lines = (len + 0x1f) / 0x20 */ + sub r4,r4,r3 + addi r4,r4,0x1f + /* note '.' suffix */ + srwi. r4,r4,5 + /* if n_lines == 0, just return. */ + beqlr + /* flush cache lines */ + mtctr r4 + /* for each line... 
*/ +0: dcbf 0,r3 + sync + icbi 0,r3 + addi r3,r3,0x20 + bdnz 0b + /* finish up */ + sync + isync + blr DEF(void,get_ppc_fpu_env,(void*)): - mffs f0 - stfd f0,0(r3) - blr + mffs f0 + stfd f0,0(r3) + blr DEF(void,set_ppc_fpu_env,(const void*)): - lfd f0,0(r3) - mtfsf 0xff,f0 - blr + lfd f0,0(r3) + mtfsf 0xff,f0 + blr DEF(void,get_ppc_vmx_env,(void*)): - mfvscr v0 - subi r4,r1,16 - li r5,0xf - andc r4,r4,r5 - stvxl v0,0,r4 - li r5,0xc - lwzx r6,r5,r4 - stw r6,0(r3) - blr + mfvscr v0 + subi r4,r1,16 + li r5,0xf + andc r4,r4,r5 + stvxl v0,0,r4 + li r5,0xc + lwzx r6,r5,r4 + stw r6,0(r3) + blr DEF(void,set_ppc_vmx_env,(const void*)): - subi r4,r1,16 - li r5,0xf - andc r4,r4,r5 - li r5,0xc - lwz r6,0(r3) - stwx r6,r5,r4 - lvxl v0,0,r4 - mtvscr v0 - blr + subi r4,r1,16 + li r5,0xf + andc r4,r4,r5 + li r5,0xc + lwz r6,0(r3) + stwx r6,r5,r4 + lvxl v0,0,r4 + mtvscr v0 + blr