diff --git a/basis/cpu/ppc/bootstrap.factor b/basis/cpu/ppc/bootstrap.factor index 6a00dec12f..b09938f4b9 100644 --- a/basis/cpu/ppc/bootstrap.factor +++ b/basis/cpu/ppc/bootstrap.factor @@ -21,43 +21,48 @@ CONSTANT: rs-reg 14 : xt-save ( -- n ) stack-frame 2 bootstrap-cells - ; [ - 0 6 LOAD32 rc-absolute-ppc-2/2 rt-immediate jit-rel - 11 6 profile-count-offset LWZ + 0 3 LOAD32 rc-absolute-ppc-2/2 rt-immediate jit-rel + 11 3 profile-count-offset LWZ 11 11 1 tag-fixnum ADDI - 11 6 profile-count-offset STW - 11 6 word-code-offset LWZ + 11 3 profile-count-offset STW + 11 3 word-code-offset LWZ 11 11 compiled-header-size ADDI 11 MTCTR BCTR ] jit-profiling jit-define [ - 0 6 LOAD32 rc-absolute-ppc-2/2 rt-this jit-rel + 0 3 LOAD32 rc-absolute-ppc-2/2 rt-this jit-rel 0 MFLR 1 1 stack-frame SUBI - 6 1 xt-save STW - stack-frame 6 LI - 6 1 next-save STW + 3 1 xt-save STW + stack-frame 3 LI + 3 1 next-save STW 0 1 lr-save stack-frame + STW ] jit-prolog jit-define [ - 0 6 LOAD32 rc-absolute-ppc-2/2 rt-immediate jit-rel - 6 ds-reg 4 STWU + 0 3 LOAD32 rc-absolute-ppc-2/2 rt-immediate jit-rel + 3 ds-reg 4 STWU ] jit-push-immediate jit-define [ - 0 6 LOAD32 rc-absolute-ppc-2/2 rt-stack-chain jit-rel - 7 6 0 LWZ - 1 7 0 STW - 0 6 LOAD32 rc-absolute-ppc-2/2 rt-primitive jit-rel - 6 MTCTR + 0 3 LOAD32 rc-absolute-ppc-2/2 rt-stack-chain jit-rel + 4 3 0 LWZ + 1 4 0 STW + 0 3 LOAD32 rc-absolute-ppc-2/2 rt-primitive jit-rel + 3 MTCTR BCTR ] jit-primitive jit-define [ 0 BL rc-relative-ppc-3 rt-xt-pic jit-rel ] jit-word-call jit-define -[ 0 B rc-relative-ppc-3 rt-xt jit-rel ] jit-word-jump jit-define +[ + 0 6 LOAD32 rc-absolute-ppc-2/2 rt-here jit-rel + 0 B rc-relative-ppc-3 rt-xt-pic-tail jit-rel +] jit-word-jump jit-define + +[ 0 B rc-relative-ppc-3 rt-xt jit-rel ] jit-word-special jit-define [ 3 ds-reg 0 LWZ @@ -152,6 +157,9 @@ CONSTANT: rs-reg 14 ! ! ! Polymorphic inline caches +! Don't touch r6 here; it's used to pass the tail call site +! address for tail PICs + ! 
Load a value from a stack position [ 4 ds-reg 0 LWZ rc-absolute-ppc-2 rt-untagged jit-rel @@ -225,7 +233,7 @@ CONSTANT: rs-reg 14 ! if(get(cache) == class) 6 3 0 LWZ 6 0 4 CMP - 5 BNE + 10 BNE ! megamorphic_cache_hits++ 0 4 LOAD32 rc-absolute-ppc-2/2 rt-megamorphic-cache-hits jit-rel 5 4 0 LWZ diff --git a/basis/cpu/ppc/ppc.factor b/basis/cpu/ppc/ppc.factor index a11b0daa86..beee48e5ea 100644 --- a/basis/cpu/ppc/ppc.factor +++ b/basis/cpu/ppc/ppc.factor @@ -3,9 +3,10 @@ USING: accessors assocs sequences kernel combinators make math math.order math.ranges system namespaces locals layouts words alien alien.c-types literals cpu.architecture cpu.ppc.assembler -literals compiler.cfg.registers compiler.cfg.instructions -compiler.constants compiler.codegen compiler.codegen.fixup -compiler.cfg.intrinsics compiler.cfg.stack-frame ; +cpu.ppc.assembler.backend literals compiler.cfg.registers +compiler.cfg.instructions compiler.constants compiler.codegen +compiler.codegen.fixup compiler.cfg.intrinsics +compiler.cfg.stack-frame ; IN: cpu.ppc ! PowerPC register assignments: @@ -116,7 +117,7 @@ M: ppc stack-frame-size ( stack-frame -- i ) M: ppc %call ( word -- ) 0 BL rc-relative-ppc-3 rel-word-pic ; M: ppc %jump ( word -- ) - 0 3 LOAD32 rc-absolute-ppc-2/2 rel-here + 0 6 LOAD32 8 rc-absolute-ppc-2/2 rel-here 0 B rc-relative-ppc-3 rel-word-pic-tail ; M: ppc %jump-label ( label -- ) B ; @@ -130,7 +131,7 @@ M:: ppc %dispatch ( src temp offset -- ) BCTR ; M: ppc %dispatch-label ( word -- ) - 0 , rc-absolute-cell rel-word ; + B{ 0 0 0 0 } % rc-absolute-cell rel-word ; :: (%slot) ( obj slot tag temp -- reg offset ) temp slot obj ADD diff --git a/basis/cpu/x86/bootstrap.factor b/basis/cpu/x86/bootstrap.factor index 994591adcf..474ce2ea46 100644 --- a/basis/cpu/x86/bootstrap.factor +++ b/basis/cpu/x86/bootstrap.factor @@ -233,7 +233,7 @@ big-endian off temp0 temp2 ADD ! if(get(cache) == class) temp0 [] temp1 CMP - bootstrap-cell 4 = 14 18 ? JNE ! Yuck! + bootstrap-cell 4 = 14 22 ? JNE ! 
Yuck! ! megamorphic_cache_hits++ temp1 0 MOV rc-absolute-cell rt-megamorphic-cache-hits jit-rel temp1 [] 1 ADD diff --git a/vm/cpu-ppc.S b/vm/cpu-ppc.S index f8dad4b2b2..a372b2b1f5 100755 --- a/vm/cpu-ppc.S +++ b/vm/cpu-ppc.S @@ -236,8 +236,10 @@ DEF(void,flush_icache,(void *start, int len)): blr DEF(void,primitive_inline_cache_miss,(void)): - mflr r3 + mflr r6 +DEF(void,primitive_inline_cache_miss_tail,(void)): PROLOGUE + mr r3,r6 bl MANGLE(inline_cache_miss) EPILOGUE mtctr r3 diff --git a/vm/cpu-ppc.hpp b/vm/cpu-ppc.hpp index d393223d8d..ae7f93ebf7 100755 --- a/vm/cpu-ppc.hpp +++ b/vm/cpu-ppc.hpp @@ -7,11 +7,22 @@ namespace factor register cell ds asm("r13"); register cell rs asm("r14"); +/* In the instruction sequence: + + LOAD32 r6,... + B blah + + the offset from the immediate operand of LOAD32 to the instruction after + the branch is two instructions. */ +static const fixnum xt_tail_pic_offset = 4 * 2; + inline static void check_call_site(cell return_address) { #ifdef FACTOR_DEBUG cell insn = *(cell *)return_address; - assert((insn & 0x3) == 0x1); + /* Check that absolute bit is 0 */ + assert((insn & 0x2) == 0x0); + /* Check that instruction is branch */ assert((insn >> 26) == 0x12); #endif } @@ -21,8 +32,8 @@ inline static void check_call_site(cell return_address) inline static void *get_call_target(cell return_address) { return_address -= sizeof(cell); - check_call_site(return_address); + cell insn = *(cell *)return_address; cell unsigned_addr = (insn & B_MASK); fixnum signed_addr = (fixnum)(unsigned_addr << 6) >> 6; @@ -32,19 +43,25 @@ inline static void *get_call_target(cell return_address) inline static void set_call_target(cell return_address, void *target) { return_address -= sizeof(cell); - -#ifdef FACTOR_DEBUG - assert((return_address & ~B_MASK) == 0); check_call_site(return_address); -#endif + cell insn = *(cell *)return_address; - insn = ((insn & ~B_MASK) | (((cell)target - 
return_address); + insn = ((insn & ~B_MASK) | (relative_address & B_MASK)); *(cell *)return_address = insn; /* Flush the cache line containing the call we just patched */ __asm__ __volatile__ ("icbi 0, %0\n" "sync\n"::"r" (return_address):); } +inline static bool tail_call_site_p(cell return_address) +{ + return_address -= sizeof(cell); + cell insn = *(cell *)return_address; + return (insn & 0x1) == 0; +} + /* Defined in assembly */ VM_ASM_API void c_to_factor(cell quot); VM_ASM_API void throw_impl(cell quot, stack_frame *rewind);