diff --git a/basis/cpu/ppc/bootstrap.factor b/basis/cpu/ppc/bootstrap.factor index 7278fd2092..5451cf2b79 100644 --- a/basis/cpu/ppc/bootstrap.factor +++ b/basis/cpu/ppc/bootstrap.factor @@ -9,8 +9,8 @@ IN: bootstrap.ppc 4 \ cell set big-endian on -CONSTANT: ds-reg 29 -CONSTANT: rs-reg 30 +CONSTANT: ds-reg 13 +CONSTANT: rs-reg 14 : factor-area-size ( -- n ) 4 bootstrap-cells ; @@ -138,6 +138,16 @@ CONSTANT: rs-reg 30 jit-3r> ] jit-3dip jit-define +: prepare-(execute) ( -- operand ) + 3 ds-reg 0 LWZ + ds-reg dup 4 SUBI + 4 3 word-xt-offset LWZ + 4 ; + +[ prepare-(execute) MTCTR BCTR ] jit-execute-jump jit-define + +[ prepare-(execute) MTLR BLRL ] jit-execute-call jit-define + [ 0 1 lr-save stack-frame + LWZ 1 1 stack-frame ADDI @@ -146,7 +156,91 @@ CONSTANT: rs-reg 30 [ BLR ] jit-return jit-define -! Sub-primitives +! ! ! Polymorphic inline caches + +! Load a value from a stack position +[ + 4 ds-reg 0 LWZ rc-absolute-ppc-2 rt-untagged jit-rel +] pic-load jit-define + +! Tag +: load-tag ( -- ) + 4 4 tag-mask get ANDI + 4 4 tag-bits get SLWI ; + +[ load-tag ] pic-tag jit-define + +! Hi-tag +[ + 3 4 MR + load-tag + 0 4 object tag-number tag-fixnum CMPI + 2 BNE + 4 3 object tag-number neg LWZ +] pic-hi-tag jit-define + +! Tuple +[ + 3 4 MR + load-tag + 0 4 tuple tag-number tag-fixnum CMPI + 2 BNE + 4 3 tuple tag-number neg bootstrap-cell + LWZ +] pic-tuple jit-define + +! Hi-tag and tuple +[ + 3 4 MR + load-tag + ! If bits 2 and 3 are set, the tag is either 6 (object) or 7 (tuple) + 0 4 BIN: 110 tag-fixnum CMPI + 5 BLT + ! Untag r3 + 3 3 0 0 31 tag-bits get - RLWINM + ! Set r4 to 0 for objects, and bootstrap-cell for tuples + 4 4 1 tag-fixnum ANDI + 4 4 1 SRAWI + ! Load header cell or tuple layout cell + 4 4 3 LWZX +] pic-hi-tag-tuple jit-define + +[ + 0 4 0 CMPI rc-absolute-ppc-2 rt-immediate jit-rel +] pic-check-tag jit-define + +[ + 0 5 LOAD32 rc-absolute-ppc-2/2 rt-immediate jit-rel + 4 0 5 CMP +] pic-check jit-define + +[ 2 BNE 0 B rc-relative-ppc-3 rt-xt jit-rel ] pic-hit jit-define + +! ! ! Megamorphic caches + +[ + ! cache = ... + 0 3 LOAD32 rc-absolute-ppc-2/2 rt-immediate jit-rel + ! key = class + 5 4 MR + ! key &= cache.length - 1 + 5 5 mega-cache-size get 1- bootstrap-cell * ANDI + ! cache += array-start-offset + 3 3 array-start-offset ADDI + ! cache += key + 3 3 5 ADD + ! if(get(cache) == class) + 6 3 0 LWZ + 6 0 4 CMP + 5 BNE + ! ... goto get(cache + bootstrap-cell) + 3 3 4 LWZ + 3 3 word-xt-offset LWZ + 3 MTCTR + BCTR + ! fall-through on miss +] mega-lookup jit-define + +! ! ! Sub-primitives ! Quotations and words [ @@ -157,14 +251,6 @@ CONSTANT: rs-reg 30 BCTR ] \ (call) define-sub-primitive -[ - 3 ds-reg 0 LWZ - ds-reg dup 4 SUBI - 4 3 word-xt-offset LWZ - 4 MTCTR - BCTR -] \ (execute) define-sub-primitive - ! Objects [ 3 ds-reg 0 LWZ diff --git a/basis/cpu/ppc/ppc.factor b/basis/cpu/ppc/ppc.factor index 85bf188bb8..a6beb42399 100644 --- a/basis/cpu/ppc/ppc.factor +++ b/basis/cpu/ppc/ppc.factor @@ -1,20 +1,19 @@ -! Copyright (C) 2005, 2008 Slava Pestov. +! Copyright (C) 2005, 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. USING: accessors assocs sequences kernel combinators make math math.order math.ranges system namespaces locals layouts words -alien alien.c-types cpu.architecture cpu.ppc.assembler -compiler.cfg.registers compiler.cfg.instructions +alien alien.c-types literals cpu.architecture cpu.ppc.assembler +literals compiler.cfg.registers compiler.cfg.instructions compiler.constants compiler.codegen compiler.codegen.fixup compiler.cfg.intrinsics compiler.cfg.stack-frame ; IN: cpu.ppc ! PowerPC register assignments: -! r2-r27: integer vregs -! r28: integer scratch -! r29: data stack -! r30: retain stack +! r2-r12: integer vregs +! r15-r29 +! r30: integer scratch ! f0-f29: float vregs -! f30, f31: float scratch +! f30: float scratch enable-float-intrinsics @@ -23,11 +22,11 @@ enable-float-intrinsics M: ppc machine-registers { - { int-regs T{ range f 2 26 1 } } - { double-float-regs T{ range f 0 29 1 } } + { int-regs $[ 2 12 [a,b] 15 29 [a,b] append ] } + { double-float-regs $[ 0 29 [a,b] ] } } ; -CONSTANT: scratch-reg 28 +CONSTANT: scratch-reg 30 CONSTANT: fp-scratch-reg 30 M: ppc two-operand? f ; @@ -40,8 +39,8 @@ M: ppc %load-reference ( reg obj -- ) M: ppc %alien-global ( register symbol dll -- ) [ 0 swap LOAD32 ] 2dip rc-absolute-ppc-2/2 rel-dlsym ; -CONSTANT: ds-reg 29 -CONSTANT: rs-reg 30 +CONSTANT: ds-reg 13 +CONSTANT: rs-reg 14 GENERIC: loc-reg ( loc -- reg ) diff --git a/basis/cpu/x86/bootstrap.factor b/basis/cpu/x86/bootstrap.factor index fcd8ed0eee..fc7fbc88b9 100644 --- a/basis/cpu/x86/bootstrap.factor +++ b/basis/cpu/x86/bootstrap.factor @@ -194,7 +194,7 @@ big-endian off [ ! Untag temp0 temp0 tag-mask get bitnot AND - ! Set temp1 to 0 for objects, and 8 for tuples + ! Set temp1 to 0 for objects, and bootstrap-cell for tuples temp1 1 tag-fixnum AND bootstrap-cell 4 = [ temp1 1 SHR ] when ! Load header cell or tuple layout cell diff --git a/basis/math/miller-rabin/miller-rabin.factor b/basis/math/miller-rabin/miller-rabin.factor index 8c36dd96fe..5e999aa956 100755 --- a/basis/math/miller-rabin/miller-rabin.factor +++ b/basis/math/miller-rabin/miller-rabin.factor @@ -8,6 +8,8 @@ IN: math.miller-rabin : >odd ( n -- int ) dup even? [ 1 + ] when ; foldable +: >even ( n -- int ) 0 clear-bit ; foldable + TUPLE: positive-even-expected n ; :: (miller-rabin) ( n trials -- ? ) @@ -97,6 +99,7 @@ PRIVATE> } 1&& ; : next-safe-prime ( n -- q ) + 1 - >even 2 / next-safe-prime-candidate dup >safe-prime-form dup miller-rabin diff --git a/basis/tools/time/time.factor b/basis/tools/time/time.factor index 65e87f976f..948c0d482d 100644 --- a/basis/tools/time/time.factor +++ b/basis/tools/time/time.factor @@ -1,6 +1,6 @@ ! Copyright (C) 2003, 2008 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. -USING: kernel math math.vectors memory io io.styles prettyprint +USING: kernel math memory io io.styles prettyprint namespaces system sequences splitting grouping assocs strings generic.single combinators ; IN: tools.time diff --git a/core/hashtables/hashtables.factor b/core/hashtables/hashtables.factor index 0914134bb6..03bc3e01fd 100644 --- a/core/hashtables/hashtables.factor +++ b/core/hashtables/hashtables.factor @@ -139,14 +139,14 @@ M: hashtable set-at ( value key hash -- ) PRIVATE> M: hashtable >alist - [ array>> [ length 2/ iota ] keep ] [ assoc-size ] bi [ + [ array>> [ length 2/ ] keep ] [ assoc-size ] bi [ [ [ [ 1 fixnum-shift-fast ] dip [ array-nth ] [ [ 1 fixnum+fast ] dip array-nth ] 2bi ] dip pick tombstone? [ 3drop ] [ [ 2array ] dip push-unsafe ] if - ] 2curry each + ] 2curry each-integer ] keep { } like ; M: hashtable clone diff --git a/vm/cpu-ppc.S b/vm/cpu-ppc.S index 5e77c004aa..f8dad4b2b2 100755 --- a/vm/cpu-ppc.S +++ b/vm/cpu-ppc.S @@ -2,7 +2,7 @@ in the public domain. */ #include "asm.h" -#define DS_REG r29 +#define DS_REG r13 DEF(void,primitive_fixnum_add,(void)): lwz r3,0(DS_REG) @@ -45,7 +45,7 @@ multiply_overflow: /* Note that the XT is passed to the quotation in r11 */ #define CALL_OR_JUMP_QUOT \ - lwz r11,14(r3) /* load quotation-xt slot */ XX \ + lwz r11,16(r3) /* load quotation-xt slot */ XX \ #define CALL_QUOT \ CALL_OR_JUMP_QUOT XX \ @@ -100,22 +100,22 @@ the Factor compiler treats the entire register file as volatile. */ DEF(void,c_to_factor,(CELL quot)): PROLOGUE - SAVE_INT(r13,0) /* save GPRs */ - SAVE_INT(r14,1) - SAVE_INT(r15,2) - SAVE_INT(r16,3) - SAVE_INT(r17,4) - SAVE_INT(r18,5) - SAVE_INT(r19,6) - SAVE_INT(r20,7) - SAVE_INT(r21,8) - SAVE_INT(r22,9) - SAVE_INT(r23,10) - SAVE_INT(r24,11) - SAVE_INT(r25,12) - SAVE_INT(r26,13) - SAVE_INT(r27,14) - SAVE_INT(r28,15) + SAVE_INT(r15,0) /* save GPRs */ + SAVE_INT(r16,1) + SAVE_INT(r17,2) + SAVE_INT(r18,3) + SAVE_INT(r19,4) + SAVE_INT(r20,5) + SAVE_INT(r21,6) + SAVE_INT(r22,7) + SAVE_INT(r23,8) + SAVE_INT(r24,9) + SAVE_INT(r25,10) + SAVE_INT(r26,11) + SAVE_INT(r27,12) + SAVE_INT(r28,13) + SAVE_INT(r29,14) + SAVE_INT(r30,15) SAVE_INT(r31,16) SAVE_FP(f14,20) /* save FPRs */ @@ -165,22 +165,22 @@ DEF(void,c_to_factor,(CELL quot)): RESTORE_FP(f14,20) /* save FPRs */ RESTORE_INT(r31,16) /* restore GPRs */ - RESTORE_INT(r28,15) - RESTORE_INT(r27,14) - RESTORE_INT(r26,13) - RESTORE_INT(r25,12) - RESTORE_INT(r24,11) - RESTORE_INT(r23,10) - RESTORE_INT(r22,9) - RESTORE_INT(r21,8) - RESTORE_INT(r20,7) - RESTORE_INT(r19,6) - RESTORE_INT(r18,5) - RESTORE_INT(r17,4) - RESTORE_INT(r16,3) - RESTORE_INT(r15,2) - RESTORE_INT(r14,1) - RESTORE_INT(r13,0) + RESTORE_INT(r30,15) + RESTORE_INT(r29,14) + RESTORE_INT(r28,13) + RESTORE_INT(r27,12) + RESTORE_INT(r26,11) + RESTORE_INT(r25,10) + RESTORE_INT(r24,9) + RESTORE_INT(r23,8) + RESTORE_INT(r22,7) + RESTORE_INT(r21,6) + RESTORE_INT(r20,5) + RESTORE_INT(r19,4) + RESTORE_INT(r18,3) + RESTORE_INT(r17,2) + RESTORE_INT(r16,1) + RESTORE_INT(r15,0) EPILOGUE blr @@ -234,3 +234,11 @@ DEF(void,flush_icache,(void *start, int len)): sync /* finish up */ isync blr + +DEF(void,primitive_inline_cache_miss,(void)): + mflr r3 + PROLOGUE + bl MANGLE(inline_cache_miss) + EPILOGUE + mtctr r3 + bctr diff --git a/vm/cpu-ppc.hpp b/vm/cpu-ppc.hpp index 7e8ae05fac..d393223d8d 100755 --- a/vm/cpu-ppc.hpp +++ b/vm/cpu-ppc.hpp @@ -2,16 +2,58 @@ namespace factor { #define FACTOR_CPU_STRING "ppc" -#define VM_ASM_API +#define VM_ASM_API VM_C_API -register cell ds asm("r29"); -register cell rs asm("r30"); +register cell ds asm("r13"); +register cell rs asm("r14"); -void c_to_factor(cell quot); -void undefined(cell word); -void set_callstack(stack_frame *to, stack_frame *from, cell length, void *memcpy); -void throw_impl(cell quot, stack_frame *rewind); -void lazy_jit_compile(cell quot); -void flush_icache(cell start, cell len); +inline static void check_call_site(cell return_address) +{ +#ifdef FACTOR_DEBUG + cell insn = *(cell *)return_address; + assert((insn & 0x3) == 0x1); + assert((insn >> 26) == 0x12); +#endif +} + +#define B_MASK 0x3fffffc + +inline static void *get_call_target(cell return_address) +{ + return_address -= sizeof(cell); + + check_call_site(return_address); + cell insn = *(cell *)return_address; + cell unsigned_addr = (insn & B_MASK); + fixnum signed_addr = (fixnum)(unsigned_addr << 6) >> 6; + return (void *)(signed_addr + return_address); +} + +inline static void set_call_target(cell return_address, void *target) +{ + return_address -= sizeof(cell); + +#ifdef FACTOR_DEBUG + assert((return_address & ~B_MASK) == 0); + check_call_site(return_address); +#endif + cell insn = *(cell *)return_address; + insn = ((insn & ~B_MASK) | (((cell)target - return_address) & B_MASK)); + *(cell *)return_address = insn; + + /* Flush the cache line containing the call we just patched */ + __asm__ __volatile__ ("icbi 0, %0\n" "sync\n"::"r" (return_address):); +} + +/* Defined in assembly */ +VM_ASM_API void c_to_factor(cell quot); +VM_ASM_API void throw_impl(cell quot, stack_frame *rewind); +VM_ASM_API void lazy_jit_compile(cell quot); +VM_ASM_API void flush_icache(cell start, cell len); + +VM_ASM_API void set_callstack(stack_frame *to, + stack_frame *from, + cell length, + void *(*memcpy)(void*,const void*, size_t)); } diff --git a/vm/cpu-x86.32.S b/vm/cpu-x86.32.S index 3c0db36935..0c08ea7b46 100755 --- a/vm/cpu-x86.32.S +++ b/vm/cpu-x86.32.S @@ -30,7 +30,6 @@ and the callstack top is passed in EDX */ pop %ebx #define QUOT_XT_OFFSET 16 -#define WORD_XT_OFFSET 30 /* We pass a function pointer to memcpy to work around a Mac OS X ABI limitation which would otherwise require us to do a bizzaro PC-relative diff --git a/vm/cpu-x86.64.S b/vm/cpu-x86.64.S index a110bf1d51..5a70280ddf 100644 --- a/vm/cpu-x86.64.S +++ b/vm/cpu-x86.64.S @@ -62,7 +62,6 @@ #endif #define QUOT_XT_OFFSET 36 -#define WORD_XT_OFFSET 66 /* We pass a function pointer to memcpy to work around a Mac OS X ABI limitation which would otherwise require us to do a bizzaro PC-relative diff --git a/vm/inline_cache.cpp b/vm/inline_cache.cpp index 259a3e0c77..59632c4185 100755 --- a/vm/inline_cache.cpp +++ b/vm/inline_cache.cpp @@ -21,6 +21,8 @@ void deallocate_inline_cache(cell return_address) { /* Find the call target. */ void *old_xt = get_call_target(return_address); + check_code_pointer((cell)old_xt); + code_block *old_block = (code_block *)old_xt - 1; cell old_type = old_block->type;