diff --git a/Makefile b/Makefile index dfc0f71ff6..d5c7e00763 100644 --- a/Makefile +++ b/Makefile @@ -40,6 +40,7 @@ DLL_OBJS = $(PLAF_DLL_OBJS) \ vm/errors.o \ vm/factor.o \ vm/image.o \ + vm/inline_cache.o \ vm/io.o \ vm/jit.o \ vm/math.o \ diff --git a/basis/bootstrap/image/image.factor b/basis/bootstrap/image/image.factor index 91aafa9f92..f2dd6e07fd 100644 --- a/basis/bootstrap/image/image.factor +++ b/basis/bootstrap/image/image.factor @@ -5,10 +5,10 @@ hashtables.private io io.binary io.files io.encodings.binary io.pathnames kernel kernel.private math namespaces make parser prettyprint sequences sequences.private strings sbufs vectors words quotations assocs system layouts splitting grouping growable classes -classes.builtin classes.tuple classes.tuple.private -vocabs vocabs.loader source-files definitions debugger -quotations.private sequences.private combinators math.order -math.private accessors slots.private compiler.units compiler.constants +classes.builtin classes.tuple classes.tuple.private vocabs +vocabs.loader source-files definitions debugger quotations.private +sequences.private combinators math.order math.private accessors +slots.private generic.single.private compiler.units compiler.constants fry ; IN: bootstrap.image @@ -162,6 +162,15 @@ SYMBOL: jit-profiling SYMBOL: jit-declare-word SYMBOL: jit-save-stack +! PIC stubs +SYMBOL: pic-tag +SYMBOL: pic-hi-tag +SYMBOL: pic-tuple +SYMBOL: pic-hi-tag-tuple +SYMBOL: pic-check +SYMBOL: pic-hit +SYMBOL: pic-miss-word + ! Default definition for undefined words SYMBOL: undefined-quot @@ -184,17 +193,24 @@ SYMBOL: undefined-quot { jit-return 34 } { jit-profiling 35 } { jit-push-immediate 36 } - { jit-declare-word 42 } - { jit-save-stack 43 } - { jit-dip-word 44 } - { jit-dip 45 } - { jit-2dip-word 46 } - { jit-2dip 47 } - { jit-3dip-word 48 } - { jit-3dip 49 } - { jit-execute-word 50 } - { jit-execute-jump 51 } - { jit-execute-call 52 } + { jit-declare-word 37 } + { jit-save-stack 38 } + { jit-dip-word 39 } + { jit-dip 40 } + { jit-2dip-word 41 } + { jit-2dip 42 } + { jit-3dip-word 43 } + { jit-3dip 44 } + { jit-execute-word 45 } + { jit-execute-jump 46 } + { jit-execute-call 47 } + { pic-tag 48 } + { pic-hi-tag 49 } + { pic-tuple 50 } + { pic-hi-tag-tuple 51 } + { pic-check 52 } + { pic-hit 53 } + { pic-miss-word 54 } { undefined-quot 60 } } ; inline @@ -509,6 +525,7 @@ M: quotation ' \ 2dip jit-2dip-word set \ 3dip jit-3dip-word set \ (execute) jit-execute-word set + \ inline-cache-miss \ pic-miss-word set [ undefined ] undefined-quot set { jit-code-format @@ -537,6 +554,13 @@ M: quotation ' jit-profiling jit-declare-word jit-save-stack + pic-tag + pic-hi-tag + pic-tuple + pic-hi-tag-tuple + pic-check + pic-hit + pic-miss-word undefined-quot } [ emit-userenv ] each ; diff --git a/basis/cpu/x86/assembler/assembler.factor b/basis/cpu/x86/assembler/assembler.factor index 3a98d47416..9b34875bc1 100644 --- a/basis/cpu/x86/assembler/assembler.factor +++ b/basis/cpu/x86/assembler/assembler.factor @@ -321,10 +321,11 @@ M: label CALL (CALL) label-fixup ; M: operand CALL { BIN: 010 t HEX: ff } 1-operand ; GENERIC# JUMPcc 1 ( addr opcode -- ) -: (JUMPcc) ( n -- rel-class ) extended-opcode, 0 4, rc-relative ; -M: f JUMPcc nip (JUMPcc) drop ; -M: callable JUMPcc (JUMPcc) rel-word ; -M: label JUMPcc (JUMPcc) label-fixup ; +: (JUMPcc) ( addr n -- rel-class ) extended-opcode, 4, rc-relative ; +M: f JUMPcc [ 0 ] dip (JUMPcc) 2drop ; +M: integer JUMPcc (JUMPcc) drop ; +M: callable JUMPcc [ 0 ] dip (JUMPcc) rel-word ; +M: label JUMPcc [ 0 ] dip (JUMPcc) label-fixup ; : JO ( dst -- ) HEX: 80 JUMPcc ; : JNO ( dst -- ) HEX: 81 JUMPcc ; @@ -382,6 +383,10 @@ GENERIC: CMP ( dst src -- ) M: immediate CMP swap { BIN: 111 t HEX: 80 } immediate-1/4 ; M: operand CMP OCT: 070 2-operand ; +GENERIC: TEST ( dst src -- ) +M: immediate TEST swap { BIN: 101 t HEX: 84 } immediate-1/4 ; +M: operand TEST OCT: 204 2-operand ; + : XCHG ( dst src -- ) OCT: 207 2-operand ; : BSR ( dst src -- ) swap { HEX: 0f HEX: bd } (2-operand) ; diff --git a/basis/cpu/x86/bootstrap.factor b/basis/cpu/x86/bootstrap.factor index dd17ef4186..77a34277ab 100644 --- a/basis/cpu/x86/bootstrap.factor +++ b/basis/cpu/x86/bootstrap.factor @@ -1,9 +1,9 @@ -! Copyright (C) 2007, 2008 Slava Pestov. +! Copyright (C) 2007, 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. USING: bootstrap.image.private kernel kernel.private namespaces system cpu.x86.assembler layouts compiler.units math math.private compiler.constants vocabs slots.private words -locals.backend ; +locals.backend make sequences combinators ; IN: bootstrap.x86 big-endian off @@ -170,7 +170,57 @@ big-endian off [ 0 RET ] jit-return jit-define -! Sub-primitives +! ! ! Polymorphic inline caches + +! The 'make' trick lets us compute the jump distance for the conditional branches there + +! Tag +[ + ds-reg bootstrap-cell SUB + temp0 tag-bits get AND +] pic-tag jit-define + +! Hi-tag +[ + ds-reg bootstrap-cell SUB + temp0 object tag-number TEST + [ temp0 temp0 object tag-number neg [+] MOV ] { } make + [ length JNE ] [ % ] bi +] pic-hi-tag jit-define + +! Tuple +[ + ds-reg bootstrap-cell SUB + temp0 tuple tag-number TEST + [ temp0 temp0 tuple tag-number neg bootstrap-cell + [+] MOV ] { } make + [ length JNE ] [ % ] bi +] pic-tuple jit-define + +! Hi-tag and tuple +[ + ds-reg bootstrap-cell SUB + ! If bits 2 and 3 are set, the tag is either 6 (object) or 7 (tuple) + temp0 6 TEST + [ + temp1 temp0 MOV + ! Make temp0 untagged + temp0 tag-mask get bitnot AND + ! Set temp1 to 0 for objects, and 4 or 8 for tuples + temp1 1 AND + bootstrap-cell { + { 4 [ temp1 2 SHL ] } + { 8 [ temp1 3 SHL ] } + } case + ! Load header cell or tuple layout cell + temp0 temp0 temp1 [+] MOV + ] [ ] make [ length JNE ] [ % ] bi +] pic-hi-tag-tuple jit-define + +[ temp0 HEX: ffffffff CMP ] pic-check jit-define + +[ f JE rc-relative rt-xt jit-rel ] pic-hit jit-define + +! ! ! Sub-primitives ! Quotations and words [ diff --git a/core/bootstrap/primitives.factor b/core/bootstrap/primitives.factor index 62f23f206d..b618e64d41 100644 --- a/core/bootstrap/primitives.factor +++ b/core/bootstrap/primitives.factor @@ -533,6 +533,7 @@ tuple { "load-locals" "locals.backend" (( ... n -- )) } { "check-datastack" "kernel.private" (( array in# out# -- ? )) } { "lookup-method" "generic.single.private" (( object methods method-cache -- method )) } + { "inline-cache-miss" "generic.single.private" (( generic methods -- )) } } [ [ first3 ] dip swap make-primitive ] each-index ! Bump build number diff --git a/vm/code_block.c b/vm/code_block.c index e7d8bec0ac..391c8cf56e 100644 --- a/vm/code_block.c +++ b/vm/code_block.c @@ -427,7 +427,7 @@ F_CODE_BLOCK *add_code_block( #ifdef FACTOR_DEBUG type_check(ARRAY_TYPE,literals); type_check(BYTE_ARRAY_TYPE,relocation); - assert(hi_tag(code) == ARRAY_TYPE); + assert(untag_header(code->header) == ARRAY_TYPE); #endif CELL code_format = compiled_code_format(); diff --git a/vm/cpu-x86.S b/vm/cpu-x86.S index 7a0d738fe0..5dfc55cbd5 100755 --- a/vm/cpu-x86.S +++ b/vm/cpu-x86.S @@ -60,7 +60,7 @@ DEF(F_FASTCALL void,throw_impl,(CELL quot, F_STACK_FRAME *rewind_to)): mov ARG1,STACK_REG jmp *QUOT_XT_OFFSET(ARG0) -DEF(FASTCALL void,lazy_jit_compile,(CELL quot)): +DEF(F_FASTCALL void,lazy_jit_compile,(CELL quot)): mov STACK_REG,ARG1 /* Save stack pointer */ sub $STACK_PADDING,STACK_REG call MANGLE(lazy_jit_compile_impl) @@ -68,6 +68,10 @@ DEF(FASTCALL void,lazy_jit_compile,(CELL quot)): add $STACK_PADDING,STACK_REG jmp *QUOT_XT_OFFSET(ARG0) /* Call the quotation */ +DEF(F_FASTCALL void,primitive_inline_cache_miss,(void)): + mov (STACK_REG),ARG0 + jmp MANGLE(inline_cache_miss) + #ifdef WINDOWS .section .drectve .ascii " -export:c_to_factor" diff --git a/vm/cpu-x86.h b/vm/cpu-x86.h index 3b08479e4b..d84a480b08 100755 --- a/vm/cpu-x86.h +++ b/vm/cpu-x86.h @@ -1,3 +1,5 @@ +#include + #define FRAME_RETURN_ADDRESS(frame) *(XT *)(frame_successor(frame) + 1) INLINE void flush_icache(CELL start, CELL len) {} @@ -7,3 +9,16 @@ F_FASTCALL void throw_impl(CELL quot, F_STACK_FRAME *rewind_to); F_FASTCALL void lazy_jit_compile(CELL quot); void set_callstack(F_STACK_FRAME *to, F_STACK_FRAME *from, CELL length, void *memcpy); + +INLINE void set_call_site(CELL return_address, CELL target) +{ + /* An x86 CALL instruction looks like so: + |e8|..|..|..|..| + where the ... are a PC-relative jump address. + The return_address points to right after the + instruction. */ +#ifdef FACTOR_DEBUG + assert(*(unsigned char *)(return_address - 5) == 0xe8); +#endif + *(F_FIXNUM *)(return_address - 4) = (target - (return_address - 4)); +} diff --git a/vm/dispatch.c b/vm/dispatch.c index f5febaf707..8093912080 100644 --- a/vm/dispatch.c +++ b/vm/dispatch.c @@ -101,11 +101,32 @@ static void update_method_cache(CELL key, CELL method_cache, CELL method) set_array_nth(array,hashcode + 1,method); } -static CELL lookup_method(CELL object, CELL methods, CELL method_cache) +static CELL lookup_hairy_method(CELL object, CELL methods) +{ + CELL method = array_nth(untag_object(methods),TAG(object)); + if(type_of(method) == WORD_TYPE) + return method; + else + { + switch(TAG(object)) + { + case TUPLE_TYPE: + return lookup_tuple_method(object,method); + break; + case OBJECT_TYPE: + return lookup_hi_tag_method(object,method); + break; + default: + critical_error("Bad methods array",methods); + return -1; + } + } +} + +static CELL lookup_method_with_cache(CELL object, CELL methods, CELL method_cache) { - F_ARRAY *tag_methods = untag_object(methods); if(!HI_TAG_OR_TUPLE_P(object)) - return array_nth(tag_methods,TAG(object)); + return array_nth(untag_object(methods),TAG(object)); else { CELL key = get(HI_TAG_HEADER(object)); @@ -114,23 +135,7 @@ static CELL lookup_method(CELL object, CELL methods, CELL method_cache) return method; else { - method = array_nth(tag_methods,TAG(object)); - if(type_of(method) != WORD_TYPE) - { - switch(TAG(object)) - { - case TUPLE_TYPE: - method = lookup_tuple_method(object,method); - break; - case OBJECT_TYPE: - method = lookup_hi_tag_method(object,method); - break; - default: - critical_error("Bad methods array",methods); - break; - } - } - + method = lookup_hairy_method(object,methods); update_method_cache(key,method_cache,method); return method; } @@ -143,5 +148,23 @@ void primitive_lookup_method(void) CELL methods = get(ds - CELLS); CELL object = get(ds - CELLS * 2); ds -= CELLS * 2; - drepl(lookup_method(object,methods,method_cache)); + drepl(lookup_method_with_cache(object,methods,method_cache)); +} + +/* Next two functions are used for polymorphic inline caching */ + +CELL object_class(CELL object) +{ + if(!HI_TAG_OR_TUPLE_P(object)) + return tag_fixnum(TAG(object)); + else + return get(HI_TAG_HEADER(object)); +} + +CELL lookup_method(CELL object, CELL methods) +{ + if(!HI_TAG_OR_TUPLE_P(object)) + return array_nth(untag_object(methods),TAG(object)); + else + return lookup_hairy_method(object,methods); } diff --git a/vm/inline_cache.c b/vm/inline_cache.c new file mode 100644 index 0000000000..08b3e9bc77 --- /dev/null +++ b/vm/inline_cache.c @@ -0,0 +1,182 @@ +#include "master.h" + +/* Figure out what kind of type check the PIC needs based on the methods +it contains */ +static CELL determine_inline_cache_type(CELL cache_entries) +{ + F_ARRAY *array = untag_object(cache_entries); + + bool seen_hi_tag = false, seen_tuple = false; + + CELL i; + for(i = 0; i < array_capacity(array); i += 2) + { + CELL class = array_nth(array,i); + F_FIXNUM type; + + /* Is it a tuple layout? */ + switch(type_of(class)) + { + case FIXNUM_TYPE: + type = untag_fixnum_fast(class); + if(type >= HEADER_TYPE) + seen_hi_tag = true; + break; + case ARRAY_TYPE: + seen_tuple = true; + break; + default: + critical_error("Expected a fixnum or array",class); + break; + } + } + + if(seen_hi_tag && seen_tuple) return PIC_HI_TAG_TUPLE; + if(seen_hi_tag && !seen_tuple) return PIC_HI_TAG; + if(!seen_hi_tag && seen_tuple) return PIC_TUPLE; + if(!seen_hi_tag && !seen_tuple) return PIC_TAG; + + critical_error("Oops",0); + return -1; +} + +/* picker: one of dup, over, pick + cache_entries: array of class/method pairs */ +static F_CODE_BLOCK *compile_inline_cache(CELL picker, CELL generic_word, CELL cache_entries) +{ + REGISTER_ROOT(picker); + REGISTER_ROOT(generic_word); + REGISTER_ROOT(cache_entries); + + F_JIT jit; + jit_init(&jit,WORD_TYPE,generic_word); + + /* Generate machine code to determine the object's class. */ + jit_emit_subprimitive(&jit,untag_object(picker)); + jit_emit(&jit,userenv[determine_inline_cache_type(cache_entries)]); + + /* Generate machine code to check, in turn, if the class is one of the cached entries. */ + CELL i; + for(i = 0; i < array_capacity(untag_object(cache_entries)); i += 2) + { + /* Class equal? */ + CELL class = array_nth(untag_object(cache_entries),i); + jit_emit_with(&jit,userenv[PIC_CHECK],class); + + /* Yes? Jump to method */ + CELL method = array_nth(untag_object(cache_entries),i + 1); + jit_emit_with(&jit,userenv[PIC_HIT],method); + } + + /* Generate machine code to handle a cache miss, which ultimately results in + this function being called again. + + The inline-cache-miss primitive call receives enough information to + reconstruct the PIC. We also execute the picker again, so that the + object being dispatched on can be popped from the top of the stack. */ + jit_emit_subprimitive(&jit,untag_object(picker)); + jit_push(&jit,generic_word); + jit_push(&jit,cache_entries); + jit_word_jump(&jit,userenv[PIC_MISS_WORD]); + + F_CODE_BLOCK *code = jit_make_code_block(&jit); + jit_dispose(&jit); + + UNREGISTER_ROOT(cache_entries); + UNREGISTER_ROOT(generic_word); + UNREGISTER_ROOT(picker); + + return code; +} + +/* A generic word's definition performs general method lookup. Allocates memory */ +static F_CODE_BLOCK *megamorphic_call_stub(CELL generic_word) +{ + F_WORD *word = untag_word(generic_word); + REGISTER_UNTAGGED(word); + jit_compile(word->def,true); + UNREGISTER_UNTAGGED(word); + return untag_quotation(word->def)->code; +} + +/* Assumes that generic word definitions look like: + [ lookup-method (execute) ] +*/ +static void examine_generic_word(CELL generic_word, CELL *picker, CELL *all_methods) +{ + CELL def = untag_word(generic_word)->def; + F_QUOTATION *quot = untag_quotation(def); + F_ARRAY *array = untag_object(quot->array); + +#ifdef FACTOR_DEBUG + assert(array_capacity(array) == 5); + type_check(WORD_TYPE,array_nth(array,0)); + type_check(ARRAY_TYPE,array_nth(array,1)); + type_check(ARRAY_TYPE,array_nth(array,2)); + type_check(WORD_TYPE,array_nth(array,3)); + type_check(WORD_TYPE,array_nth(array,4)); +#endif + + *picker = array_nth(array,0); + *all_methods = array_nth(array,1); +} + +/* Allocates memory */ +static CELL add_inline_cache_entry(CELL cache_entries, CELL class, CELL method) +{ + F_ARRAY *cache_entries_array = untag_object(cache_entries); + CELL pic_size = array_capacity(cache_entries_array); + cache_entries_array = reallot_array(cache_entries_array,pic_size + 2); + set_array_nth(cache_entries_array,pic_size,class); + set_array_nth(cache_entries_array,pic_size + 1,method); + return tag_object(cache_entries_array); +} + +/* The cache_entries parameter is either f (on cold call site) or an array (on cache miss). +Called from assembly with the actual return address */ +F_FASTCALL XT inline_cache_miss(CELL return_address) +{ + CELL cache_entries = dpop(); + CELL generic_word = dpop(); + CELL object = dpop(); + + F_CODE_BLOCK *block; + + CELL pic_size = (cache_entries == F ? 0 : array_capacity(untag_array(cache_entries))); + + if(pic_size >= max_pic_size) + block = megamorphic_call_stub(generic_word); + else + { + CELL picker, all_methods; + examine_generic_word(generic_word,&picker,&all_methods); + + REGISTER_ROOT(generic_word); + REGISTER_ROOT(cache_entries); + REGISTER_ROOT(picker); + REGISTER_ROOT(all_methods); + + /* Find the right method. */ + CELL class = object_class(object); + CELL method = lookup_method(object,all_methods); + + /* Add a new entry to the PIC. */ + if(cache_entries == F) + cache_entries = allot_array_2(class,method); + else + cache_entries = add_inline_cache_entry(cache_entries,class,method); + + /* Install the new PIC. */ + block = compile_inline_cache(picker,generic_word,cache_entries); + + UNREGISTER_ROOT(all_methods); + UNREGISTER_ROOT(picker); + UNREGISTER_ROOT(cache_entries); + UNREGISTER_ROOT(generic_word); + } + + XT xt = (block + 1); + set_call_site(return_address,(CELL)xt); + + return xt; +} diff --git a/vm/inline_cache.h b/vm/inline_cache.h new file mode 100644 index 0000000000..f924c2c59e --- /dev/null +++ b/vm/inline_cache.h @@ -0,0 +1,8 @@ +int max_pic_size; + +void primitive_inline_cache_miss(void); + +F_FASTCALL XT inline_cache_miss(CELL return_address); + +CELL object_class(CELL object); +CELL lookup_method(CELL object, CELL methods); diff --git a/vm/jit.h b/vm/jit.h index deafb48308..a8738eb835 100644 --- a/vm/jit.h +++ b/vm/jit.h @@ -44,3 +44,14 @@ INLINE void jit_word_jump(F_JIT *jit, CELL word) { jit_emit_with(jit,userenv[JIT_WORD_JUMP],word); } + +/* Allocates memory */ +INLINE void jit_emit_subprimitive(F_JIT *jit, F_WORD *word) +{ + REGISTER_UNTAGGED(word); + if(array_nth(untag_object(word->subprimitive),1) != F) + jit_add_literal(jit,T); + UNREGISTER_UNTAGGED(word); + + jit_emit(jit,word->subprimitive); +} diff --git a/vm/master.h b/vm/master.h index c89d6d2092..c6f2c0a090 100644 --- a/vm/master.h +++ b/vm/master.h @@ -47,6 +47,7 @@ #include "quotations.h" #include "dispatch.h" #include "jit.h" +#include "inline_cache.h" #include "factor.h" #include "utilities.h" diff --git a/vm/primitives.c b/vm/primitives.c index 4281e88fc3..dfdc99f487 100755 --- a/vm/primitives.c +++ b/vm/primitives.c @@ -145,5 +145,6 @@ void *primitives[] = { primitive_jit_compile, primitive_load_locals, primitive_check_datastack, - primitive_lookup_method + primitive_lookup_method, + primitive_inline_cache_miss, }; diff --git a/vm/quotations.c b/vm/quotations.c index d149dab6c9..6860e3acba 100755 --- a/vm/quotations.c +++ b/vm/quotations.c @@ -168,14 +168,7 @@ void jit_compile(CELL quot, bool relocate) /* Intrinsics */ if(word->subprimitive != F) - { - REGISTER_UNTAGGED(word); - if(array_nth(untag_object(word->subprimitive),1) != F) - jit_add_literal(&jit,T); - UNREGISTER_UNTAGGED(word); - - jit_emit(&jit,word->subprimitive); - } + jit_emit_subprimitive(&jit,word); /* The (execute) primitive is special-cased */ else if(obj == userenv[JIT_EXECUTE_WORD]) { diff --git a/vm/run.h b/vm/run.h index ba183fb6d4..2e15365dbd 100755 --- a/vm/run.h +++ b/vm/run.h @@ -48,7 +48,7 @@ typedef enum { JIT_RETURN, JIT_PROFILING, JIT_PUSH_IMMEDIATE, - JIT_DECLARE_WORD = 42, + JIT_DECLARE_WORD, JIT_SAVE_STACK, JIT_DIP_WORD, JIT_DIP, @@ -60,6 +60,15 @@ typedef enum { JIT_EXECUTE_JUMP, JIT_EXECUTE_CALL, + /* Used by polymorphic inline cache generation in inline_cache.c */ + PIC_TAG = 53, + PIC_HI_TAG, + PIC_TUPLE, + PIC_HI_TAG_TUPLE, + PIC_CHECK, + PIC_HIT, + PIC_MISS_WORD, + STACK_TRACES_ENV = 59, UNDEFINED_ENV = 60, /* default quotation for undefined words */ diff --git a/vm/types.c b/vm/types.c index 64f545dec5..b5981dc3b1 100755 --- a/vm/types.c +++ b/vm/types.c @@ -172,7 +172,8 @@ CELL allot_array_4(CELL v1, CELL v2, CELL v3, CELL v4) F_ARRAY *reallot_array(F_ARRAY* array, CELL capacity) { #ifdef FACTOR_DEBUG - assert(untag_header(array->header) == ARRAY_TYPE); + CELL header = untag_header(array->header); + assert(header == ARRAY_TYPE || header == BIGNUM_TYPE); #endif CELL to_copy = array_capacity(array);