Working on inline caching

db4
Slava Pestov 2009-04-28 03:48:37 -05:00
parent d5b51df7e7
commit 4d10105802
17 changed files with 385 additions and 56 deletions

View File

@ -40,6 +40,7 @@ DLL_OBJS = $(PLAF_DLL_OBJS) \
vm/errors.o \
vm/factor.o \
vm/image.o \
vm/inline_cache.o \
vm/io.o \
vm/jit.o \
vm/math.o \

View File

@ -5,10 +5,10 @@ hashtables.private io io.binary io.files io.encodings.binary
io.pathnames kernel kernel.private math namespaces make parser
prettyprint sequences sequences.private strings sbufs vectors words
quotations assocs system layouts splitting grouping growable classes
classes.builtin classes.tuple classes.tuple.private
vocabs vocabs.loader source-files definitions debugger
quotations.private sequences.private combinators math.order
math.private accessors slots.private compiler.units compiler.constants
classes.builtin classes.tuple classes.tuple.private vocabs
vocabs.loader source-files definitions debugger quotations.private
sequences.private combinators math.order math.private accessors
slots.private generic.single.private compiler.units compiler.constants
fry ;
IN: bootstrap.image
@ -162,6 +162,15 @@ SYMBOL: jit-profiling
SYMBOL: jit-declare-word
SYMBOL: jit-save-stack
! PIC stubs
SYMBOL: pic-tag
SYMBOL: pic-hi-tag
SYMBOL: pic-tuple
SYMBOL: pic-hi-tag-tuple
SYMBOL: pic-check
SYMBOL: pic-hit
SYMBOL: pic-miss-word
! Default definition for undefined words
SYMBOL: undefined-quot
@ -184,17 +193,24 @@ SYMBOL: undefined-quot
{ jit-return 34 }
{ jit-profiling 35 }
{ jit-push-immediate 36 }
{ jit-declare-word 42 }
{ jit-save-stack 43 }
{ jit-dip-word 44 }
{ jit-dip 45 }
{ jit-2dip-word 46 }
{ jit-2dip 47 }
{ jit-3dip-word 48 }
{ jit-3dip 49 }
{ jit-execute-word 50 }
{ jit-execute-jump 51 }
{ jit-execute-call 52 }
{ jit-declare-word 37 }
{ jit-save-stack 38 }
{ jit-dip-word 39 }
{ jit-dip 40 }
{ jit-2dip-word 41 }
{ jit-2dip 42 }
{ jit-3dip-word 43 }
{ jit-3dip 44 }
{ jit-execute-word 45 }
{ jit-execute-jump 46 }
{ jit-execute-call 47 }
{ pic-tag 48 }
{ pic-hi-tag 49 }
{ pic-tuple 50 }
{ pic-hi-tag-tuple 51 }
{ pic-check 52 }
{ pic-hit 53 }
{ pic-miss-word 54 }
{ undefined-quot 60 }
} ; inline
@ -509,6 +525,7 @@ M: quotation '
\ 2dip jit-2dip-word set
\ 3dip jit-3dip-word set
\ (execute) jit-execute-word set
\ inline-cache-miss \ pic-miss-word set
[ undefined ] undefined-quot set
{
jit-code-format
@ -537,6 +554,13 @@ M: quotation '
jit-profiling
jit-declare-word
jit-save-stack
pic-tag
pic-hi-tag
pic-tuple
pic-hi-tag-tuple
pic-check
pic-hit
pic-miss-word
undefined-quot
} [ emit-userenv ] each ;

View File

@ -321,10 +321,11 @@ M: label CALL (CALL) label-fixup ;
M: operand CALL { BIN: 010 t HEX: ff } 1-operand ;
GENERIC# JUMPcc 1 ( addr opcode -- )
: (JUMPcc) ( n -- rel-class ) extended-opcode, 0 4, rc-relative ;
M: f JUMPcc nip (JUMPcc) drop ;
M: callable JUMPcc (JUMPcc) rel-word ;
M: label JUMPcc (JUMPcc) label-fixup ;
: (JUMPcc) ( addr n -- rel-class ) extended-opcode, 4, rc-relative ;
M: f JUMPcc [ 0 ] dip (JUMPcc) 2drop ;
M: integer JUMPcc (JUMPcc) drop ;
M: callable JUMPcc [ 0 ] dip (JUMPcc) rel-word ;
M: label JUMPcc [ 0 ] dip (JUMPcc) label-fixup ;
: JO ( dst -- ) HEX: 80 JUMPcc ;
: JNO ( dst -- ) HEX: 81 JUMPcc ;
@ -382,6 +383,10 @@ GENERIC: CMP ( dst src -- )
M: immediate CMP swap { BIN: 111 t HEX: 80 } immediate-1/4 ;
M: operand CMP OCT: 070 2-operand ;
GENERIC: TEST ( dst src -- )
M: immediate TEST swap { BIN: 101 t HEX: 84 } immediate-1/4 ;
M: operand TEST OCT: 204 2-operand ;
: XCHG ( dst src -- ) OCT: 207 2-operand ;
: BSR ( dst src -- ) swap { HEX: 0f HEX: bd } (2-operand) ;

View File

@ -1,9 +1,9 @@
! Copyright (C) 2007, 2008 Slava Pestov.
! Copyright (C) 2007, 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: bootstrap.image.private kernel kernel.private namespaces
system cpu.x86.assembler layouts compiler.units math
math.private compiler.constants vocabs slots.private words
locals.backend ;
locals.backend make sequences combinators ;
IN: bootstrap.x86
big-endian off
@ -170,7 +170,57 @@ big-endian off
[ 0 RET ] jit-return jit-define
! Sub-primitives
! ! ! Polymorphic inline caches
! The 'make' trick lets us compute the jump distance for the conditional branches there
! Tag
[
ds-reg bootstrap-cell SUB
temp0 tag-bits get AND
] pic-tag jit-define
! Hi-tag
[
ds-reg bootstrap-cell SUB
temp0 object tag-number TEST
[ temp0 temp0 object tag-number neg [+] MOV ] { } make
[ length JNE ] [ % ] bi
] pic-hi-tag jit-define
! Tuple
[
ds-reg bootstrap-cell SUB
temp0 tuple tag-number TEST
[ temp0 temp0 tuple tag-number neg bootstrap-cell + [+] MOV ] { } make
[ length JNE ] [ % ] bi
] pic-tuple jit-define
! Hi-tag and tuple
[
ds-reg bootstrap-cell SUB
! If bits 2 and 3 are set, the tag is either 6 (object) or 7 (tuple)
temp0 6 TEST
[
temp1 temp0 MOV
! Make temp0 untagged
temp0 tag-mask get bitnot AND
! Set temp1 to 0 for objects, and 4 or 8 for tuples
temp1 1 AND
bootstrap-cell {
{ 4 [ temp1 2 SHL ] }
{ 8 [ temp1 3 SHL ] }
} case
! Load header cell or tuple layout cell
temp0 temp0 temp1 [+] MOV
] [ ] make [ length JNE ] [ % ] bi
] pic-hi-tag-tuple jit-define
[ temp0 HEX: ffffffff CMP ] pic-check jit-define
[ f JE rc-relative rt-xt jit-rel ] pic-hit jit-define
! ! ! Sub-primitives
! Quotations and words
[

View File

@ -533,6 +533,7 @@ tuple
{ "load-locals" "locals.backend" (( ... n -- )) }
{ "check-datastack" "kernel.private" (( array in# out# -- ? )) }
{ "lookup-method" "generic.single.private" (( object methods method-cache -- method )) }
{ "inline-cache-miss" "generic.single.private" (( generic methods -- )) }
} [ [ first3 ] dip swap make-primitive ] each-index
! Bump build number

View File

@ -427,7 +427,7 @@ F_CODE_BLOCK *add_code_block(
#ifdef FACTOR_DEBUG
type_check(ARRAY_TYPE,literals);
type_check(BYTE_ARRAY_TYPE,relocation);
assert(hi_tag(code) == ARRAY_TYPE);
assert(untag_header(code->header) == ARRAY_TYPE);
#endif
CELL code_format = compiled_code_format();

View File

@ -60,7 +60,7 @@ DEF(F_FASTCALL void,throw_impl,(CELL quot, F_STACK_FRAME *rewind_to)):
mov ARG1,STACK_REG
jmp *QUOT_XT_OFFSET(ARG0)
DEF(FASTCALL void,lazy_jit_compile,(CELL quot)):
DEF(F_FASTCALL void,lazy_jit_compile,(CELL quot)):
mov STACK_REG,ARG1 /* Save stack pointer */
sub $STACK_PADDING,STACK_REG
call MANGLE(lazy_jit_compile_impl)
@ -68,6 +68,10 @@ DEF(FASTCALL void,lazy_jit_compile,(CELL quot)):
add $STACK_PADDING,STACK_REG
jmp *QUOT_XT_OFFSET(ARG0) /* Call the quotation */
DEF(F_FASTCALL void,primitive_inline_cache_miss,(void)):
mov (STACK_REG),ARG0
jmp MANGLE(inline_cache_miss)
#ifdef WINDOWS
.section .drectve
.ascii " -export:c_to_factor"

View File

@ -1,3 +1,5 @@
#include <assert.h>
#define FRAME_RETURN_ADDRESS(frame) *(XT *)(frame_successor(frame) + 1)
INLINE void flush_icache(CELL start, CELL len) {}
@ -7,3 +9,16 @@ F_FASTCALL void throw_impl(CELL quot, F_STACK_FRAME *rewind_to);
F_FASTCALL void lazy_jit_compile(CELL quot);
void set_callstack(F_STACK_FRAME *to, F_STACK_FRAME *from, CELL length, void *memcpy);
INLINE void set_call_site(CELL return_address, CELL target)
{
/* An x86 CALL instruction looks like so:
|e8|..|..|..|..|
where the ... are a PC-relative jump address.
The return_address points to right after the
instruction. */
#ifdef FACTOR_DEBUG
assert(*(unsigned char *)(return_address - 5) == 0xe8);
#endif
*(F_FIXNUM *)(return_address - 4) = (target - (return_address - 4));
}

View File

@ -101,11 +101,32 @@ static void update_method_cache(CELL key, CELL method_cache, CELL method)
set_array_nth(array,hashcode + 1,method);
}
static CELL lookup_method(CELL object, CELL methods, CELL method_cache)
static CELL lookup_hairy_method(CELL object, CELL methods)
{
CELL method = array_nth(untag_object(methods),TAG(object));
if(type_of(method) == WORD_TYPE)
return method;
else
{
switch(TAG(object))
{
case TUPLE_TYPE:
return lookup_tuple_method(object,method);
break;
case OBJECT_TYPE:
return lookup_hi_tag_method(object,method);
break;
default:
critical_error("Bad methods array",methods);
return -1;
}
}
}
static CELL lookup_method_with_cache(CELL object, CELL methods, CELL method_cache)
{
F_ARRAY *tag_methods = untag_object(methods);
if(!HI_TAG_OR_TUPLE_P(object))
return array_nth(tag_methods,TAG(object));
return array_nth(untag_object(methods),TAG(object));
else
{
CELL key = get(HI_TAG_HEADER(object));
@ -114,23 +135,7 @@ static CELL lookup_method(CELL object, CELL methods, CELL method_cache)
return method;
else
{
method = array_nth(tag_methods,TAG(object));
if(type_of(method) != WORD_TYPE)
{
switch(TAG(object))
{
case TUPLE_TYPE:
method = lookup_tuple_method(object,method);
break;
case OBJECT_TYPE:
method = lookup_hi_tag_method(object,method);
break;
default:
critical_error("Bad methods array",methods);
break;
}
}
method = lookup_hairy_method(object,methods);
update_method_cache(key,method_cache,method);
return method;
}
@ -143,5 +148,23 @@ void primitive_lookup_method(void)
CELL methods = get(ds - CELLS);
CELL object = get(ds - CELLS * 2);
ds -= CELLS * 2;
drepl(lookup_method(object,methods,method_cache));
drepl(lookup_method_with_cache(object,methods,method_cache));
}
/* Next two functions are used for polymorphic inline caching */
CELL object_class(CELL object)
{
if(!HI_TAG_OR_TUPLE_P(object))
return tag_fixnum(TAG(object));
else
return get(HI_TAG_HEADER(object));
}
CELL lookup_method(CELL object, CELL methods)
{
if(!HI_TAG_OR_TUPLE_P(object))
return array_nth(untag_object(methods),TAG(object));
else
return lookup_hairy_method(object,methods);
}

182
vm/inline_cache.c Normal file
View File

@ -0,0 +1,182 @@
#include "master.h"
/* Figure out what kind of type check the PIC needs based on the methods
it contains */
static CELL determine_inline_cache_type(CELL cache_entries)
{
F_ARRAY *array = untag_object(cache_entries);
bool seen_hi_tag = false, seen_tuple = false;
CELL i;
for(i = 0; i < array_capacity(array); i += 2)
{
CELL class = array_nth(array,i);
F_FIXNUM type;
/* Is it a tuple layout? */
switch(type_of(class))
{
case FIXNUM_TYPE:
type = untag_fixnum_fast(class);
if(type >= HEADER_TYPE)
seen_hi_tag = true;
break;
case ARRAY_TYPE:
seen_tuple = true;
break;
default:
critical_error("Expected a fixnum or array",class);
break;
}
}
if(seen_hi_tag && seen_tuple) return PIC_HI_TAG_TUPLE;
if(seen_hi_tag && !seen_tuple) return PIC_HI_TAG;
if(!seen_hi_tag && seen_tuple) return PIC_TUPLE;
if(!seen_hi_tag && !seen_tuple) return PIC_TAG;
critical_error("Oops",0);
return -1;
}
/* picker: one of dup, over, pick
cache_entries: array of class/method pairs */
static F_CODE_BLOCK *compile_inline_cache(CELL picker, CELL generic_word, CELL cache_entries)
{
REGISTER_ROOT(picker);
REGISTER_ROOT(generic_word);
REGISTER_ROOT(cache_entries);
F_JIT jit;
jit_init(&jit,WORD_TYPE,generic_word);
/* Generate machine code to determine the object's class. */
jit_emit_subprimitive(&jit,untag_object(picker));
jit_emit(&jit,userenv[determine_inline_cache_type(cache_entries)]);
/* Generate machine code to check, in turn, if the class is one of the cached entries. */
CELL i;
for(i = 0; i < array_capacity(untag_object(cache_entries)); i += 2)
{
/* Class equal? */
CELL class = array_nth(untag_object(cache_entries),i);
jit_emit_with(&jit,userenv[PIC_CHECK],class);
/* Yes? Jump to method */
CELL method = array_nth(untag_object(cache_entries),i + 1);
jit_emit_with(&jit,userenv[PIC_HIT],method);
}
/* Generate machine code to handle a cache miss, which ultimately results in
this function being called again.
The inline-cache-miss primitive call receives enough information to
reconstruct the PIC. We also execute the picker again, so that the
object being dispatched on can be popped from the top of the stack. */
jit_emit_subprimitive(&jit,untag_object(picker));
jit_push(&jit,generic_word);
jit_push(&jit,cache_entries);
jit_word_jump(&jit,userenv[PIC_MISS_WORD]);
F_CODE_BLOCK *code = jit_make_code_block(&jit);
jit_dispose(&jit);
UNREGISTER_ROOT(cache_entries);
UNREGISTER_ROOT(generic_word);
UNREGISTER_ROOT(picker);
return code;
}
/* A generic word's definition performs general method lookup. Allocates memory */
static F_CODE_BLOCK *megamorphic_call_stub(CELL generic_word)
{
F_WORD *word = untag_word(generic_word);
REGISTER_UNTAGGED(word);
jit_compile(word->def,true);
UNREGISTER_UNTAGGED(word);
return untag_quotation(word->def)->code;
}
/* Assumes that generic word definitions look like:
[ <picker> <methods> <cache> lookup-method (execute) ]
*/
static void examine_generic_word(CELL generic_word, CELL *picker, CELL *all_methods)
{
CELL def = untag_word(generic_word)->def;
F_QUOTATION *quot = untag_quotation(def);
F_ARRAY *array = untag_object(quot->array);
#ifdef FACTOR_DEBUG
assert(array_capacity(array) == 5);
type_check(WORD_TYPE,array_nth(array,0));
type_check(ARRAY_TYPE,array_nth(array,1));
type_check(ARRAY_TYPE,array_nth(array,2));
type_check(WORD_TYPE,array_nth(array,3));
type_check(WORD_TYPE,array_nth(array,4));
#endif
*picker = array_nth(array,0);
*all_methods = array_nth(array,1);
}
/* Allocates memory */
static CELL add_inline_cache_entry(CELL cache_entries, CELL class, CELL method)
{
F_ARRAY *cache_entries_array = untag_object(cache_entries);
CELL pic_size = array_capacity(cache_entries_array);
cache_entries_array = reallot_array(cache_entries_array,pic_size + 2);
set_array_nth(cache_entries_array,pic_size,class);
set_array_nth(cache_entries_array,pic_size + 1,method);
return tag_object(cache_entries_array);
}
/* The cache_entries parameter is either f (on cold call site) or an array (on cache miss).
Called from assembly with the actual return address */
F_FASTCALL XT inline_cache_miss(CELL return_address)
{
CELL cache_entries = dpop();
CELL generic_word = dpop();
CELL object = dpop();
F_CODE_BLOCK *block;
CELL pic_size = (cache_entries == F ? 0 : array_capacity(untag_array(cache_entries)));
if(pic_size >= max_pic_size)
block = megamorphic_call_stub(generic_word);
else
{
CELL picker, all_methods;
examine_generic_word(generic_word,&picker,&all_methods);
REGISTER_ROOT(generic_word);
REGISTER_ROOT(cache_entries);
REGISTER_ROOT(picker);
REGISTER_ROOT(all_methods);
/* Find the right method. */
CELL class = object_class(object);
CELL method = lookup_method(object,all_methods);
/* Add a new entry to the PIC. */
if(cache_entries == F)
cache_entries = allot_array_2(class,method);
else
cache_entries = add_inline_cache_entry(cache_entries,class,method);
/* Install the new PIC. */
block = compile_inline_cache(picker,generic_word,cache_entries);
UNREGISTER_ROOT(all_methods);
UNREGISTER_ROOT(picker);
UNREGISTER_ROOT(cache_entries);
UNREGISTER_ROOT(generic_word);
}
XT xt = (block + 1);
set_call_site(return_address,(CELL)xt);
return xt;
}

8
vm/inline_cache.h Normal file
View File

@ -0,0 +1,8 @@
int max_pic_size;
void primitive_inline_cache_miss(void);
F_FASTCALL XT inline_cache_miss(CELL return_address);
CELL object_class(CELL object);
CELL lookup_method(CELL object, CELL methods);

View File

@ -44,3 +44,14 @@ INLINE void jit_word_jump(F_JIT *jit, CELL word)
{
jit_emit_with(jit,userenv[JIT_WORD_JUMP],word);
}
/* Allocates memory */
INLINE void jit_emit_subprimitive(F_JIT *jit, F_WORD *word)
{
REGISTER_UNTAGGED(word);
if(array_nth(untag_object(word->subprimitive),1) != F)
jit_add_literal(jit,T);
UNREGISTER_UNTAGGED(word);
jit_emit(jit,word->subprimitive);
}

View File

@ -47,6 +47,7 @@
#include "quotations.h"
#include "dispatch.h"
#include "jit.h"
#include "inline_cache.h"
#include "factor.h"
#include "utilities.h"

View File

@ -145,5 +145,6 @@ void *primitives[] = {
primitive_jit_compile,
primitive_load_locals,
primitive_check_datastack,
primitive_lookup_method
primitive_lookup_method,
primitive_inline_cache_miss,
};

View File

@ -168,14 +168,7 @@ void jit_compile(CELL quot, bool relocate)
/* Intrinsics */
if(word->subprimitive != F)
{
REGISTER_UNTAGGED(word);
if(array_nth(untag_object(word->subprimitive),1) != F)
jit_add_literal(&jit,T);
UNREGISTER_UNTAGGED(word);
jit_emit(&jit,word->subprimitive);
}
jit_emit_subprimitive(&jit,word);
/* The (execute) primitive is special-cased */
else if(obj == userenv[JIT_EXECUTE_WORD])
{

View File

@ -48,7 +48,7 @@ typedef enum {
JIT_RETURN,
JIT_PROFILING,
JIT_PUSH_IMMEDIATE,
JIT_DECLARE_WORD = 42,
JIT_DECLARE_WORD,
JIT_SAVE_STACK,
JIT_DIP_WORD,
JIT_DIP,
@ -60,6 +60,15 @@ typedef enum {
JIT_EXECUTE_JUMP,
JIT_EXECUTE_CALL,
/* Used by polymorphic inline cache generation in inline_cache.c */
PIC_TAG = 53,
PIC_HI_TAG,
PIC_TUPLE,
PIC_HI_TAG_TUPLE,
PIC_CHECK,
PIC_HIT,
PIC_MISS_WORD,
STACK_TRACES_ENV = 59,
UNDEFINED_ENV = 60, /* default quotation for undefined words */

View File

@ -172,7 +172,8 @@ CELL allot_array_4(CELL v1, CELL v2, CELL v3, CELL v4)
F_ARRAY *reallot_array(F_ARRAY* array, CELL capacity)
{
#ifdef FACTOR_DEBUG
assert(untag_header(array->header) == ARRAY_TYPE);
CELL header = untag_header(array->header);
assert(header == ARRAY_TYPE || header == BIGNUM_TYPE);
#endif
CELL to_copy = array_capacity(array);