Faster inline allocators

db4
Slava Pestov 2008-04-19 04:52:34 -05:00
parent 942313afa3
commit f48d5091c9
14 changed files with 77 additions and 31 deletions

View File

@ -187,6 +187,9 @@ HOOK: %unbox-any-c-ptr cpu ( dst src -- )
HOOK: %box-alien cpu ( dst src -- )
! GC check
HOOK: %gc cpu
: operand ( var -- op ) get v>operand ; inline
: unique-operands ( operands quot -- )

View File

@ -7,7 +7,7 @@ cpu.architecture alien ;
IN: cpu.ppc.allot
: load-zone-ptr ( reg -- )
"nursery" f pick %load-dlsym dup 0 LWZ ;
"nursery" f pick %load-dlsym ;
: %allot ( header size -- )
#! Store a pointer to 'size' bytes allocated from the
@ -25,6 +25,19 @@ IN: cpu.ppc.allot
: %store-tagged ( reg tag -- )
>r dup fresh-object v>operand 11 r> tag-number ORI ;
M: ppc %gc
"end" define-label
12 load-zone-ptr
11 12 cell LWZ ! nursery.here -> r11
12 12 3 cells LWZ ! nursery.end -> r12
11 12 1024 ADDI ! add ALLOT_BUFFER_ZONE to here
0 11 12 CMPI ! is here >= end?
"end" get BLE
0 frame-required
%prepare-alien-invoke
"minor_gc" f %alien-invoke
"end" resolve-label ;
: %allot-float ( reg -- )
#! exits with tagged ptr to object in r12, untagged in r11
float 16 %allot

View File

@ -17,6 +17,8 @@ M: x86.32 ds-reg ESI ;
M: x86.32 rs-reg EDI ;
M: x86.32 stack-reg ESP ;
M: x86.32 stack-save-reg EDX ;
M: x86.32 temp-reg-1 EAX ;
M: x86.32 temp-reg-2 ECX ;
M: temp-reg v>operand drop EBX ;

View File

@ -12,6 +12,8 @@ M: x86.64 ds-reg R14 ;
M: x86.64 rs-reg R15 ;
M: x86.64 stack-reg RSP ;
M: x86.64 stack-save-reg RSI ;
M: x86.64 temp-reg-1 RAX ;
M: x86.64 temp-reg-2 RCX ;
M: temp-reg v>operand drop RBX ;

View File

@ -16,12 +16,12 @@ IN: cpu.x86.allot
: object@ ( n -- operand ) cells (object@) ;
: load-zone-ptr ( -- )
: load-zone-ptr ( reg -- )
#! Load pointer to start of zone array
"nursery" f allot-reg %alien-global ;
0 MOV "nursery" f rc-absolute-cell rel-dlsym ;
: load-allot-ptr ( -- )
load-zone-ptr
allot-reg load-zone-ptr
allot-reg PUSH
allot-reg dup cell [+] MOV ;
@ -29,6 +29,19 @@ IN: cpu.x86.allot
allot-reg POP
allot-reg cell [+] swap 8 align ADD ;
M: x86.32 %gc ( -- )
"end" define-label
temp-reg-1 load-zone-ptr
temp-reg-2 temp-reg-1 cell [+] MOV
temp-reg-2 1024 ADD
temp-reg-1 temp-reg-1 3 cells [+] MOV
temp-reg-2 temp-reg-1 CMP
"end" get JLE
0 frame-required
%prepare-alien-invoke
"minor_gc" f %alien-invoke
"end" resolve-label ;
: store-header ( header -- )
0 object@ swap type-number tag-fixnum MOV ;

View File

@ -34,6 +34,10 @@ GENERIC: push-return-reg ( reg-class -- )
GENERIC: load-return-reg ( stack@ reg-class -- )
GENERIC: store-return-reg ( stack@ reg-class -- )
! Only used by inline allocation
HOOK: temp-reg-1 cpu
HOOK: temp-reg-2 cpu
HOOK: address-operand cpu ( address -- operand )
HOOK: fixnum>slot@ cpu

View File

@ -468,11 +468,6 @@ M: loc lazy-store
: finalize-contents ( -- )
finalize-locs finalize-vregs reset-phantoms ;
: %gc ( -- )
0 frame-required
%prepare-alien-invoke
"simple_gc" f %alien-invoke ;
! Loading stacks to vregs
: free-vregs? ( int# float# -- ? )
double-float-regs free-vregs length <=

View File

@ -305,6 +305,11 @@ cell-bits 32 = [
] unit-test
] when
[ f ] [
[ { integer } declare -63 shift 4095 bitand ]
\ shift inlined?
] unit-test
[ t ] [
[ B{ 1 0 } *short 0 number= ]
\ number= inlined?
@ -557,6 +562,11 @@ M: integer detect-integer ;
] { * + shift rem mod fixnum-mod fixnum* fixnum+ fixnum- >fixnum } inlined?
] unit-test
[ t ] [
[ { integer } declare bitnot detect-integer ]
\ detect-integer inlined?
] unit-test
! Later
! [ t ] [

View File

@ -122,7 +122,7 @@ M: float fp-nan?
: next-power-of-2 ( m -- n ) 2 swap (next-power-of-2) ; foldable
: power-of-2? ( n -- ? )
dup 0 < [ drop f ] [ dup 1- bitand zero? ] if ; foldable
dup 0 <= [ drop f ] [ dup 1- bitand zero? ] if ; foldable
: align ( m w -- n )
1- [ + ] keep bitnot bitand ; inline

View File

@ -125,7 +125,8 @@ optimizer.math.partial generic.standard system accessors ;
] if ; inline
: math-output-class/interval-1 ( node word -- classes intervals )
[ drop { } math-output-class ] [ math-output-interval-1 ] 2bi ;
[ drop { } math-output-class 1array ]
[ math-output-interval-1 1array ] 2bi ;
{
{ bitnot interval-bitnot }
@ -362,7 +363,6 @@ most-negative-fixnum most-positive-fixnum [a,b]
{ + [ [ >fixnum ] bi@ fixnum+fast ] }
{ - [ [ >fixnum ] bi@ fixnum-fast ] }
{ * [ [ >fixnum ] bi@ fixnum*fast ] }
{ shift [ [ >fixnum ] bi@ fixnum-shift-fast ] }
} [
>r derived-ops r> [
[

View File

@ -122,7 +122,7 @@ void clear_cards(CELL from, CELL to)
void set_data_heap(F_DATA_HEAP *data_heap_)
{
data_heap = data_heap_;
nursery = &data_heap->generations[NURSERY];
nursery = data_heap->generations[NURSERY];
init_cards_offset();
clear_cards(NURSERY,TENURED);
}
@ -231,7 +231,7 @@ DEFINE_PRIMITIVE(data_room)
for(gen = 0; gen < data_heap->gen_count; gen++)
{
F_ZONE *z = &data_heap->generations[gen];
F_ZONE *z = (gen == NURSERY ? &nursery : &data_heap->generations[gen]);
set_array_nth(a,gen * 2,tag_fixnum((z->end - z->here) >> 10));
set_array_nth(a,gen * 2 + 1,tag_fixnum((z->size) >> 10));
}
@ -583,7 +583,7 @@ CELL collect_next(CELL scan)
INLINE void reset_generation(CELL i)
{
F_ZONE *z = &data_heap->generations[i];
F_ZONE *z = (i == NURSERY ? &nursery : &data_heap->generations[i]);
z->here = z->start;
if(secure_gc)
memset((void*)z->start,69,z->size);
@ -608,7 +608,7 @@ void begin_gc(CELL requested_bytes)
old_data_heap = data_heap;
set_data_heap(grow_data_heap(old_data_heap,requested_bytes));
newspace = &data_heap->generations[collecting_gen];
newspace = &data_heap->generations[TENURED];
}
else if(collecting_accumulation_gen_p())
{
@ -783,6 +783,11 @@ void gc(void)
garbage_collection(TENURED,false,0);
}
void minor_gc(void)
{
garbage_collection(NURSERY,false,0);
}
DEFINE_PRIMITIVE(gc)
{
gc();
@ -794,12 +799,6 @@ DEFINE_PRIMITIVE(gc_time)
box_unsigned_8(gc_time);
}
void simple_gc(void)
{
if(nursery->here + ALLOT_BUFFER_ZONE > nursery->end)
garbage_collection(NURSERY,false,0);
}
DEFINE_PRIMITIVE(become)
{
F_ARRAY *new_objects = untag_array(dpop());

View File

@ -20,6 +20,7 @@ DECLARE_PRIMITIVE(next_object);
DECLARE_PRIMITIVE(end_scan);
void gc(void);
DLLEXPORT void minor_gc(void);
/* generational copying GC divides memory into zones */
typedef struct {
@ -125,7 +126,7 @@ void collect_cards(void);
F_ZONE *newspace;
/* new objects are allocated here */
DLLEXPORT F_ZONE *nursery;
DLLEXPORT F_ZONE nursery;
INLINE bool in_zone(F_ZONE *z, CELL pointer)
{
@ -200,7 +201,7 @@ INLINE bool should_copy(CELL untagged)
else if(HAVE_AGING_P && collecting_gen == AGING)
return !in_zone(&data_heap->generations[TENURED],untagged);
else if(HAVE_NURSERY_P && collecting_gen == NURSERY)
return in_zone(&data_heap->generations[NURSERY],untagged);
return in_zone(&nursery,untagged);
else
{
critical_error("Bug in should_copy",untagged);
@ -315,13 +316,15 @@ INLINE void* allot_object(CELL type, CELL a)
{
CELL *object;
if(HAVE_NURSERY_P && nursery->size - ALLOT_BUFFER_ZONE > a)
if(HAVE_NURSERY_P && nursery.size - ALLOT_BUFFER_ZONE > a)
{
/* If there is insufficient room, collect the nursery */
if(nursery->here + ALLOT_BUFFER_ZONE + a > nursery->end)
if(nursery.here + ALLOT_BUFFER_ZONE + a > nursery.end)
garbage_collection(NURSERY,false,0);
object = allot_zone(nursery,a);
CELL h = nursery.here;
nursery.here = h + align8(a);
object = (void*)h;
}
/* If the object is bigger than the nursery, allocate it in
tenured space */
@ -360,8 +363,6 @@ INLINE void* allot_object(CELL type, CELL a)
CELL collect_next(CELL scan);
DLLEXPORT void simple_gc(void);
DECLARE_PRIMITIVE(gc);
DECLARE_PRIMITIVE(gc_time);
DECLARE_PRIMITIVE(become);

View File

@ -227,7 +227,11 @@ void dump_zone(F_ZONE *z)
void dump_generations(void)
{
int i;
for(i = 0; i < data_heap->gen_count; i++)
printf("Nursery: ");
dump_zone(&nursery);
for(i = 1; i < data_heap->gen_count; i++)
{
printf("Generation %d: ",i);
dump_zone(&data_heap->generations[i]);

View File

@ -96,7 +96,7 @@ void memory_protection_error(CELL addr, F_STACK_FRAME *native_stack)
general_error(ERROR_RS_UNDERFLOW,F,F,native_stack);
else if(in_page(addr, rs_bot, rs_size, 0))
general_error(ERROR_RS_OVERFLOW,F,F,native_stack);
else if(in_page(addr, nursery->end, 0, 0))
else if(in_page(addr, nursery.end, 0, 0))
critical_error("allot_object() missed GC check",0);
else if(in_page(addr, gc_locals_region->start, 0, -1))
critical_error("gc locals underflow",0);