diff --git a/TODO.FACTOR.txt b/TODO.FACTOR.txt index a4eb4c3d3a..6587e10036 100644 --- a/TODO.FACTOR.txt +++ b/TODO.FACTOR.txt @@ -1,13 +1,13 @@ + allot refactoring: - rethink all string conversions -- what if retain stack is full - - ie, inside retain stack overflow handler, don't cons? - os-windows.c error_message &co - inline float allocation needs a gc check - alien invoke, callback need a gc check - relocation should not cons at all - ffi_dlopen, etc +- throwing an error can fill up the extra_roots stack +- last-index miscompiles + ui: @@ -49,6 +49,7 @@ + compiler/ffi: +- we may be able to remove the dlsym primitive - [ [ dup call ] dup call ] infer hangs - stdcall callbacks - callstack overflow when compiling mutually recursive inline words @@ -77,6 +78,7 @@ + misc: +- don't save big free chunk at the end of the code heap - faster apropos - growable data heap - minor GC takes too long now, we should card mark code heap diff --git a/vm/alien.c b/vm/alien.c index 620bbd8025..09ee7fc4c7 100644 --- a/vm/alien.c +++ b/vm/alien.c @@ -152,6 +152,7 @@ void box_value_pair(CELL x, CELL y) dpush(tag_object(array)); } +/* open a native library and push a handle */ void primitive_dlopen(void) { F_DLL* dll = allot_object(DLL_TYPE,sizeof(F_DLL)); @@ -160,6 +161,7 @@ void primitive_dlopen(void) dpush(tag_object(dll)); } +/* look up a symbol in a native library */ void primitive_dlsym(void) { CELL dll = dpop(); @@ -178,6 +180,7 @@ void primitive_dlsym(void) box_signed_cell((CELL)ffi_dlsym(d,sym,true)); } +/* close a native library handle */ void primitive_dlclose(void) { ffi_dlclose(untag_dll(dpop())); diff --git a/vm/code_gc.c b/vm/code_gc.c index 0383f12258..6bc38c0374 100644 --- a/vm/code_gc.c +++ b/vm/code_gc.c @@ -12,11 +12,14 @@ void new_heap(F_HEAP *heap, CELL size) heap->free_list = NULL; } +/* Allocate a code heap during startup */ void init_code_heap(CELL size) { new_heap(&compiling,size); } +/* If there is no previous block, next_free becomes the head of the free list, +else its linked in */ INLINE void update_free_list(F_HEAP *heap, F_BLOCK *prev, F_BLOCK *next_free) { if(prev) @@ -25,7 +28,7 @@ INLINE void update_free_list(F_HEAP *heap, F_BLOCK *prev, F_BLOCK *next_free) heap->free_list = next_free; } -/* called after reading the code heap from the image file. we must build the +/* Called after reading the code heap from the image file. We must build the free list, and add a large free block from compiling.base + size to compiling.limit. */ void build_free_list(F_HEAP *heap, CELL size) @@ -34,6 +37,7 @@ void build_free_list(F_HEAP *heap, CELL size) F_BLOCK *scan = (F_BLOCK *)heap->base; F_BLOCK *end = (F_BLOCK *)(heap->base + size); + /* Add all free blocks to the free list */ while(scan && scan < end) { if(scan->status == B_FREE) @@ -45,6 +49,7 @@ void build_free_list(F_HEAP *heap, CELL size) scan = next_block(heap,scan); } + /* If there is room at the end of the heap, add a free block */ if((CELL)(end + 1) <= heap->limit) { end->status = B_FREE; @@ -62,6 +67,7 @@ void build_free_list(F_HEAP *heap, CELL size) update_free_list(heap,prev,end); } +/* Allocate a block of memory from the mark and sweep GC heap */ CELL heap_allot(F_HEAP *heap, CELL size) { F_BLOCK *prev = NULL; @@ -151,6 +157,7 @@ void free_unmarked(F_HEAP *heap) build_free_list(heap,heap->limit - heap->base); } +/* Compute total sum of sizes of free blocks */ CELL heap_free_space(F_HEAP *heap) { CELL size = 0; @@ -175,6 +182,7 @@ CELL heap_size(F_HEAP *heap) return (CELL)scan - (CELL)start; } +/* Apply a function to every code block */ void iterate_code_heap(CODE_HEAP_ITERATOR iter) { F_BLOCK *scan = (F_BLOCK *)compiling.base; @@ -187,6 +195,7 @@ void iterate_code_heap(CODE_HEAP_ITERATOR iter) } } +/* Copy all literals referenced from a code block to newspace */ void collect_literals_step(F_COMPILED *relocating, CELL code_start, CELL reloc_start, CELL literal_start, CELL words_start, CELL words_end) { @@ -197,6 +206,8 @@ void collect_literals_step(F_COMPILED *relocating, CELL code_start, for(scan = literal_start; scan < literal_end; scan += CELLS) copy_handle((CELL*)scan); + /* If the block is not finalized, the words area contains pointers to + words in the data heap rather than XTs in the code heap */ if(!relocating->finalized) { for(scan = words_start; scan < words_end; scan += CELLS) @@ -204,11 +215,13 @@ void collect_literals_step(F_COMPILED *relocating, CELL code_start, } } +/* Copy literals referenced from all code blocks to newspace */ void collect_literals(void) { iterate_code_heap(collect_literals_step); } +/* Mark all XTs referenced from a code block */ void mark_sweep_step(F_COMPILED *compiled, CELL code_start, CELL reloc_start, CELL literal_start, CELL words_start, CELL words_end) { @@ -221,14 +234,18 @@ void mark_sweep_step(F_COMPILED *compiled, CELL code_start, } } +/* Mark all XTs and literals referenced from a word XT */ void recursive_mark(CELL xt) { F_BLOCK *block = xt_to_block(xt); + /* If already marked, do nothing */ if(block->status == B_MARKED) return; + /* Mark it */ else if(block->status == B_ALLOCATED) block->status = B_MARKED; + /* We should never be asked to mark a free block */ else critical_error("Marking the wrong block",(CELL)block); @@ -237,17 +254,20 @@ void recursive_mark(CELL xt) iterate_code_heap_step(compiled,mark_sweep_step); } +/* Push the free space and total size of the code heap */ void primitive_code_room(void) { box_unsigned_cell(heap_free_space(&compiling)); box_unsigned_cell(compiling.limit - compiling.base); } +/* Perform a code GC */ void primitive_code_gc(void) { garbage_collection(TENURED,true); } +/* Dump all code blocks for debugging */ void dump_heap(F_HEAP *heap) { F_BLOCK *scan = (F_BLOCK *)heap->base; diff --git a/vm/compiler.c b/vm/compiler.c index be0aee5174..65c5d929c0 100644 --- a/vm/compiler.c +++ b/vm/compiler.c @@ -1,5 +1,7 @@ #include "factor.h" +/* References to undefined symbols are patched up to call this function on +image load */ void undefined_symbol(void) { general_error(ERROR_UNDEFINED_SYMBOL,F,F,true); @@ -12,6 +14,7 @@ INLINE CELL get_literal(CELL literal_start, CELL num) return get(CREF(literal_start,num)); } +/* Look up an external library symbol referenced by a compiled code block */ CELL get_rel_symbol(F_REL *rel, CELL literal_start) { CELL arg = REL_ARGUMENT(rel); @@ -31,6 +34,7 @@ CELL get_rel_symbol(F_REL *rel, CELL literal_start) return sym; } +/* Compute an address to store at a relocation */ INLINE CELL compute_code_rel(F_REL *rel, CELL code_start, CELL literal_start, CELL words_start) { @@ -58,12 +62,14 @@ INLINE CELL compute_code_rel(F_REL *rel, } } +/* Store a 32-bit value into two consecutive PowerPC LI/LIS instructions */ INLINE void reloc_set_2_2(CELL cell, CELL value) { put(cell - CELLS,((get(cell - CELLS) & ~0xffff) | ((value >> 16) & 0xffff))); put(cell,((get(cell) & ~0xffff) | (value & 0xffff))); } +/* Store a value into a bitfield of a PowerPC instruction */ INLINE void reloc_set_masked(CELL cell, CELL value, CELL mask) { u32 original = *(u32*)cell; @@ -71,6 +77,7 @@ INLINE void reloc_set_masked(CELL cell, CELL value, CELL mask) *(u32*)cell = (original | (value & mask)); } +/* Perform a fixup on a code block */ void apply_relocation(F_REL *rel, CELL code_start, CELL literal_start, CELL words_start) { @@ -111,17 +118,19 @@ void apply_relocation(F_REL *rel, } } +/* Perform all fixups on a code block */ void relocate_code_block(F_COMPILED *relocating, CELL code_start, CELL reloc_start, CELL literal_start, CELL words_start, CELL words_end) { F_REL *rel = (F_REL *)reloc_start; F_REL *rel_end = (F_REL *)literal_start; - /* apply relocations */ while(rel < rel_end) apply_relocation(rel++,code_start,literal_start,words_start); } +/* After compiling a batch of words, we replace all mutual word references with +direct XT references, and perform fixups */ void finalize_code_block(F_COMPILED *relocating, CELL code_start, CELL reloc_start, CELL literal_start, CELL words_start, CELL words_end) { @@ -138,6 +147,7 @@ void finalize_code_block(F_COMPILED *relocating, CELL code_start, flush_icache(code_start,reloc_start - code_start); } +/* Write a sequence of integers to memory, with 'format' bytes per integer */ void deposit_integers(CELL here, F_VECTOR *vector, CELL format) { CELL count = untag_fixnum_fast(vector->top); @@ -158,6 +168,7 @@ void deposit_integers(CELL here, F_VECTOR *vector, CELL format) } } +/* Write a sequence of tagged pointers to memory */ void deposit_objects(CELL here, F_VECTOR *vector, CELL literal_length) { F_ARRAY *array = untag_array_fast(vector->array); @@ -180,29 +191,29 @@ void primitive_add_compiled_block(void) CELL start; { - /* read parameters from stack, leaving them on the stack */ + /* Read parameters from stack, leaving them on the stack */ FROB - /* try allocating a new code block */ + /* Try allocating a new code block */ CELL total_length = sizeof(F_COMPILED) + code_length + rel_length + literal_length + words_length; start = heap_allot(&compiling,total_length); - /* if allocation failed, do a code GC */ + /* If allocation failed, do a code GC */ if(start == 0) { garbage_collection(TENURED,true); start = heap_allot(&compiling,total_length); - /* insufficient room even after code GC, give up */ + /* Insufficient room even after code GC, give up */ if(start == 0) critical_error("code heap exhausted",0); } } /* we have to read the parameters again, since we may have called - code GC in which case the data heap semi-spaces will have switched */ + GC above in which case the data heap semi-spaces will have switched */ FROB /* now we can pop the parameters from the stack */ @@ -244,6 +255,8 @@ void primitive_add_compiled_block(void) #undef FROB +/* After batch compiling a bunch of words, perform various fixups to make them +executable */ void primitive_finalize_compile(void) { F_ARRAY *array = untag_array(dpop()); diff --git a/vm/data_gc.c b/vm/data_gc.c index 94d73a050e..4e64a14472 100644 --- a/vm/data_gc.c +++ b/vm/data_gc.c @@ -1,5 +1,7 @@ #include "factor.h" +/* Test if 'fault' is in the guard page at the top or bottom (depending on +offset being 0 or -1) of area+area_size */ bool in_page(CELL fault, CELL area, CELL area_size, int offset) { const int pagesize = getpagesize(); @@ -9,6 +11,7 @@ bool in_page(CELL fault, CELL area, CELL area_size, int offset) return fault >= area && fault <= area + pagesize; } +/* If memory allocation fails, bail out */ void *safe_malloc(size_t size) { void *ptr = malloc(size); @@ -17,6 +20,7 @@ void *safe_malloc(size_t size) return ptr; } +/* Size of the object pointed to by a tagged pointer */ CELL object_size(CELL tagged) { if(tagged == F || TAG(tagged) == FIXNUM_TYPE) @@ -25,11 +29,13 @@ CELL object_size(CELL tagged) return untagged_object_size(UNTAG(tagged)); } +/* Size of the object pointed to by an untagged pointer */ CELL untagged_object_size(CELL pointer) { return align8(unaligned_object_size(pointer)); } +/* Size of the data area of an object pointed to by an untagged pointer */ CELL unaligned_object_size(CELL pointer) { switch(untag_header(get(pointer))) @@ -73,6 +79,7 @@ void primitive_size(void) drepl(tag_fixnum(object_size(dpeek()))); } +/* Push memory usage statistics in data heap */ void primitive_data_room(void) { int gen; @@ -142,7 +149,7 @@ void primitive_end_scan(void) gc_off = false; } -/* scan all the objects in the card */ +/* Scan all the objects in the card */ INLINE void collect_card(F_CARD *ptr, CELL here) { F_CARD c = *ptr; @@ -164,6 +171,7 @@ INLINE void collect_card(F_CARD *ptr, CELL here) cards_scanned++; } +/* Copy all newspace objects referenced from marked cards to the destination */ INLINE void collect_gen_cards(CELL gen) { F_CARD *ptr = ADDR_TO_CARD(generations[gen].base); @@ -180,6 +188,8 @@ INLINE void collect_gen_cards(CELL gen) } } +/* After all old->new forward references have been copied over, we must unmark +the cards */ void unmark_cards(CELL from, CELL to) { F_CARD *ptr = ADDR_TO_CARD(generations[from].base); @@ -190,6 +200,8 @@ void unmark_cards(CELL from, CELL to) unmark_card(ptr); } +/* Every card stores the offset of the first object in that card, which must be +cleared when a generation has been cleared */ void clear_cards(CELL from, CELL to) { /* NOTE: reverse order due to heap layout. */ @@ -199,7 +211,8 @@ void clear_cards(CELL from, CELL to) clear_card(ptr); } -/* scan cards in all generations older than the one being collected */ +/* Scan cards in all generations older than the one being collected, copying +old->new references */ void collect_cards(CELL gen) { int i; @@ -207,8 +220,6 @@ void collect_cards(CELL gen) collect_gen_cards(i); } -/* Generational copying garbage collector */ - CELL init_zone(F_ZONE *z, CELL size, CELL base) { z->base = z->here = base; @@ -269,6 +280,11 @@ void init_data_heap(CELL gens, minor_collections = 0; cards_scanned = 0; secure_gc = secure_gc_; + + data_heap_end = data_heap_start + total_size; + + extra_roots_region = alloc_bounded_block(getpagesize()); + extra_roots = (CELL *)extra_roots_region->start; } void collect_callframe_triple(CELL *callframe, @@ -281,6 +297,7 @@ void collect_callframe_triple(CELL *callframe, *callframe_end += *callframe; } +/* Copy all tagged pointers in a range of memory */ void collect_stack(F_BOUNDED_BLOCK *region, CELL top) { CELL bottom = region->start; @@ -290,6 +307,7 @@ void collect_stack(F_BOUNDED_BLOCK *region, CELL top) copy_handle((CELL*)ptr); } +/* The callstack has a special format */ void collect_callstack(F_BOUNDED_BLOCK *region, CELL top) { CELL bottom = region->start; @@ -300,6 +318,8 @@ void collect_callstack(F_BOUNDED_BLOCK *region, CELL top) (CELL*)ptr + 1, (CELL*)ptr + 2); } +/* Copy roots over at the start of GC, namely various constants, stacks, +the user environment and extra roots registered with REGISTER_ROOT */ void collect_roots(void) { int i; @@ -311,6 +331,8 @@ void collect_roots(void) copy_handle(&bignum_neg_one); collect_callframe_triple(&callframe,&callframe_scan,&callframe_end); + collect_stack(extra_roots_region,(CELL)extra_roots); + save_stacks(); stacks = stack_chain; @@ -336,7 +358,7 @@ void collect_roots(void) copy_handle(&userenv[i]); } -/* Given a pointer to oldspace, copy it to newspace. */ +/* Given a pointer to oldspace, copy it to newspace */ INLINE void *copy_untagged_object(void *pointer, CELL size) { void *newpointer; @@ -358,7 +380,7 @@ INLINE CELL copy_object_impl(CELL pointer) return newpointer; } -/* follow a chain of forwarding pointers */ +/* Follow a chain of forwarding pointers */ CELL resolve_forwarding(CELL untagged, CELL tag) { CELL header = get(untagged); @@ -375,12 +397,10 @@ CELL resolve_forwarding(CELL untagged, CELL tag) } } -/* -Given a pointer to a tagged pointer to oldspace, copy it to newspace. +/* Given a pointer to a tagged pointer to oldspace, copy it to newspace. If the object has already been copied, return the forwarding pointer address without copying anything; otherwise, install -a new forwarding pointer. -*/ +a new forwarding pointer. */ CELL copy_object(CELL pointer) { CELL tag; @@ -461,6 +481,8 @@ CELL collect_next(CELL scan) return scan + size; } +/* After garbage collection, any generations which are now empty need to have +their allocation pointers and cards reset. */ void reset_generations(CELL from, CELL to) { CELL i; @@ -476,6 +498,7 @@ void reset_generations(CELL from, CELL to) clear_cards(from,to); } +/* Prepare to start copying reachable objects into an unused zone */ void begin_gc(CELL gen, bool code_gc) { collecting_gen = gen; @@ -531,7 +554,7 @@ void end_gc() /* all generations up to and including the one collected are now empty */ reset_generations(NURSERY,collecting_gen); - + minor_collections++; } @@ -543,7 +566,7 @@ void end_gc() } } -/* collect gen and all younger generations */ +/* Collect gen and all younger generations */ void garbage_collection(CELL gen, bool code_gc) { s64 start = current_millis(); @@ -601,6 +624,7 @@ void primitive_data_gc(void) garbage_collection(gen,false); } +/* Push total time spent on GC */ void primitive_gc_time(void) { box_unsigned_8(gc_time); diff --git a/vm/data_gc.h b/vm/data_gc.h index 9214eb1cf2..a027eb1ffd 100644 --- a/vm/data_gc.h +++ b/vm/data_gc.h @@ -183,17 +183,33 @@ bool gc_off; void garbage_collection(CELL gen, bool code_gc); -#define REGISTER_ROOT(obj) rpush(obj) -#define UNREGISTER_ROOT(obj) obj = rpop() +/* If a runtime function needs to call another function which potentially +allocates memory, it must store any local variable references to Factor +objects on the root stack */ +F_BOUNDED_BLOCK *extra_roots_region; +CELL *extra_roots; -#define REGISTER_ARRAY(obj) rpush(tag_object(obj)) -#define UNREGISTER_ARRAY(obj) obj = untag_array_fast(rpop()) +INLINE void push_root(CELL tagged) +{ + *(extra_roots++) = tagged; +} -#define REGISTER_STRING(obj) rpush(tag_object(obj)) -#define UNREGISTER_STRING(obj) obj = untag_string_fast(rpop()) +INLINE CELL pop_root(void) +{ + return *(--extra_roots); +} -#define REGISTER_C_STRING(obj) rpush(tag_object(((F_ARRAY *)obj) - 1)) -#define UNREGISTER_C_STRING(obj) obj = ((char*)(untag_array_fast(rpop()) + 1)) +#define REGISTER_ROOT(obj) push_root(obj) +#define UNREGISTER_ROOT(obj) obj = pop_root() + +#define REGISTER_ARRAY(obj) push_root(tag_object(obj)) +#define UNREGISTER_ARRAY(obj) obj = untag_array_fast(pop_root()) + +#define REGISTER_STRING(obj) push_root(tag_object(obj)) +#define UNREGISTER_STRING(obj) obj = untag_string_fast(pop_root()) + +#define REGISTER_C_STRING(obj) push_root(tag_object(((F_ARRAY *)obj) - 1)) +#define UNREGISTER_C_STRING(obj) obj = ((char*)(untag_array_fast(pop_root()) + 1)) INLINE void *allot_zone(F_ZONE *z, CELL a) { diff --git a/vm/factor.c b/vm/factor.c index 4e67e04f91..eec21c49e1 100644 --- a/vm/factor.c +++ b/vm/factor.c @@ -1,5 +1,6 @@ #include "factor.h" +/* Get things started */ void init_factor(const char* image, CELL ds_size, CELL rs_size, CELL cs_size, CELL gen_count, CELL young_size, CELL aging_size, diff --git a/vm/image.c b/vm/image.c index 286aa2ccd1..444d71b3d9 100644 --- a/vm/image.c +++ b/vm/image.c @@ -1,5 +1,6 @@ #include "factor.h" +/* Certain special objects in the image are known to the runtime */ void init_objects(F_HEADER *h) { int i; @@ -13,6 +14,7 @@ void init_objects(F_HEADER *h) bignum_neg_one = h->bignum_neg_one; } +/* Read an image file from disk, only done once during startup */ void load_image(const char* filename) { FILE* file; @@ -79,6 +81,7 @@ void load_image(const char* filename) fflush(stdout); } +/* Save the current image to disk */ bool save_image(const char* filename) { FILE* file; @@ -121,6 +124,7 @@ void primitive_save_image(void) save_image(to_char_string(filename,true)); } +/* Initialize an object in a newly-loaded image */ void relocate_object(CELL relocating) { CELL scan = relocating; @@ -152,6 +156,8 @@ void relocate_object(CELL relocating) } } +/* Since the image might have been saved with a different base address than +where it is loaded, we need to fix up pointers in the image. */ void relocate_data() { CELL relocating; diff --git a/vm/math.c b/vm/math.c index a9207294c6..41afed2e8b 100644 --- a/vm/math.c +++ b/vm/math.c @@ -71,10 +71,8 @@ void primitive_fixnum_subtract_fast(void) dpush(tag_fixnum(x - y)); } -/** - * Multiply two integers, and trap overflow. - * Thanks to David Blaikie (The_Vulture from freenode #java) for the hint. - */ +/* Multiply two integers, and trap overflow. +Thanks to David Blaikie (The_Vulture from freenode #java) for the hint. */ void primitive_fixnum_multiply(void) { POP_FIXNUMS(x,y)