From fb605aadadc894cd571e1f5f1da776c543a46236 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Thu, 8 May 2008 23:21:46 -0500 Subject: [PATCH] Simpler write barrier --- core/cpu/ppc/intrinsics/intrinsics.factor | 17 ++++----- core/cpu/x86/32/32.factor | 5 ++- core/cpu/x86/64/64.factor | 5 ++- core/cpu/x86/architecture/architecture.factor | 3 +- core/cpu/x86/intrinsics/intrinsics.factor | 2 +- vm/Config.unix | 4 +- vm/data_gc.c | 37 ++++++++++++++----- vm/data_gc.h | 25 +++++++------ 8 files changed, 60 insertions(+), 38 deletions(-) diff --git a/core/cpu/ppc/intrinsics/intrinsics.factor b/core/cpu/ppc/intrinsics/intrinsics.factor index 95af9e4b4b..d85c70577e 100755 --- a/core/cpu/ppc/intrinsics/intrinsics.factor +++ b/core/cpu/ppc/intrinsics/intrinsics.factor @@ -61,20 +61,17 @@ IN: cpu.ppc.intrinsics : %write-barrier ( -- ) "val" get operand-immediate? "obj" get fresh-object? or [ + "scratch1" operand card-mark LI + ! Mark the card - "obj" operand "scratch" operand card-bits SRWI "val" operand load-cards-offset - "scratch" operand dup "val" operand ADD - "val" operand "scratch" operand 0 LBZ - "val" operand dup card-mark ORI - "val" operand "scratch" operand 0 STB + "obj" operand "scratch2" operand card-bits SRWI + "val" operand "scratch2" operand "val" operand STBX ! Mark the card deck - "obj" operand "scratch" operand deck-bits SRWI "val" operand load-decks-offset - "scratch" operand dup "val" operand ADD - card-mark "val" operand LI - "val" operand "scratch" operand 0 STB + "obj" operand "scratch" operand deck-bits SRWI + "val" operand "scratch" operand "val" operand STBX ] unless ; \ set-slot { @@ -82,7 +79,7 @@ IN: cpu.ppc.intrinsics { [ %slot-literal-known-tag STW %write-barrier ] H{ { +input+ { { f "val" } { f "obj" known-tag } { [ small-slot? ] "n" } } } - { +scratch+ { { f "scratch" } } } + { +scratch+ { { f "scratch1" } { f "scratch2" } } } { +clobber+ { "val" } } } } diff --git a/core/cpu/x86/32/32.factor b/core/cpu/x86/32/32.factor index 50e38f2082..9ef8177cf3 100755 --- a/core/cpu/x86/32/32.factor +++ b/core/cpu/x86/32/32.factor @@ -22,8 +22,9 @@ M: x86.32 temp-reg-2 ECX ; M: temp-reg v>operand drop EBX ; -M: x86.32 %alien-invoke ( symbol dll -- ) - (CALL) rel-dlsym ; +M: x86.32 %alien-global 0 [] MOV rc-absolute-cell rel-dlsym ; + +M: x86.32 %alien-invoke (CALL) rel-dlsym ; ! On x86, parameters are never passed in registers. M: int-regs return-reg drop EAX ; diff --git a/core/cpu/x86/64/64.factor b/core/cpu/x86/64/64.factor index 5f396e7751..9c44a6a656 100755 --- a/core/cpu/x86/64/64.factor +++ b/core/cpu/x86/64/64.factor @@ -130,7 +130,10 @@ M: x86.64 %prepare-box-struct ( size -- ) M: x86.64 %prepare-var-args RAX RAX XOR ; -M: x86.64 %alien-invoke ( symbol dll -- ) +M: x86.64 %alien-global + [ 0 MOV rc-absolute-cell rel-dlsym ] [ dup [] MOV ] bi ; + +M: x86.64 %alien-invoke 0 address-operand >r rc-absolute-cell rel-dlsym r> CALL ; M: x86.64 %prepare-alien-indirect ( -- ) diff --git a/core/cpu/x86/architecture/architecture.factor b/core/cpu/x86/architecture/architecture.factor index 0b67ef7eb3..88881b19a8 100755 --- a/core/cpu/x86/architecture/architecture.factor +++ b/core/cpu/x86/architecture/architecture.factor @@ -63,8 +63,7 @@ M: x86 %prologue ( n -- ) M: x86 %epilogue ( n -- ) stack-reg swap ADD ; -: %alien-global ( symbol dll register -- ) - [ 0 MOV rc-absolute-cell rel-dlsym ] keep dup [] MOV ; +HOOK: %alien-global cpu ( symbol dll register -- ) M: x86 %prepare-alien-invoke #! Save Factor stack pointers in case the C code calls a diff --git a/core/cpu/x86/intrinsics/intrinsics.factor b/core/cpu/x86/intrinsics/intrinsics.factor index db303982ba..667f08c053 100755 --- a/core/cpu/x86/intrinsics/intrinsics.factor +++ b/core/cpu/x86/intrinsics/intrinsics.factor @@ -66,7 +66,7 @@ IN: cpu.x86.intrinsics ! Mark the card "obj" operand card-bits SHR "cards_offset" f temp-reg v>operand %alien-global - temp-reg v>operand "obj" operand [+] card-mark OR + temp-reg v>operand "obj" operand [+] card-mark MOV ! Mark the card deck "obj" operand deck-bits card-bits - SHR diff --git a/vm/Config.unix b/vm/Config.unix index e7b19e96e1..a25d0df95e 100644 --- a/vm/Config.unix +++ b/vm/Config.unix @@ -1,4 +1,6 @@ -CFLAGS += -fomit-frame-pointer +ifndef DEBUG + CFLAGS += -fomit-frame-pointer +endif EXE_SUFFIX = DLL_PREFIX = lib diff --git a/vm/data_gc.c b/vm/data_gc.c index aed2cef4d1..f44b8a7a05 100755 --- a/vm/data_gc.c +++ b/vm/data_gc.c @@ -24,6 +24,7 @@ CELL init_zone(F_ZONE *z, CELL size, CELL start) void init_card_decks(void) { CELL start = data_heap->segment->start & ~(DECK_SIZE - 1); + allot_markers_offset = (CELL)data_heap->allot_markers - (start >> CARD_BITS); cards_offset = (CELL)data_heap->cards - (start >> CARD_BITS); decks_offset = (CELL)data_heap->decks - (start >> DECK_BITS); } @@ -64,6 +65,9 @@ F_DATA_HEAP *alloc_data_heap(CELL gens, data_heap->semispaces = safe_malloc(sizeof(F_ZONE) * data_heap->gen_count); CELL cards_size = (total_size + DECK_SIZE) / CARD_SIZE; + data_heap->allot_markers = safe_malloc(cards_size); + data_heap->allot_markers_end = data_heap->allot_markers + cards_size; + data_heap->cards = safe_malloc(cards_size); data_heap->cards_end = data_heap->cards + cards_size; @@ -109,6 +113,7 @@ void dealloc_data_heap(F_DATA_HEAP *data_heap) dealloc_segment(data_heap->segment); free(data_heap->generations); free(data_heap->semispaces); + free(data_heap->allot_markers); free(data_heap->cards); free(data_heap->decks); free(data_heap); @@ -122,8 +127,7 @@ void clear_cards(CELL from, CELL to) F_CARD *first_card = ADDR_TO_CARD(data_heap->generations[to].start); F_CARD *last_card = ADDR_TO_CARD(data_heap->generations[from].end); F_CARD *ptr; - for(ptr = first_card; ptr < last_card; ptr++) - *ptr = CARD_BASE_MASK; /* invalid value */ + for(ptr = first_card; ptr < last_card; ptr++) *ptr = 0; } void clear_decks(CELL from, CELL to) @@ -132,8 +136,16 @@ void clear_decks(CELL from, CELL to) F_CARD *first_deck = ADDR_TO_CARD(data_heap->generations[to].start); F_CARD *last_deck = ADDR_TO_CARD(data_heap->generations[from].end); F_CARD *ptr; - for(ptr = first_deck; ptr < last_deck; ptr++) - *ptr = 0; + for(ptr = first_deck; ptr < last_deck; ptr++) *ptr = 0; +} + +void clear_allot_markers(CELL from, CELL to) +{ + /* NOTE: reverse order due to heap layout. */ + F_CARD *first_card = ADDR_TO_ALLOT_MARKER(data_heap->generations[to].start); + F_CARD *last_card = ADDR_TO_ALLOT_MARKER(data_heap->generations[from].end); + F_CARD *ptr; + for(ptr = first_card; ptr < last_card; ptr++) *ptr = CARD_BASE_MASK; } void set_data_heap(F_DATA_HEAP *data_heap_) @@ -142,6 +154,8 @@ void set_data_heap(F_DATA_HEAP *data_heap_) nursery = data_heap->generations[NURSERY]; init_card_decks(); clear_cards(NURSERY,TENURED); + clear_decks(NURSERY,TENURED); + clear_allot_markers(NURSERY,TENURED); } void gc_reset(void) @@ -290,7 +304,7 @@ CELL next_object(void) if(heap_scan_ptr >= data_heap->generations[TENURED].here) return F; - + type = untag_header(value); heap_scan_ptr += untagged_object_size(heap_scan_ptr); @@ -312,17 +326,16 @@ DEFINE_PRIMITIVE(end_scan) /* Scan all the objects in the card */ void collect_card(F_CARD *ptr, CELL gen, CELL here) { - F_CARD c = *ptr; - CELL offset = (c & CARD_BASE_MASK); + CELL offset = CARD_OFFSET(ptr); if(offset != CARD_BASE_MASK) { CELL card_scan = (CELL)CARD_TO_ADDR(ptr) + offset; CELL card_end = (CELL)CARD_TO_ADDR(ptr + 1); - + while(card_scan < card_end && card_scan < here) card_scan = collect_next(card_scan); - + cards_scanned++; } } @@ -658,6 +671,8 @@ void reset_generations(CELL from, CELL to) reset_generation(i); clear_cards(from,to); + clear_decks(from,to); + clear_allot_markers(from,to); } /* Prepare to start copying reachable objects into an unused zone */ @@ -682,6 +697,8 @@ void begin_gc(CELL requested_bytes) reset_generation(collecting_gen); newspace = &data_heap->generations[collecting_gen]; clear_cards(collecting_gen,collecting_gen); + clear_decks(collecting_gen,collecting_gen); + clear_allot_markers(collecting_gen,collecting_gen); } else { @@ -891,7 +908,7 @@ DEFINE_PRIMITIVE(become) critical_error("bad parameters to become",0); CELL i; - + for(i = 0; i < capacity; i++) { CELL old_obj = array_nth(old_objects,i); diff --git a/vm/data_gc.h b/vm/data_gc.h index c1023b8043..20692c14e6 100755 --- a/vm/data_gc.h +++ b/vm/data_gc.h @@ -44,6 +44,9 @@ typedef struct { F_ZONE *generations; F_ZONE* semispaces; + CELL *allot_markers; + CELL *allot_markers_end; + CELL *cards; CELL *cards_end; @@ -75,6 +78,7 @@ offset within the card */ #define ADDR_CARD_MASK (CARD_SIZE-1) DLLEXPORT CELL cards_offset; +DLLEXPORT CELL allot_markers_offset; #define ADDR_TO_CARD(a) (F_CARD*)(((CELL)(a) >> CARD_BITS) + cards_offset) #define CARD_TO_ADDR(c) (CELL*)(((CELL)(c) - cards_offset)<> DECK_BITS) + decks_offset) -#define DECK_TO_ADDR(c) (CELL*)(((CELL)(c) - decks_offset)<> CARD_BITS) + allot_markers_offset) +#define CARD_OFFSET(c) (*((c) - (CELL)data_heap->cards + (CELL)data_heap->allot_markers)) + void init_card_decks(void); /* this is an inefficient write barrier. compiled definitions use a more @@ -101,11 +108,8 @@ any time we are potentially storing a pointer from an older generation to a younger one */ INLINE void write_barrier(CELL address) { - F_CARD *c = ADDR_TO_CARD(address); - *c |= CARD_MARK_MASK; - - F_DECK *d = ADDR_TO_DECK(address); - *d = CARD_MARK_MASK ; + *ADDR_TO_CARD(address) = CARD_MARK_MASK; + *ADDR_TO_DECK(address) = CARD_MARK_MASK; } #define SLOT(obj,slot) (UNTAG(obj) + (slot) * CELLS) @@ -119,11 +123,10 @@ INLINE void set_slot(CELL obj, CELL slot, CELL value) /* we need to remember the first object allocated in the card */ INLINE void allot_barrier(CELL address) { - F_CARD *ptr = ADDR_TO_CARD(address); - F_CARD c = *ptr; - CELL b = (c & CARD_BASE_MASK); - CELL a = (address & ADDR_CARD_MASK); - *ptr = ((c & CARD_MARK_MASK) | ((b < a) ? b : a)); + F_CARD *ptr = ADDR_TO_ALLOT_MARKER(address); + F_CARD b = *ptr; + F_CARD a = (address & ADDR_CARD_MASK); + *ptr = (b < a ? b : a); } void clear_cards(CELL from, CELL to);