Working on adding support for the new write barrier to optimized code

db4
Slava Pestov 2009-10-14 02:06:01 -05:00
parent d689be57a5
commit 1ce39963fd
17 changed files with 179 additions and 208 deletions

View File

@ -42,6 +42,9 @@ M: ##set-slot-imm build-liveness-graph
M: ##write-barrier build-liveness-graph
dup src>> setter-liveness-graph ;
M: ##write-barrier-imm build-liveness-graph
dup src>> setter-liveness-graph ;
M: ##allot build-liveness-graph
[ dst>> allocations get conjoin ] [ call-next-method ] bi ;
@ -74,6 +77,9 @@ M: ##set-slot-imm compute-live-vregs
M: ##write-barrier compute-live-vregs
dup src>> setter-live-vregs ;
M: ##write-barrier-imm compute-live-vregs
dup src>> setter-live-vregs ;
M: ##fixnum-add compute-live-vregs record-live ;
M: ##fixnum-sub compute-live-vregs record-live ;
@ -91,6 +97,8 @@ M: ##set-slot-imm live-insn? obj>> live-vreg? ;
M: ##write-barrier live-insn? src>> live-vreg? ;
M: ##write-barrier-imm live-insn? src>> live-vreg? ;
M: ##fixnum-add live-insn? drop t ;
M: ##fixnum-sub live-insn? drop t ;

View File

@ -619,8 +619,13 @@ literal: size class
temp: temp/int-rep ;
INSN: ##write-barrier
use: src/int-rep slot/int-rep
temp: temp1/int-rep temp2/int-rep ;
INSN: ##write-barrier-imm
use: src/int-rep
temp: card#/int-rep table/int-rep ;
literal: slot
temp: temp1/int-rep temp2/int-rep ;
INSN: ##alien-global
def: dst/int-rep

View File

@ -1,7 +1,7 @@
! Copyright (C) 2008, 2009 Slava Pestov.
! See http://factorcode.org/license.txt for BSD license.
USING: layouts namespaces kernel accessors sequences classes.algebra
compiler.tree.propagation.info compiler.cfg.stacks compiler.cfg.hats
fry compiler.tree.propagation.info compiler.cfg.stacks compiler.cfg.hats
compiler.cfg.registers compiler.cfg.instructions
compiler.cfg.utilities compiler.cfg.builder.blocks ;
IN: compiler.cfg.intrinsics.slots
@ -30,25 +30,25 @@ IN: compiler.cfg.intrinsics.slots
ds-push
] [ drop emit-primitive ] if ;
: (emit-set-slot) ( infos -- obj-reg )
[ 3inputs ] [ second value-tag ] bi*
^^tag-offset>slot over [ ##set-slot ] dip ;
: (emit-set-slot) ( infos -- )
[ first class>> immediate class<= ]
[ [ 3inputs ] [ second value-tag ] bi* ^^tag-offset>slot ] bi
[ ##set-slot ]
[ '[ _ drop _ _ next-vreg next-vreg ##write-barrier ] unless ] 3bi ;
: (emit-set-slot-imm) ( infos -- obj-reg )
: (emit-set-slot-imm) ( infos -- )
ds-drop
[ 2inputs ]
[ [ third literal>> ] [ second value-tag ] bi ] bi*
pick [ ##set-slot-imm ] dip ;
[ first class>> immediate class<= ]
[ [ 2inputs ] [ [ third literal>> ] [ second value-tag ] bi ] bi* ] bi
'[ _ ##set-slot-imm ]
[ '[ _ drop _ _ cells next-vreg next-vreg ##write-barrier-imm ] unless ] 3bi ;
: emit-set-slot ( node -- )
dup node-input-infos
dup second value-tag [
nip
[
dup third value-info-small-fixnum?
[ (emit-set-slot-imm) ] [ (emit-set-slot) ] if
] [ first class>> immediate class<= ] bi
[ drop ] [ next-vreg next-vreg ##write-barrier ] if
] [ drop emit-primitive ] if ;
: emit-string-nth ( -- )

View File

@ -1,139 +1,43 @@
! Copyright (C) 2008, 2009 Slava Pestov, Daniel Ehrenberg.
! See http://factorcode.org/license.txt for BSD license.
USING: kernel accessors namespaces assocs sets sequences
fry combinators.short-circuit locals make arrays
compiler.cfg
compiler.cfg.dominance
compiler.cfg.predecessors
compiler.cfg.loop-detection
compiler.cfg.rpo
compiler.cfg.instructions
compiler.cfg.registers
compiler.cfg.dataflow-analysis
compiler.cfg.utilities ;
USING: accessors assocs combinators.short-circuit
compiler.cfg.instructions compiler.cfg.rpo kernel namespaces
sequences sets ;
IN: compiler.cfg.write-barrier
! Eliminate redundant write barrier hits.
SYMBOL: fresh-allocations
! Objects which have already been marked, as well as
! freshly-allocated objects
SYMBOL: safe
! Objects which have been mutated
SYMBOL: mutated
SYMBOL: mutated-objects
GENERIC: eliminate-write-barrier ( insn -- ? )
M: ##allot eliminate-write-barrier
dst>> safe get conjoin t ;
dst>> fresh-allocations get conjoin t ;
M: ##set-slot eliminate-write-barrier
obj>> mutated-objects get conjoin t ;
M: ##set-slot-imm eliminate-write-barrier
obj>> mutated-objects get conjoin t ;
: needs-write-barrier? ( insn -- ? )
{ [ fresh-allocations get key? not ] [ mutated-objects get key? ] } 1&& ;
M: ##write-barrier eliminate-write-barrier
src>> dup safe get key? not
[ safe get conjoin t ] [ drop f ] if ;
src>> needs-write-barrier? ;
M: ##write-barrier-imm eliminate-write-barrier
src>> needs-write-barrier? ;
M: ##copy eliminate-write-barrier
"Run copy propagation first" throw ;
M: insn eliminate-write-barrier drop t ;
! This doesn't actually benefit from being a dataflow analysis
! might as well be dominator-based
! Dealing with phi functions would help, though
FORWARD-ANALYSIS: safe
: has-allocation? ( bb -- ? )
instructions>> [ { [ ##allocation? ] [ ##call? ] } 1|| ] any? ;
M: safe-analysis transfer-set
drop [ H{ } assoc-clone-like safe set ] dip
instructions>> [
eliminate-write-barrier drop
] each safe get ;
M: safe-analysis join-sets
drop has-allocation? [ drop H{ } clone ] [ assoc-refine ] if ;
: write-barriers-step ( bb -- )
dup safe-in H{ } assoc-clone-like safe set
H{ } clone fresh-allocations set
H{ } clone mutated-objects set
instructions>> [ eliminate-write-barrier ] filter-here ;
GENERIC: remove-dead-barrier ( insn -- ? )
M: ##write-barrier remove-dead-barrier
src>> mutated get key? ;
M: ##set-slot remove-dead-barrier
obj>> mutated get conjoin t ;
M: ##set-slot-imm remove-dead-barrier
obj>> mutated get conjoin t ;
M: insn remove-dead-barrier drop t ;
: remove-dead-barriers ( bb -- )
H{ } clone mutated set
instructions>> [ remove-dead-barrier ] filter-here ;
! Availability of slot
! Anticipation of this and set-slot would help too, maybe later
FORWARD-ANALYSIS: slot
UNION: access ##slot ##slot-imm ##set-slot ##set-slot-imm ;
M: slot-analysis transfer-set
drop [ H{ } assoc-clone-like ] dip
instructions>> over '[
dup access? [
obj>> _ conjoin
] [ drop ] if
] each ;
: slot-available? ( vreg bb -- ? )
slot-in key? ;
: make-barriers ( vregs -- bb )
[ [ next-vreg next-vreg ##write-barrier ] each ] V{ } make <simple-block> ;
: emit-barriers ( vregs loop -- )
swap [
[ [ header>> predecessors>> ] [ ends>> keys ] bi diff ]
[ header>> ] bi
] [ make-barriers ] bi*
insert-basic-block ;
: write-barriers ( bbs -- bb=>barriers )
[
dup instructions>>
[ ##write-barrier? ] filter
[ src>> ] map
] { } map>assoc
[ nip empty? not ] assoc-filter ;
: filter-dominant ( bb=>barriers bbs -- barriers )
'[ drop _ [ dominates? ] with all? ] assoc-filter
values concat prune ;
: dominant-write-barriers ( loop -- vregs )
[ blocks>> values write-barriers ] [ ends>> keys ] bi filter-dominant ;
: safe-loops ( -- loops )
loops get values
[ blocks>> keys [ has-allocation? not ] all? ] filter ;
:: insert-extra-barriers ( cfg -- )
safe-loops [| loop |
cfg needs-dominance needs-predecessors drop
loop dominant-write-barriers
loop header>> '[ _ slot-available? ] filter
[ loop emit-barriers cfg cfg-changed drop ] unless-empty
] each ;
: contains-write-barrier? ( cfg -- ? )
post-order [ instructions>> [ ##write-barrier? ] any? ] any? ;
: eliminate-write-barriers ( cfg -- cfg' )
dup contains-write-barrier? [
needs-loops
dup [ remove-dead-barriers ] each-basic-block
dup compute-slot-sets
dup insert-extra-barriers
dup compute-safe-sets
dup [ write-barriers-step ] each-basic-block
] when ;
dup [ write-barriers-step ] each-basic-block ;

View File

@ -218,6 +218,7 @@ CODEGEN: ##set-alien-double %set-alien-double
CODEGEN: ##set-alien-vector %set-alien-vector
CODEGEN: ##allot %allot
CODEGEN: ##write-barrier %write-barrier
CODEGEN: ##write-barrier-imm %write-barrier-imm
CODEGEN: ##compare %compare
CODEGEN: ##compare-imm %compare-imm
CODEGEN: ##compare-float-ordered %compare-float-ordered

View File

@ -77,6 +77,15 @@ SYMBOL: relocation-table
: rel-here ( offset class -- )
[ add-literal ] dip rt-here rel-fixup ;
: rel-vm ( class -- )
rt-vm rel-fixup ;
: rel-cards-offset ( class -- )
rt-cards-offset rel-fixup ;
: rel-decks-offset ( class -- )
rt-decks-offset rel-fixup ;
! And the rest
: resolve-offset ( label-fixup -- offset )
label>> offset>> [ "Unresolved label" throw ] unless* ;

View File

@ -51,6 +51,8 @@ CONSTANT: rt-stack-chain 9
CONSTANT: rt-untagged 10
CONSTANT: rt-megamorphic-cache-hits 11
CONSTANT: rt-vm 12
CONSTANT: rt-cards-offset 13
CONSTANT: rt-decks-offset 14
: rc-absolute? ( n -- ? )
${ rc-absolute-ppc-2/2 rc-absolute-cell rc-absolute } member? ;

View File

@ -397,7 +397,8 @@ HOOK: %alien-global cpu ( dst symbol library -- )
HOOK: %vm-field-ptr cpu ( dst fieldname -- )
HOOK: %allot cpu ( dst size class temp -- )
HOOK: %write-barrier cpu ( src card# table -- )
HOOK: %write-barrier cpu ( src slot temp1 temp2 -- )
HOOK: %write-barrier-imm cpu ( src slot temp1 temp2 -- )
! GC checks
HOOK: %check-nursery cpu ( label size temp1 temp2 -- )

View File

@ -50,7 +50,7 @@ M: x86.32 reserved-area-size 0 ;
M: x86.32 %alien-invoke 0 CALL rc-relative rel-dlsym ;
: push-vm-ptr ( -- )
0 PUSH rc-absolute-cell rt-vm rel-fixup ; ! push the vm ptr as an argument
0 PUSH rc-absolute-cell rel-vm ; ! push the vm ptr as an argument
M: x86.32 return-struct-in-registers? ( c-type -- ? )
c-type
@ -263,7 +263,7 @@ M: x86.32 %alien-callback ( quot -- )
4 [
EAX swap %load-reference
EAX PUSH
param-reg-2 0 MOV rc-absolute-cell rt-vm rel-fixup
param-reg-2 %mov-vm-ptr
"c_to_factor" f %alien-invoke
] with-aligned-stack ;
@ -348,7 +348,7 @@ M:: x86.32 %call-gc ( gc-root-count temp -- )
temp gc-root-base param@ LEA
12 [
! Pass the VM ptr as the third parameter
0 PUSH rc-absolute-cell rt-vm rel-fixup
push-vm-ptr
! Pass number of roots as second parameter
gc-root-count PUSH
! Pass pointer to start of GC roots as first parameter

View File

@ -75,9 +75,6 @@ M: x86.64 %prepare-unbox ( -- )
param-reg-1 R14 [] MOV
R14 cell SUB ;
: %mov-vm-ptr ( reg -- )
0 MOV rc-absolute-cell rt-vm rel-fixup ;
M:: x86.64 %unbox ( n rep func -- )
param-reg-2 %mov-vm-ptr
! Call the unboxer
@ -183,11 +180,11 @@ M: x86.64 %alien-invoke
R11 CALL ;
M: x86.64 %nest-stacks ( -- )
param-reg-1 0 MOV rc-absolute-cell rt-vm rel-fixup
param-reg-1 %mov-vm-ptr
"nest_stacks" f %alien-invoke ;
M: x86.64 %unnest-stacks ( -- )
param-reg-1 0 MOV rc-absolute-cell rt-vm rel-fixup
param-reg-1 %mov-vm-ptr
"unnest_stacks" f %alien-invoke ;
M: x86.64 %prepare-alien-indirect ( -- )

View File

@ -369,19 +369,17 @@ M: x86 %shl int-rep two-operand [ SHL ] emit-shift ;
M: x86 %shr int-rep two-operand [ SHR ] emit-shift ;
M: x86 %sar int-rep two-operand [ SAR ] emit-shift ;
M: x86 %vm-field-ptr ( dst field -- )
[ drop 0 MOV rc-absolute-cell rt-vm rel-fixup ]
[ vm-field-offset ADD ] 2bi ;
: %mov-vm-ptr ( reg -- )
0 MOV rc-absolute-cell rel-vm ;
: load-zone-ptr ( reg -- )
#! Load pointer to start of zone array
"nursery" %vm-field-ptr ;
M: x86 %vm-field-ptr ( dst field -- )
[ drop %mov-vm-ptr ] [ vm-field-offset ADD ] 2bi ;
: load-allot-ptr ( nursery-ptr allot-ptr -- )
[ drop load-zone-ptr ] [ swap cell [+] MOV ] 2bi ;
[ drop "nursery" %vm-field-ptr ] [ swap [] MOV ] 2bi ;
: inc-allot-ptr ( nursery-ptr n -- )
[ cell [+] ] dip 8 align ADD ;
[ [] ] dip 8 align ADD ;
: store-header ( temp class -- )
[ [] ] [ type-number tag-fixnum ] bi* MOV ;
@ -395,26 +393,32 @@ M:: x86 %allot ( dst size class nursery-ptr -- )
dst class store-tagged
nursery-ptr size inc-allot-ptr ;
M:: x86 %write-barrier ( src card# table -- )
#! Mark the card pointed to by vreg.
:: (%write-barrier) ( src slot temp1 temp2 -- )
! Compute slot address.
temp1 src MOV
temp1 slot ADD
! Mark the card
card# src MOV
card# card-bits SHR
table "cards_offset" %vm-field-ptr
table table [] MOV
table card# [+] card-mark <byte> MOV
temp1 card-bits SHR
temp2 0 MOV rc-absolute-cell rel-cards-offset
temp2 temp1 [+] card-mark <byte> MOV
! Mark the card deck
card# deck-bits card-bits - SHR
table "decks_offset" %vm-field-ptr
table table [] MOV
table card# [+] card-mark <byte> MOV ;
temp1 deck-bits card-bits - SHR
temp2 0 MOV rc-absolute-cell rel-decks-offset
temp2 temp1 [+] card-mark <byte> MOV ;
M: x86 %write-barrier ( src slot temp1 temp2 -- ) (%write-barrier) ;
M: x86 %write-barrier-imm ( src slot temp1 temp2 -- ) (%write-barrier) ;
M:: x86 %check-nursery ( label size temp1 temp2 -- )
temp1 load-zone-ptr
temp2 temp1 cell [+] MOV
temp1 "nursery" %vm-field-ptr
! Load 'here' into temp2
temp2 temp1 [] MOV
temp2 size ADD
temp1 temp1 3 cells [+] MOV
! Load 'end' into temp1
temp1 temp1 2 cells [+] MOV
temp2 temp1 CMP
label JLE ;
@ -1327,8 +1331,8 @@ M:: x86 %save-context ( temp1 temp2 callback-allowed? -- )
#! Save Factor stack pointers in case the C code calls a
#! callback which does a GC, which must reliably trace
#! all roots.
temp1 0 MOV rc-absolute-cell rt-vm rel-fixup
temp1 temp1 "stack_chain" vm-field-offset [+] MOV
temp1 "stack_chain" %vm-field-ptr
temp1 temp1 [] MOV
temp2 stack-reg cell neg [+] LEA
temp1 [] temp2 MOV
callback-allowed? [

View File

@ -41,6 +41,8 @@ int factor_vm::number_of_parameters(relocation_type type)
case RT_STACK_CHAIN:
case RT_MEGAMORPHIC_CACHE_HITS:
case RT_VM:
case RT_CARDS_OFFSET:
case RT_DECKS_OFFSET:
return 0;
default:
critical_error("Bad rel type",type);
@ -180,6 +182,10 @@ cell factor_vm::compute_relocation(relocation_entry rel, cell index, code_block
return (cell)&megamorphic_cache_hits;
case RT_VM:
return (cell)this;
case RT_CARDS_OFFSET:
return cards_offset;
case RT_DECKS_OFFSET:
return decks_offset;
default:
critical_error("Bad rel type",rel);
return 0; /* Can't happen */

View File

@ -28,6 +28,10 @@ enum relocation_type {
RT_MEGAMORPHIC_CACHE_HITS,
/* address of vm object */
RT_VM,
/* value of vm->cards_offset */
RT_CARDS_OFFSET,
/* value of vm->decks_offset */
RT_DECKS_OFFSET,
};
enum relocation_class {

View File

@ -93,21 +93,49 @@ void full_collector::cheneys_algorithm()
}
}
void factor_vm::collect_full(cell requested_bytes, bool trace_contexts_p)
struct full_updater {
factor_vm *myvm;
full_updater(factor_vm *myvm_) : myvm(myvm_) {}
void operator()(heap_block *block)
{
data_heap *old;
if(current_gc->growing_data_heap)
myvm->relocate_code_block((code_block *)block);
}
};
struct literal_and_word_reference_updater {
factor_vm *myvm;
literal_and_word_reference_updater(factor_vm *myvm_) : myvm(myvm_) {}
void operator()(heap_block *block)
{
old = data;
set_data_heap(data->grow(requested_bytes));
code_block *compiled = (code_block *)block;
myvm->update_literal_references(compiled);
myvm->update_word_references(compiled);
}
};
void factor_vm::free_unmarked_code_blocks(bool growing_data_heap)
{
if(growing_data_heap)
{
full_updater updater(this);
code->free_unmarked(updater);
}
else
{
old = NULL;
std::swap(data->tenured,data->tenured_semispace);
reset_generation(data->tenured);
literal_and_word_reference_updater updater(this);
code->free_unmarked(updater);
}
code->points_to_nursery.clear();
code->points_to_aging.clear();
}
void factor_vm::collect_full_impl(bool trace_contexts_p)
{
full_collector collector(this);
collector.trace_roots();
@ -118,12 +146,26 @@ void factor_vm::collect_full(cell requested_bytes, bool trace_contexts_p)
}
collector.cheneys_algorithm();
free_unmarked_code_blocks();
reset_generation(data->aging);
nursery.here = nursery.start;
}
if(old) delete old;
void factor_vm::collect_growing_heap(cell requested_bytes, bool trace_contexts_p)
{
data_heap *old = data;
set_data_heap(data->grow(requested_bytes));
collect_full(trace_contexts_p);
free_unmarked_code_blocks(true);
delete old;
}
void factor_vm::collect_full(bool trace_contexts_p)
{
std::swap(data->tenured,data->tenured_semispace);
reset_generation(data->tenured);
collect_full_impl(trace_contexts_p);
free_unmarked_code_blocks(false);
}
}

View File

@ -12,27 +12,6 @@ gc_state::gc_state(data_heap *data_, bool growing_data_heap_, cell collecting_ge
gc_state::~gc_state() { }
struct literal_and_word_reference_updater {
factor_vm *myvm;
literal_and_word_reference_updater(factor_vm *myvm_) : myvm(myvm_) {}
void operator()(heap_block *block)
{
code_block *compiled = (code_block *)block;
myvm->update_literal_references(compiled);
myvm->update_word_references(compiled);
}
};
void factor_vm::free_unmarked_code_blocks()
{
literal_and_word_reference_updater updater(this);
code->free_unmarked(updater);
code->points_to_nursery.clear();
code->points_to_aging.clear();
}
void factor_vm::update_dirty_code_blocks(std::set<code_block *> *remembered_set)
{
/* The youngest generation that any code block can now reference */
@ -75,6 +54,7 @@ void factor_vm::garbage_collection(cell collecting_gen_, bool growing_data_heap_
resort to growing the data heap */
if(current_gc->collecting_tenured_p())
{
assert(!current_gc->growing_data_heap);
current_gc->growing_data_heap = true;
/* Since we start tracing again, any previously
@ -105,7 +85,14 @@ void factor_vm::garbage_collection(cell collecting_gen_, bool growing_data_heap_
collect_aging();
}
else if(current_gc->collecting_tenured_p())
collect_full(requested_bytes,trace_contexts_p);
{
if(current_gc->growing_data_heap)
collect_growing_heap(requested_bytes,trace_contexts_p);
else
collect_full(trace_contexts_p);
}
else
critical_error("Bug in GC",0);
record_gc_stats();

View File

@ -232,12 +232,14 @@ struct factor_vm
}
// gc
void free_unmarked_code_blocks();
void update_dirty_code_blocks(std::set<code_block *> *remembered_set);
void collect_nursery();
void collect_aging();
void collect_to_tenured();
void collect_full(cell requested_bytes, bool trace_contexts_p);
void free_unmarked_code_blocks(bool growing_data_heap);
void collect_full_impl(bool trace_contexts_p);
void collect_growing_heap(cell requested_bytes, bool trace_contexts_p);
void collect_full(bool trace_contexts_p);
void record_gc_stats();
void garbage_collection(cell gen, bool growing_data_heap, bool trace_contexts_p, cell requested_bytes);
void gc();

View File

@ -2,14 +2,13 @@ namespace factor
{
struct zone {
/* allocation pointer is 'here'; its offset is hardcoded in the
compiler backends */
cell start;
/* offset of 'here' and 'end' is hardcoded in compiler backends */
cell here;
cell size;
cell start;
cell end;
cell size;
zone(cell size_, cell start_) : start(start_), here(0), size(size_), end(start_ + size_) {}
zone(cell size_, cell start_) : here(0), start(start_), end(start_ + size_), size(size_) {}
inline bool contains_p(object *pointer)
{