From fb4ca1ce85b65c33659ab5e8b1e0df27a89c4dc9 Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Sun, 4 Oct 2009 15:37:22 -0500 Subject: [PATCH 1/7] cleaner implementation of translation-matrix4 --- extra/math/matrices/simd/simd.factor | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/extra/math/matrices/simd/simd.factor b/extra/math/matrices/simd/simd.factor index 0bc2817f53..ad36fe9b7e 100644 --- a/extra/math/matrices/simd/simd.factor +++ b/extra/math/matrices/simd/simd.factor @@ -119,8 +119,10 @@ CONSTANT: zero-matrix4 TYPED:: m4^n ( m: matrix4 n: fixnum -- m^n: matrix4 ) identity-matrix4 n [ m m4. ] times ; +: vmerge-diagonal* ( x y -- h t ) + [ vmerge-head ] [ swap vmerge-tail ] 2bi ; inline : vmerge-diagonal ( x -- h t ) - 0.0 float-4-with [ vmerge-head ] [ swap vmerge-tail ] 2bi ; inline + 0.0 float-4-with vmerge-diagonal* ; TYPED: diagonal-matrix4 ( diagonal: float-4 -- matrix: matrix4 ) [ vmerge-diagonal [ vmerge-diagonal ] bi@ ] make-matrix4 ; @@ -139,17 +141,16 @@ TYPED: transpose-matrix4 ( matrix: matrix4 -- matrix: matrix4 ) float-4{ 1.0 1.0 1.0 1.0 } swap v/ scale-matrix4 ; inline TYPED:: translation-matrix4 ( offset: float-4 -- matrix: matrix4 ) - matrix4 (struct) :> c + [ + float-4{ 1.0 1.0 1.0 1.0 } :> diagonal - float-4{ 0.0 0.0 0.0 1.0 } :> c4 - float-4{ t t t f } offset c4 v? :> offset' + offset 0 float-4-with vmerge + [ 0 float-4-with swap vmerge ] bi@ drop :> z :> y :> x - offset' { 3 3 3 0 } vshuffle float-4{ t f f t } vbitand - offset' { 3 3 3 1 } vshuffle float-4{ f t f t } vbitand - offset' { 3 3 3 2 } vshuffle float-4{ f f t t } vbitand - c4 - - c set-rows ; + diagonal y vmerge-diagonal* + [ x vmerge-diagonal* ] + [ z vmerge-diagonal* ] bi* + ] make-matrix4 ; TYPED:: rotation-matrix4 ( axis: float-4 theta: float -- matrix: matrix4 ) ! x*x + c*(1.0 - x*x) x*y*(1.0 - c) - s*z x*z*(1.0 - c) + s*y 0 From 0c7857152b8d4b97704d0ce2b2f2d979bbee164d Mon Sep 17 00:00:00 2001 From: Joe Groff Date: Sun, 4 Oct 2009 16:29:03 -0500 Subject: [PATCH 2/7] clean up frustum-matrix4 --- extra/math/matrices/simd/simd.factor | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/extra/math/matrices/simd/simd.factor b/extra/math/matrices/simd/simd.factor index ad36fe9b7e..cac77bf8b0 100644 --- a/extra/math/matrices/simd/simd.factor +++ b/extra/math/matrices/simd/simd.factor @@ -122,7 +122,7 @@ TYPED:: m4^n ( m: matrix4 n: fixnum -- m^n: matrix4 ) : vmerge-diagonal* ( x y -- h t ) [ vmerge-head ] [ swap vmerge-tail ] 2bi ; inline : vmerge-diagonal ( x -- h t ) - 0.0 float-4-with vmerge-diagonal* ; + 0.0 float-4-with vmerge-diagonal* ; inline TYPED: diagonal-matrix4 ( diagonal: float-4 -- matrix: matrix4 ) [ vmerge-diagonal [ vmerge-diagonal ] bi@ ] make-matrix4 ; @@ -189,16 +189,13 @@ TYPED:: rotation-matrix4 ( axis: float-4 theta: float -- matrix: matrix4 ) diagonal-m triangle-m m4+ ; TYPED:: frustum-matrix4 ( xy: float-4 near: float far: float -- matrix: matrix4 ) - matrix4 (struct) :> c - - near near near far + 2 near far * * float-4-boa :> num - float-4{ t t f f } xy near far - float-4-with v? :> denom - num denom v/ :> fov - - fov { 0 0 0 0 } vshuffle float-4{ t f f f } vbitand - fov { 1 1 1 1 } vshuffle float-4{ f t f f } vbitand - fov { 2 2 2 3 } vshuffle float-4{ f f t t } vbitand - float-4{ 0.0 0.0 -1.0 0.0 } - - c set-rows ; + [ + near near near far + 2 near far * * float-4-boa ! num + float-4{ t t f f } xy near far - float-4-with v? ! denom + v/ :> fov + + fov 0.0 float-4-with vmerge-head vmerge-diagonal + fov float-4{ f f t t } vand + float-4{ 0.0 0.0 -1.0 0.0 } + ] make-matrix4 ; From 6c047127ee4254d9af6e65da2bd3ca336c0f45ce Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Mon, 5 Oct 2009 03:27:28 -0500 Subject: [PATCH 3/7] Change C++ exception usage back into a longjmp() due to GCC bugs --- Makefile | 1 + vm/data_gc.cpp | 120 +++++++++++++++++++----------------------- vm/data_gc.hpp | 2 + vm/generic_arrays.hpp | 2 +- vm/os-windows-nt.cpp | 7 +-- vm/quotations.cpp | 1 + vm/tuples.cpp | 0 vm/vm.cpp | 11 ++++ vm/vm.hpp | 104 +++++++++++++++++------------------- 9 files changed, 122 insertions(+), 126 deletions(-) mode change 100644 => 100755 vm/generic_arrays.hpp mode change 100644 => 100755 vm/tuples.cpp create mode 100755 vm/vm.cpp mode change 100644 => 100755 vm/vm.hpp diff --git a/Makefile b/Makefile index 49c08c7d13..dd2e83985e 100755 --- a/Makefile +++ b/Makefile @@ -60,6 +60,7 @@ DLL_OBJS = $(PLAF_DLL_OBJS) \ vm/strings.o \ vm/tuples.o \ vm/utilities.o \ + vm/vm.o \ vm/words.o \ vm/write_barrier.o diff --git a/vm/data_gc.cpp b/vm/data_gc.cpp index 66dc2d8c04..679eec01f7 100755 --- a/vm/data_gc.cpp +++ b/vm/data_gc.cpp @@ -12,18 +12,16 @@ gc_state::gc_state(data_heap *data_, bool growing_data_heap_, cell collecting_ge data(data_), growing_data_heap(growing_data_heap_), collecting_gen(collecting_gen_), + collecting_aging_again(false), start_time(current_micros()) { } gc_state::~gc_state() { } -/* If a generation fills up, throw this error. It is caught in garbage_collection() */ -struct generation_full_condition { }; - /* Given a pointer to oldspace, copy it to newspace */ object *factor_vm::copy_untagged_object_impl(object *pointer, cell size) { if(current_gc->newspace->here + size >= current_gc->newspace->end) - throw generation_full_condition(); + longjmp(current_gc->gc_unwind,1); object *newpointer = allot_zone(current_gc->newspace,size); @@ -502,7 +500,6 @@ void factor_vm::begin_gc(cell requested_bytes) void factor_vm::end_gc() { - gc_stats *s = &stats[current_gc->collecting_gen]; cell gc_elapsed = (current_micros() - current_gc->start_time); @@ -545,77 +542,70 @@ void factor_vm::garbage_collection(cell collecting_gen_, bool growing_data_heap_ /* Keep trying to GC higher and higher generations until we don't run out of space */ - for(;;) - { - try - { - begin_gc(requested_bytes); + if(setjmp(current_gc->gc_unwind)) + { + /* We come back here if a generation is full */ - /* Initialize chase pointer */ - cell scan = current_gc->newspace->here; + /* We have no older generations we can try collecting, so we + resort to growing the data heap */ + if(current_gc->collecting_tenured_p()) + { + current_gc->growing_data_heap = true; - /* Trace objects referenced from global environment */ - trace_roots(); + /* see the comment in unmark_marked() */ + code->unmark_marked(); + } + /* we try collecting aging space twice before going on to + collect tenured */ + else if(data->have_aging_p() + && current_gc->collecting_gen == data->aging() + && !current_gc->collecting_aging_again) + { + current_gc->collecting_aging_again = true; + } + /* Collect the next oldest generation */ + else + { + current_gc->collecting_gen++; + } + } - /* Trace objects referenced from stacks, unless we're doing - save-image-and-exit in which case stack objects are irrelevant */ - if(trace_contexts_) trace_contexts(); + begin_gc(requested_bytes); - /* Trace objects referenced from older generations */ - trace_cards(); + /* Initialize chase pointer */ + cell scan = current_gc->newspace->here; - /* On minor GC, trace code heap roots if it has pointers - to this generation or younger. Otherwise, tracing data heap objects - will mark all reachable code blocks, and we free the unmarked ones - after. */ - if(!current_gc->collecting_tenured_p() && current_gc->collecting_gen >= last_code_heap_scan) - { - update_code_heap_roots(); - } + /* Trace objects referenced from global environment */ + trace_roots(); - /* do some copying -- this is where most of the work is done */ - copy_reachable_objects(scan,¤t_gc->newspace->here); + /* Trace objects referenced from stacks, unless we're doing + save-image-and-exit in which case stack objects are irrelevant */ + if(trace_contexts_) trace_contexts(); - /* On minor GC, update literal references in code blocks, now that all - data heap objects are in their final location. On a major GC, - free all code blocks that did not get marked during tracing. */ - if(current_gc->collecting_tenured_p()) - free_unmarked_code_blocks(); - else - update_dirty_code_blocks(); + /* Trace objects referenced from older generations */ + trace_cards(); - /* GC completed without any generations filling up; finish up */ - break; - } - catch(const generation_full_condition &c) - { - /* We come back here if a generation is full */ + /* On minor GC, trace code heap roots if it has pointers + to this generation or younger. Otherwise, tracing data heap objects + will mark all reachable code blocks, and we free the unmarked ones + after. */ + if(!current_gc->collecting_tenured_p() && current_gc->collecting_gen >= last_code_heap_scan) + { + update_code_heap_roots(); + } - /* We have no older generations we can try collecting, so we - resort to growing the data heap */ - if(current_gc->collecting_tenured_p()) - { - current_gc->growing_data_heap = true; + /* do some copying -- this is where most of the work is done */ + copy_reachable_objects(scan,¤t_gc->newspace->here); - /* see the comment in unmark_marked() */ - code->unmark_marked(); - } - /* we try collecting aging space twice before going on to - collect tenured */ - else if(data->have_aging_p() - && current_gc->collecting_gen == data->aging() - && !current_gc->collecting_aging_again) - { - current_gc->collecting_aging_again = true; - } - /* Collect the next oldest generation */ - else - { - current_gc->collecting_gen++; - } - } - } + /* On minor GC, update literal references in code blocks, now that all + data heap objects are in their final location. On a major GC, + free all code blocks that did not get marked during tracing. */ + if(current_gc->collecting_tenured_p()) + free_unmarked_code_blocks(); + else + update_dirty_code_blocks(); + /* GC completed without any generations filling up; finish up */ end_gc(); delete current_gc; diff --git a/vm/data_gc.hpp b/vm/data_gc.hpp index 3b80fa781a..0f098b877d 100755 --- a/vm/data_gc.hpp +++ b/vm/data_gc.hpp @@ -34,6 +34,8 @@ struct gc_state { /* GC start time, for benchmarking */ u64 start_time; + jmp_buf gc_unwind; + explicit gc_state(data_heap *data_, bool growing_data_heap_, cell collecting_gen_); ~gc_state(); diff --git a/vm/generic_arrays.hpp b/vm/generic_arrays.hpp old mode 100644 new mode 100755 index f028089964..d54fbaf468 --- a/vm/generic_arrays.hpp +++ b/vm/generic_arrays.hpp @@ -4,7 +4,7 @@ namespace factor template cell array_capacity(Array *array) { #ifdef FACTOR_DEBUG - assert(array->h.hi_tag() == T::type_number); + assert(array->h.hi_tag() == Array::type_number); #endif return array->capacity >> TAG_BITS; } diff --git a/vm/os-windows-nt.cpp b/vm/os-windows-nt.cpp index f3db47d32c..e8f3e1ec98 100755 --- a/vm/os-windows-nt.cpp +++ b/vm/os-windows-nt.cpp @@ -46,11 +46,12 @@ LONG factor_vm::exception_handler(PEXCEPTION_POINTERS pe) else signal_callstack_top = NULL; - switch (e->ExceptionCode) { - case EXCEPTION_ACCESS_VIOLATION: + switch (e->ExceptionCode) + { + case EXCEPTION_ACCESS_VIOLATION: signal_fault_addr = e->ExceptionInformation[1]; c->EIP = (cell)factor::memory_signal_handler_impl; - break; + break; case STATUS_FLOAT_DENORMAL_OPERAND: case STATUS_FLOAT_DIVIDE_BY_ZERO: diff --git a/vm/quotations.cpp b/vm/quotations.cpp index c1ab60b43d..f5eaffa163 100755 --- a/vm/quotations.cpp +++ b/vm/quotations.cpp @@ -330,6 +330,7 @@ void factor_vm::compile_all_words() } + printf("done\n"); /* Update XTs in code heap */ word_updater updater(this); iterate_code_heap(updater); diff --git a/vm/tuples.cpp b/vm/tuples.cpp old mode 100644 new mode 100755 diff --git a/vm/vm.cpp b/vm/vm.cpp new file mode 100755 index 0000000000..a6731a3225 --- /dev/null +++ b/vm/vm.cpp @@ -0,0 +1,11 @@ +#include "master.hpp" + +namespace factor +{ + +factor_vm::factor_vm() +{ + memset(this,0,sizeof(factor_vm)); +} + +} diff --git a/vm/vm.hpp b/vm/vm.hpp old mode 100644 new mode 100755 index 09eda62f54..d7099bfc37 --- a/vm/vm.hpp +++ b/vm/vm.hpp @@ -1,10 +1,10 @@ namespace factor { -struct factor_vm +struct factor_vm { // First five fields accessed directly by assembler. See vm.factor - context *stack_chain; + context *stack_chain; zone nursery; /* new objects are allocated here */ cell cards_offset; cell decks_offset; @@ -101,20 +101,20 @@ struct factor_vm bignum *bignum_multiply_unsigned_small_factor(bignum * x, bignum_digit_type y,int negative_p); void bignum_destructive_add(bignum * bignum, bignum_digit_type n); void bignum_destructive_scale_up(bignum * bignum, bignum_digit_type factor); - void bignum_divide_unsigned_large_denominator(bignum * numerator, bignum * denominator, + void bignum_divide_unsigned_large_denominator(bignum * numerator, bignum * denominator, bignum * * quotient, bignum * * remainder, int q_negative_p, int r_negative_p); void bignum_divide_unsigned_normalized(bignum * u, bignum * v, bignum * q); - bignum_digit_type bignum_divide_subtract(bignum_digit_type * v_start, bignum_digit_type * v_end, + bignum_digit_type bignum_divide_subtract(bignum_digit_type * v_start, bignum_digit_type * v_end, bignum_digit_type guess, bignum_digit_type * u_start); - void bignum_divide_unsigned_medium_denominator(bignum * numerator,bignum_digit_type denominator, + void bignum_divide_unsigned_medium_denominator(bignum * numerator,bignum_digit_type denominator, bignum * * quotient, bignum * * remainder,int q_negative_p, int r_negative_p); void bignum_destructive_normalization(bignum * source, bignum * target, int shift_left); void bignum_destructive_unnormalization(bignum * bignum, int shift_right); - bignum_digit_type bignum_digit_divide(bignum_digit_type uh, bignum_digit_type ul, + bignum_digit_type bignum_digit_divide(bignum_digit_type uh, bignum_digit_type ul, bignum_digit_type v, bignum_digit_type * q) /* return value */; - bignum_digit_type bignum_digit_divide_subtract(bignum_digit_type v1, bignum_digit_type v2, + bignum_digit_type bignum_digit_divide_subtract(bignum_digit_type v1, bignum_digit_type v2, bignum_digit_type guess, bignum_digit_type * u); - void bignum_divide_unsigned_small_denominator(bignum * numerator, bignum_digit_type denominator, + void bignum_divide_unsigned_small_denominator(bignum * numerator, bignum_digit_type denominator, bignum * * quotient, bignum * * remainder,int q_negative_p, int r_negative_p); bignum_digit_type bignum_destructive_scale_down(bignum * bignum, bignum_digit_type denominator); bignum * bignum_remainder_unsigned_small_denominator(bignum * n, bignum_digit_type d, int negative_p); @@ -172,7 +172,7 @@ struct factor_vm template void each_object(Iterator &iterator); cell find_all_words(); cell object_size(cell tagged); - + //write barrier cell allot_markers_offset; @@ -185,27 +185,27 @@ struct factor_vm { return ((cell)c - cards_offset) << card_bits; } - + inline cell card_offset(card *c) { return *(c - (cell)data->cards + (cell)data->allot_markers); } - + inline card_deck *addr_to_deck(cell a) { return (card_deck *)(((cell)a >> deck_bits) + decks_offset); } - + inline cell deck_to_addr(card_deck *c) { return ((cell)c - decks_offset) << deck_bits; } - + inline card *deck_to_card(card_deck *d) { return (card *)((((cell)d - decks_offset) << (deck_bits - card_bits)) + cards_offset); } - + inline card *addr_to_allot_marker(object *a) { return (card *)(((cell)a >> card_bits) + allot_markers_offset); @@ -397,7 +397,7 @@ struct factor_vm //math cell bignum_zero; cell bignum_pos_one; - cell bignum_neg_one; + cell bignum_neg_one; void primitive_bignum_to_fixnum(); void primitive_float_to_fixnum(); @@ -484,7 +484,7 @@ struct factor_vm inline double fixnum_to_float(cell tagged); template Type *untag_check(cell value); template Type *untag(cell value); - + //io void init_c_io(); void io_error(); @@ -535,7 +535,6 @@ struct factor_vm //code_heap heap *code; unordered_map forwarding; - typedef void (factor_vm::*code_heap_iterator)(code_block *compiled); void init_code_heap(cell size); bool in_code_heap_p(cell ptr); @@ -554,7 +553,7 @@ struct factor_vm template void iterate_code_heap(Iterator &iter) { heap_block *scan = code->first_block(); - + while(scan) { if(scan->status != B_FREE) @@ -606,7 +605,7 @@ struct factor_vm void primitive_set_innermost_stack_frame_quot(); void save_callstack_bottom(stack_frame *callstack_bottom); template void iterate_callstack(cell top, cell bottom, Iterator &iterator); - + /* Every object has a regular representation in the runtime, which makes GC much simpler. Every slot of the object until binary_payload_start is a pointer to some other object. */ @@ -615,9 +614,9 @@ struct factor_vm cell scan = obj; cell payload_start = binary_payload_start((object *)obj); cell end = obj + payload_start; - + scan += sizeof(cell); - + while(scan < end) { iter((cell *)scan); @@ -725,11 +724,11 @@ struct factor_vm const vm_char *default_image_path(); void windows_image_path(vm_char *full_path, vm_char *temp_path, unsigned int length); bool windows_stat(vm_char *path); - + #if defined(WINNT) void open_console(); LONG exception_handler(PEXCEPTION_POINTERS pe); - // next method here: + // next method here: #endif #else // UNIX void memory_signal_handler(int signal, siginfo_t *siginfo, void *uap); @@ -742,59 +741,50 @@ struct factor_vm #ifdef __APPLE__ void call_fault_handler(exception_type_t exception, exception_data_type_t code, MACH_EXC_STATE_TYPE *exc_state, MACH_THREAD_STATE_TYPE *thread_state, MACH_FLOAT_STATE_TYPE *float_state); #endif - - factor_vm() - : profiling_p(false), - secure_gc(false), - gc_off(false), - fep_disabled(false), - full_output(false), - max_pic_size(0) - { - memset(this,0,sizeof(this)); // just to make sure - } + + factor_vm(); }; #ifndef FACTOR_REENTRANT - #define FACTOR_SINGLE_THREADED_TESTING + #define FACTOR_SINGLE_THREADED_TESTING #endif #ifdef FACTOR_SINGLE_THREADED_SINGLETON /* calls are dispatched using the singleton vm ptr */ - extern factor_vm *vm; - #define PRIMITIVE_GETVM() vm - #define PRIMITIVE_OVERFLOW_GETVM() vm - #define VM_PTR vm - #define ASSERTVM() - #define SIGNAL_VM_PTR() vm + extern factor_vm *vm; + #define PRIMITIVE_GETVM() vm + #define PRIMITIVE_OVERFLOW_GETVM() vm + #define VM_PTR vm + #define ASSERTVM() + #define SIGNAL_VM_PTR() vm #endif #ifdef FACTOR_SINGLE_THREADED_TESTING /* calls are dispatched as per multithreaded, but checked against singleton */ - extern factor_vm *vm; - #define ASSERTVM() assert(vm==myvm) - #define PRIMITIVE_GETVM() ((factor_vm*)myvm) - #define PRIMITIVE_OVERFLOW_GETVM() ASSERTVM(); myvm - #define VM_PTR myvm - #define SIGNAL_VM_PTR() tls_vm() + extern factor_vm *vm; + #define ASSERTVM() assert(vm==myvm) + #define PRIMITIVE_GETVM() ((factor_vm*)myvm) + #define PRIMITIVE_OVERFLOW_GETVM() ASSERTVM(); myvm + #define VM_PTR myvm + #define SIGNAL_VM_PTR() tls_vm() #endif #ifdef FACTOR_REENTRANT_TLS /* uses thread local storage to obtain vm ptr */ - #define PRIMITIVE_GETVM() tls_vm() - #define PRIMITIVE_OVERFLOW_GETVM() tls_vm() - #define VM_PTR tls_vm() - #define ASSERTVM() - #define SIGNAL_VM_PTR() tls_vm() + #define PRIMITIVE_GETVM() tls_vm() + #define PRIMITIVE_OVERFLOW_GETVM() tls_vm() + #define VM_PTR tls_vm() + #define ASSERTVM() + #define SIGNAL_VM_PTR() tls_vm() #endif #ifdef FACTOR_REENTRANT - #define PRIMITIVE_GETVM() ((factor_vm*)myvm) - #define PRIMITIVE_OVERFLOW_GETVM() ((factor_vm*)myvm) - #define VM_PTR myvm - #define ASSERTVM() - #define SIGNAL_VM_PTR() tls_vm() + #define PRIMITIVE_GETVM() ((factor_vm*)myvm) + #define PRIMITIVE_OVERFLOW_GETVM() ((factor_vm*)myvm) + #define VM_PTR myvm + #define ASSERTVM() + #define SIGNAL_VM_PTR() tls_vm() #endif extern unordered_map thread_vms; From 95655c26b46e7a55714612a11f1eaac5e9d9d774 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Mon, 5 Oct 2009 03:28:36 -0500 Subject: [PATCH 4/7] vm: remove debug message --- vm/quotations.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/vm/quotations.cpp b/vm/quotations.cpp index f5eaffa163..c1ab60b43d 100755 --- a/vm/quotations.cpp +++ b/vm/quotations.cpp @@ -330,7 +330,6 @@ void factor_vm::compile_all_words() } - printf("done\n"); /* Update XTs in code heap */ word_updater updater(this); iterate_code_heap(updater); From 718cb8b5792ac6b74541039df149262a9586699c Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Mon, 5 Oct 2009 04:31:35 -0500 Subject: [PATCH 5/7] vm: fix VM constructor --- vm/factor.cpp | 2 +- vm/vm.cpp | 5 +---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/vm/factor.cpp b/vm/factor.cpp index 41a2da6f03..77c2e67148 100755 --- a/vm/factor.cpp +++ b/vm/factor.cpp @@ -224,7 +224,7 @@ struct startargs { factor_vm *new_factor_vm() { - factor_vm *newvm = new factor_vm; + factor_vm *newvm = new factor_vm(); register_vm_with_thread(newvm); thread_vms[thread_id()] = newvm; diff --git a/vm/vm.cpp b/vm/vm.cpp index a6731a3225..45cf05075c 100755 --- a/vm/vm.cpp +++ b/vm/vm.cpp @@ -3,9 +3,6 @@ namespace factor { -factor_vm::factor_vm() -{ - memset(this,0,sizeof(factor_vm)); -} +factor_vm::factor_vm() { } } From b4e36608da2f842c9910b018d5a86033dbe9e93d Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Mon, 5 Oct 2009 05:27:49 -0500 Subject: [PATCH 6/7] compiler.cfg: remove _gc instruction, it doesn't need to exist, and change GC checks to ensure that the right amount of space is available instead of blindly checking for 1Kb --- .../build-stack-frame.factor | 2 +- basis/compiler/cfg/gc-checks/gc-checks.factor | 21 ++++++++++++++++--- .../cfg/instructions/instructions.factor | 6 +----- .../cfg/intrinsics/allot/allot.factor | 5 ++++- .../linear-scan/assignment/assignment.factor | 10 +++++---- .../cfg/linearization/linearization.factor | 15 ------------- .../stacks/uninitialized/uninitialized.factor | 2 +- basis/compiler/codegen/codegen.factor | 4 ++-- basis/cpu/architecture/architecture.factor | 2 +- basis/cpu/ppc/ppc.factor | 5 ++--- basis/cpu/x86/x86.factor | 4 ++-- vm/data_gc.cpp | 4 ++-- vm/data_gc.hpp | 5 ----- 13 files changed, 40 insertions(+), 45 deletions(-) diff --git a/basis/compiler/cfg/build-stack-frame/build-stack-frame.factor b/basis/compiler/cfg/build-stack-frame/build-stack-frame.factor index 90992fcc96..b5510c7142 100644 --- a/basis/compiler/cfg/build-stack-frame/build-stack-frame.factor +++ b/basis/compiler/cfg/build-stack-frame/build-stack-frame.factor @@ -25,7 +25,7 @@ M: stack-frame-insn compute-stack-frame* M: ##call compute-stack-frame* word>> sub-primitive>> [ frame-required? on ] unless ; -M: _gc compute-stack-frame* +M: ##gc compute-stack-frame* frame-required? on stack-frame new swap tagged-values>> length cells >>gc-root-size request-stack-frame ; diff --git a/basis/compiler/cfg/gc-checks/gc-checks.factor b/basis/compiler/cfg/gc-checks/gc-checks.factor index 21a60768ea..5d3c79e40f 100644 --- a/basis/compiler/cfg/gc-checks/gc-checks.factor +++ b/basis/compiler/cfg/gc-checks/gc-checks.factor @@ -1,7 +1,7 @@ ! Copyright (C) 2009 Slava Pestov. ! See http://factorcode.org/license.txt for BSD license. USING: accessors kernel sequences assocs fry -cpu.architecture +cpu.architecture layouts compiler.cfg.rpo compiler.cfg.registers compiler.cfg.instructions @@ -17,11 +17,26 @@ IN: compiler.cfg.gc-checks : blocks-with-gc ( cfg -- bbs ) post-order [ insert-gc-check? ] filter ; +GENERIC: allocation-size* ( insn -- n ) + +M: ##allot allocation-size* size>> ; + +M: ##box-alien allocation-size* drop 4 cells ; + +M: ##box-displaced-alien allocation-size* drop 4 cells ; + +: allocation-size ( bb -- n ) + instructions>> [ ##allocation? ] filter [ allocation-size* ] sigma ; + : insert-gc-check ( bb -- ) - dup '[ + dup dup '[ int-rep next-vreg-rep int-rep next-vreg-rep - f f _ uninitialized-locs \ ##gc new-insn + _ allocation-size + f + f + _ uninitialized-locs + \ ##gc new-insn prefix ] change-instructions drop ; diff --git a/basis/compiler/cfg/instructions/instructions.factor b/basis/compiler/cfg/instructions/instructions.factor index b6881b61b6..6b61cb53cb 100644 --- a/basis/compiler/cfg/instructions/instructions.factor +++ b/basis/compiler/cfg/instructions/instructions.factor @@ -672,7 +672,7 @@ use: src1/int-rep src2/int-rep ; INSN: ##gc temp: temp1/int-rep temp2/int-rep -literal: data-values tagged-values uninitialized-locs ; +literal: size data-values tagged-values uninitialized-locs ; INSN: ##save-context temp: temp1/int-rep temp2/int-rep @@ -740,10 +740,6 @@ use: src1/int-rep src2/int-rep ; TUPLE: spill-slot { n integer } ; C: spill-slot -INSN: _gc -temp: temp1 temp2 -literal: data-values tagged-values uninitialized-locs ; - ! These instructions operate on machine registers and not ! virtual registers INSN: _spill diff --git a/basis/compiler/cfg/intrinsics/allot/allot.factor b/basis/compiler/cfg/intrinsics/allot/allot.factor index 6ad5450bfc..8283299ea8 100644 --- a/basis/compiler/cfg/intrinsics/allot/allot.factor +++ b/basis/compiler/cfg/intrinsics/allot/allot.factor @@ -55,6 +55,9 @@ IN: compiler.cfg.intrinsics.allot ] [ node emit-primitive ] if ] ; +: expand-(byte-array)? ( obj -- ? ) + dup integer? [ 0 1024 between? ] [ drop f ] if ; + : expand-? ( obj -- ? ) dup integer? [ 0 32 between? ] [ drop f ] if ; @@ -69,7 +72,7 @@ IN: compiler.cfg.intrinsics.allot [ byte-array store-length ] [ ds-push ] [ ] tri ; : emit-(byte-array) ( node -- ) - dup node-input-infos first literal>> dup expand-? + dup node-input-infos first literal>> dup expand-(byte-array)? [ nip emit-allot-byte-array drop ] [ drop emit-primitive ] if ; :: emit- ( node -- ) diff --git a/basis/compiler/cfg/linear-scan/assignment/assignment.factor b/basis/compiler/cfg/linear-scan/assignment/assignment.factor index 8959add822..f69db1deea 100644 --- a/basis/compiler/cfg/linear-scan/assignment/assignment.factor +++ b/basis/compiler/cfg/linear-scan/assignment/assignment.factor @@ -2,7 +2,7 @@ ! See http://factorcode.org/license.txt for BSD license. USING: accessors kernel math assocs namespaces sequences heaps fry make combinators sets locals arrays -cpu.architecture +cpu.architecture layouts compiler.cfg compiler.cfg.def-use compiler.cfg.liveness @@ -117,8 +117,6 @@ RENAMING: assign [ vreg>reg ] [ vreg>reg ] [ vreg>reg ] M: vreg-insn assign-registers-in-insn [ assign-insn-defs ] [ assign-insn-uses ] [ assign-insn-temps ] tri ; -! TODO: needs tagged-rep - : trace-on-gc ( assoc -- assoc' ) ! When a GC occurs, virtual registers which contain tagged data ! are traced by the GC. Outputs a sequence physical registers. @@ -141,12 +139,16 @@ M: vreg-insn assign-registers-in-insn ] assoc-each ] { } make ; +: gc-root-offsets ( registers -- alist ) + ! Outputs a sequence of { offset register/spill-slot } pairs + [ length iota [ cell * ] map ] keep zip ; + M: ##gc assign-registers-in-insn ! Since ##gc is always the first instruction in a block, the set of ! values live at the ##gc is just live-in. dup call-next-method basic-block get register-live-ins get at - [ trace-on-gc >>tagged-values ] [ spill-on-gc >>data-values ] bi + [ trace-on-gc gc-root-offsets >>tagged-values ] [ spill-on-gc >>data-values ] bi drop ; M: insn assign-registers-in-insn drop ; diff --git a/basis/compiler/cfg/linearization/linearization.factor b/basis/compiler/cfg/linearization/linearization.factor index 31a4247206..34ae7f8cc6 100755 --- a/basis/compiler/cfg/linearization/linearization.factor +++ b/basis/compiler/cfg/linearization/linearization.factor @@ -97,21 +97,6 @@ M: ##dispatch linearize-insn [ successors>> [ block-number _dispatch-label ] each ] bi* ; -: gc-root-offsets ( registers -- alist ) - ! Outputs a sequence of { offset register/spill-slot } pairs - [ length iota [ cell * ] map ] keep zip ; - -M: ##gc linearize-insn - nip - { - [ temp1>> ] - [ temp2>> ] - [ data-values>> ] - [ tagged-values>> gc-root-offsets ] - [ uninitialized-locs>> ] - } cleave - _gc ; - : linearize-basic-blocks ( cfg -- insns ) [ [ diff --git a/basis/compiler/cfg/stacks/uninitialized/uninitialized.factor b/basis/compiler/cfg/stacks/uninitialized/uninitialized.factor index ce0e98de5f..0bed759e52 100644 --- a/basis/compiler/cfg/stacks/uninitialized/uninitialized.factor +++ b/basis/compiler/cfg/stacks/uninitialized/uninitialized.factor @@ -9,7 +9,7 @@ IN: compiler.cfg.stacks.uninitialized ! Consider the following sequence of instructions: ! ##inc-d 2 -! _gc +! ##gc ! ##replace ... D 0 ! ##replace ... D 1 ! The GC check runs before stack locations 0 and 1 have been initialized, diff --git a/basis/compiler/codegen/codegen.factor b/basis/compiler/codegen/codegen.factor index 05f50771f6..121f09a5a8 100755 --- a/basis/compiler/codegen/codegen.factor +++ b/basis/compiler/codegen/codegen.factor @@ -271,10 +271,10 @@ M: object load-gc-root drop %load-gc-root ; : load-data-regs ( data-regs -- ) [ first3 %reload ] each ; -M: _gc generate-insn +M: ##gc generate-insn "no-gc" define-label { - [ [ "no-gc" get ] dip [ temp1>> ] [ temp2>> ] bi %check-nursery ] + [ [ "no-gc" get ] dip [ size>> ] [ temp1>> ] [ temp2>> ] tri %check-nursery ] [ [ uninitialized-locs>> ] [ temp1>> ] bi wipe-locs ] [ data-values>> save-data-regs ] [ [ tagged-values>> ] [ temp1>> ] bi save-gc-roots ] diff --git a/basis/cpu/architecture/architecture.factor b/basis/cpu/architecture/architecture.factor index 0fb69120da..5ce16ad731 100644 --- a/basis/cpu/architecture/architecture.factor +++ b/basis/cpu/architecture/architecture.factor @@ -317,7 +317,7 @@ HOOK: %allot cpu ( dst size class temp -- ) HOOK: %write-barrier cpu ( src card# table -- ) ! GC checks -HOOK: %check-nursery cpu ( label temp1 temp2 -- ) +HOOK: %check-nursery cpu ( label size temp1 temp2 -- ) HOOK: %save-gc-root cpu ( gc-root register -- ) HOOK: %load-gc-root cpu ( gc-root register -- ) HOOK: %call-gc cpu ( gc-root-count temp1 -- ) diff --git a/basis/cpu/ppc/ppc.factor b/basis/cpu/ppc/ppc.factor index f604efe64d..9394e864f0 100644 --- a/basis/cpu/ppc/ppc.factor +++ b/basis/cpu/ppc/ppc.factor @@ -446,12 +446,11 @@ M:: ppc %write-barrier ( src card# table -- ) src card# deck-bits SRWI table scratch-reg card# STBX ; -M:: ppc %check-nursery ( label temp1 temp2 -- ) +M:: ppc %check-nursery ( label size temp1 temp2 -- ) temp2 load-zone-ptr temp1 temp2 cell LWZ temp2 temp2 3 cells LWZ - ! add ALLOT_BUFFER_ZONE to here - temp1 temp1 1024 ADDI + temp1 temp1 size ADDI ! is here >= end? temp1 0 temp2 CMP label BLE ; diff --git a/basis/cpu/x86/x86.factor b/basis/cpu/x86/x86.factor index 7c025707fc..fa5d99101b 100644 --- a/basis/cpu/x86/x86.factor +++ b/basis/cpu/x86/x86.factor @@ -410,10 +410,10 @@ M:: x86 %write-barrier ( src card# table -- ) table table [] MOV table card# [+] card-mark MOV ; -M:: x86 %check-nursery ( label temp1 temp2 -- ) +M:: x86 %check-nursery ( label size temp1 temp2 -- ) temp1 load-zone-ptr temp2 temp1 cell [+] MOV - temp2 1024 ADD + temp2 size ADD temp1 temp1 3 cells [+] MOV temp2 temp1 CMP label JLE ; diff --git a/vm/data_gc.cpp b/vm/data_gc.cpp index 679eec01f7..60195f89d5 100755 --- a/vm/data_gc.cpp +++ b/vm/data_gc.cpp @@ -738,10 +738,10 @@ object *factor_vm::allot_object(header header, cell size) object *obj; - if(nursery.size - allot_buffer_zone > size) + if(nursery.size > size) { /* If there is insufficient room, collect the nursery */ - if(nursery.here + allot_buffer_zone + size > nursery.end) + if(nursery.here + size > nursery.end) garbage_collection(data->nursery(),false,true,0); cell h = nursery.here; diff --git a/vm/data_gc.hpp b/vm/data_gc.hpp index 0f098b877d..2a74568b66 100755 --- a/vm/data_gc.hpp +++ b/vm/data_gc.hpp @@ -58,11 +58,6 @@ struct gc_state { } }; -/* We leave this many bytes free at the top of the nursery so that inline -allocation (which does not call GC because of possible roots in volatile -registers) does not run out of memory */ -static const cell allot_buffer_zone = 1024; - VM_C_API void inline_gc(cell *gc_roots_base, cell gc_roots_size, factor_vm *myvm); } From 61e3ba77d84f7f7d8f8eb30ce54ffed0c1dd5615 Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Mon, 5 Oct 2009 05:29:30 -0500 Subject: [PATCH 7/7] math.matrices.simd: add an inline declaration --- extra/math/matrices/simd/simd.factor | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extra/math/matrices/simd/simd.factor b/extra/math/matrices/simd/simd.factor index cac77bf8b0..1fdb4f6d56 100644 --- a/extra/math/matrices/simd/simd.factor +++ b/extra/math/matrices/simd/simd.factor @@ -135,7 +135,7 @@ TYPED: transpose-matrix4 ( matrix: matrix4 -- matrix: matrix4 ) : scale-matrix4 ( factors -- matrix ) [ float-4{ t t t f } ] dip float-4{ 0.0 0.0 0.0 1.0 } v? - diagonal-matrix4 ; + diagonal-matrix4 ; inline : ortho-matrix4 ( factors -- matrix ) float-4{ 1.0 1.0 1.0 1.0 } swap v/ scale-matrix4 ; inline