From 22c717616c924ed2281d372d1b527575685482fc Mon Sep 17 00:00:00 2001 From: Slava Pestov Date: Thu, 5 Nov 2009 19:29:27 -0600 Subject: [PATCH] vm: speed up some bit twiddling on 32-bit --- basis/vm/vm.factor | 2 +- vm/bitwise_hacks.hpp | 59 +++++++++++++++++++---------------------- vm/mark_bits.hpp | 40 +++++++++++++++------------- vm/object_start_map.cpp | 7 ++++- 4 files changed, 55 insertions(+), 53 deletions(-) diff --git a/basis/vm/vm.factor b/basis/vm/vm.factor index ba057edffa..86ff4497b8 100644 --- a/basis/vm/vm.factor +++ b/basis/vm/vm.factor @@ -3,7 +3,7 @@ USING: classes.struct alien.c-types alien.syntax ; IN: vm -TYPEDEF: intptr_t cell +TYPEDEF: uintptr_t cell C-TYPE: context STRUCT: zone diff --git a/vm/bitwise_hacks.hpp b/vm/bitwise_hacks.hpp index dc685bb28c..8830e4f876 100644 --- a/vm/bitwise_hacks.hpp +++ b/vm/bitwise_hacks.hpp @@ -3,65 +3,60 @@ namespace factor /* These algorithms were snarfed from various places. I did not come up with them myself */ -inline cell popcount(u64 x) +inline cell popcount(cell x) { +#ifdef FACTOR_64 u64 k1 = 0x5555555555555555ll; u64 k2 = 0x3333333333333333ll; u64 k4 = 0x0f0f0f0f0f0f0f0fll; u64 kf = 0x0101010101010101ll; + cell ks = 56; +#else + u32 k1 = 0x55555555; + u32 k2 = 0x33333333; + u32 k4 = 0xf0f0f0f; + u32 kf = 0x1010101; + cell ks = 24; +#endif + x = x - ((x >> 1) & k1); // put count of each 2 bits into those 2 bits x = (x & k2) + ((x >> 2) & k2); // put count of each 4 bits into those 4 bits x = (x + (x >> 4)) & k4 ; // put count of each 8 bits into those 8 bits - x = (x * kf) >> 56; // returns 8 most significant bits of x + (x<<8) + (x<<16) + (x<<24) + ... + x = (x * kf) >> ks; // returns 8 most significant bits of x + (x<<8) + (x<<16) + (x<<24) + ... return (cell)x; } -inline cell log2(u64 x) +inline cell log2(cell x) { -#ifdef FACTOR_AMD64 +#if defined(FACTOR_X86) cell n; - asm ("bsr %1, %0;":"=r"(n):"r"((cell)x)); + asm ("bsr %1, %0;":"=r"(n):"r"(x)); +#elif defined(FACTOR_AMD64) + cell n; + asm ("bsr %1, %0;":"=r"(n):"r"(x)); #else cell n = 0; +#ifdef FACTOR_64 if (x >= (u64)1 << 32) { x >>= 32; n += 32; } - if (x >= (u64)1 << 16) { x >>= 16; n += 16; } - if (x >= (u64)1 << 8) { x >>= 8; n += 8; } - if (x >= (u64)1 << 4) { x >>= 4; n += 4; } - if (x >= (u64)1 << 2) { x >>= 2; n += 2; } - if (x >= (u64)1 << 1) { n += 1; } +#endif + if (x >= (u32)1 << 16) { x >>= 16; n += 16; } + if (x >= (u32)1 << 8) { x >>= 8; n += 8; } + if (x >= (u32)1 << 4) { x >>= 4; n += 4; } + if (x >= (u32)1 << 2) { x >>= 2; n += 2; } + if (x >= (u32)1 << 1) { n += 1; } #endif return n; } -inline cell log2(u16 x) -{ -#if defined(FACTOR_X86) || defined(FACTOR_AMD64) - cell n; - asm ("bsr %1, %0;":"=r"(n):"r"((cell)x)); -#else - cell n = 0; - if (x >= 1 << 8) { x >>= 8; n += 8; } - if (x >= 1 << 4) { x >>= 4; n += 4; } - if (x >= 1 << 2) { x >>= 2; n += 2; } - if (x >= 1 << 1) { n += 1; } -#endif - return n; -} - -inline cell rightmost_clear_bit(u64 x) +inline cell rightmost_clear_bit(cell x) { return log2(~x & (x + 1)); } -inline cell rightmost_set_bit(u64 x) +inline cell rightmost_set_bit(cell x) { return log2(x & -x); } -inline cell rightmost_set_bit(u16 x) -{ - return log2((u16)(x & -x)); -} - } diff --git a/vm/mark_bits.hpp b/vm/mark_bits.hpp index b54a2c9d46..d4b1dcda8d 100644 --- a/vm/mark_bits.hpp +++ b/vm/mark_bits.hpp @@ -2,18 +2,19 @@ namespace factor { const int block_granularity = 16; -const int forwarding_granularity = 64; +const int mark_bits_granularity = sizeof(cell) * 8; +const int mark_bits_mask = sizeof(cell) * 8 - 1; template struct mark_bits { cell size; cell start; cell bits_size; - u64 *marked; + cell *marked; cell *forwarding; void clear_mark_bits() { - memset(marked,0,bits_size * sizeof(u64)); + memset(marked,0,bits_size * sizeof(cell)); } void clear_forwarding() @@ -24,8 +25,8 @@ template struct mark_bits { explicit mark_bits(cell size_, cell start_) : size(size_), start(start_), - bits_size(size / block_granularity / forwarding_granularity), - marked(new u64[bits_size]), + bits_size(size / block_granularity / mark_bits_granularity), + marked(new cell[bits_size]), forwarding(new cell[bits_size]) { clear_mark_bits(); @@ -53,15 +54,15 @@ template struct mark_bits { std::pair bitmap_deref(Block *address) { cell line_number = block_line(address); - cell word_index = (line_number >> 6); - cell word_shift = (line_number & 63); + cell word_index = (line_number / mark_bits_granularity); + cell word_shift = (line_number & mark_bits_mask); return std::make_pair(word_index,word_shift); } - bool bitmap_elt(u64 *bits, Block *address) + bool bitmap_elt(cell *bits, Block *address) { std::pair position = bitmap_deref(address); - return (bits[position.first] & ((u64)1 << position.second)) != 0; + return (bits[position.first] & ((cell)1 << position.second)) != 0; } Block *next_block_after(Block *block) @@ -69,13 +70,13 @@ template struct mark_bits { return (Block *)((cell)block + block->size()); } - void set_bitmap_range(u64 *bits, Block *address) + void set_bitmap_range(cell *bits, Block *address) { std::pair start = bitmap_deref(address); std::pair end = bitmap_deref(next_block_after(address)); - u64 start_mask = ((u64)1 << start.second) - 1; - u64 end_mask = ((u64)1 << end.second) - 1; + cell start_mask = ((cell)1 << start.second) - 1; + cell end_mask = ((cell)1 << end.second) - 1; if(start.first == end.first) bits[start.first] |= start_mask ^ end_mask; @@ -87,7 +88,7 @@ template struct mark_bits { bits[start.first] |= ~start_mask; for(cell index = start.first + 1; index < end.first; index++) - bits[index] = (u64)-1; + bits[index] = (cell)-1; if(end_mask != 0) { @@ -121,7 +122,8 @@ template struct mark_bits { } } - /* We have the popcount for every 64 entries; look up and compute the rest */ + /* We have the popcount for every mark_bits_granularity entries; look + up and compute the rest */ Block *forward_block(Block *original) { #ifdef FACTOR_DEBUG @@ -130,7 +132,7 @@ template struct mark_bits { std::pair position = bitmap_deref(original); cell approx_popcount = forwarding[position.first]; - u64 mask = ((u64)1 << position.second) - 1; + cell mask = ((cell)1 << position.second) - 1; cell new_line_number = approx_popcount + popcount(marked[position.first] & mask); Block *new_block = line_block(new_line_number); @@ -147,13 +149,13 @@ template struct mark_bits { for(cell index = position.first; index < bits_size; index++) { - u64 mask = ((s64)marked[index] >> bit_index); + cell mask = ((fixnum)marked[index] >> bit_index); if(~mask) { /* Found an unmarked block on this page. Stop, it's hammer time */ cell clear_bit = rightmost_clear_bit(mask); - return line_block(index * 64 + bit_index + clear_bit); + return line_block(index * mark_bits_granularity + bit_index + clear_bit); } else { @@ -174,13 +176,13 @@ template struct mark_bits { for(cell index = position.first; index < bits_size; index++) { - u64 mask = (marked[index] >> bit_index); + cell mask = (marked[index] >> bit_index); if(mask) { /* Found an marked block on this page. Stop, it's hammer time */ cell set_bit = rightmost_set_bit(mask); - return line_block(index * 64 + bit_index + set_bit); + return line_block(index * mark_bits_granularity + bit_index + set_bit); } else { diff --git a/vm/object_start_map.cpp b/vm/object_start_map.cpp index 724f365e79..3159313dd5 100644 --- a/vm/object_start_map.cpp +++ b/vm/object_start_map.cpp @@ -79,11 +79,16 @@ void object_start_map::update_for_sweep(mark_bits *state) { for(cell index = 0; index < state->bits_size; index++) { - u64 mask = state->marked[index]; + cell mask = state->marked[index]; +#ifdef FACTOR_64 update_card_for_sweep(index * 4, mask & 0xffff); update_card_for_sweep(index * 4 + 1, (mask >> 16) & 0xffff); update_card_for_sweep(index * 4 + 2, (mask >> 32) & 0xffff); update_card_for_sweep(index * 4 + 3, (mask >> 48) & 0xffff); +#else + update_card_for_sweep(index * 2, mask & 0xffff); + update_card_for_sweep(index * 2 + 1, (mask >> 16) & 0xffff); +#endif } }