vm: speed up some bit twiddling on 32-bit

db4
Slava Pestov 2009-11-05 19:29:27 -06:00
parent 72ab6ec548
commit 22c717616c
4 changed files with 55 additions and 53 deletions

View File

@ -3,7 +3,7 @@
USING: classes.struct alien.c-types alien.syntax ; USING: classes.struct alien.c-types alien.syntax ;
IN: vm IN: vm
TYPEDEF: intptr_t cell TYPEDEF: uintptr_t cell
C-TYPE: context C-TYPE: context
STRUCT: zone STRUCT: zone

View File

@ -3,65 +3,60 @@ namespace factor
/* These algorithms were snarfed from various places. I did not come up with them myself */ /* These algorithms were snarfed from various places. I did not come up with them myself */
inline cell popcount(u64 x) inline cell popcount(cell x)
{ {
#ifdef FACTOR_64
u64 k1 = 0x5555555555555555ll; u64 k1 = 0x5555555555555555ll;
u64 k2 = 0x3333333333333333ll; u64 k2 = 0x3333333333333333ll;
u64 k4 = 0x0f0f0f0f0f0f0f0fll; u64 k4 = 0x0f0f0f0f0f0f0f0fll;
u64 kf = 0x0101010101010101ll; u64 kf = 0x0101010101010101ll;
cell ks = 56;
#else
u32 k1 = 0x55555555;
u32 k2 = 0x33333333;
u32 k4 = 0xf0f0f0f;
u32 kf = 0x1010101;
cell ks = 24;
#endif
x = x - ((x >> 1) & k1); // put count of each 2 bits into those 2 bits x = x - ((x >> 1) & k1); // put count of each 2 bits into those 2 bits
x = (x & k2) + ((x >> 2) & k2); // put count of each 4 bits into those 4 bits x = (x & k2) + ((x >> 2) & k2); // put count of each 4 bits into those 4 bits
x = (x + (x >> 4)) & k4 ; // put count of each 8 bits into those 8 bits x = (x + (x >> 4)) & k4 ; // put count of each 8 bits into those 8 bits
x = (x * kf) >> 56; // returns 8 most significant bits of x + (x<<8) + (x<<16) + (x<<24) + ... x = (x * kf) >> ks; // returns 8 most significant bits of x + (x<<8) + (x<<16) + (x<<24) + ...
return (cell)x; return (cell)x;
} }
inline cell log2(u64 x) inline cell log2(cell x)
{ {
#ifdef FACTOR_AMD64 #if defined(FACTOR_X86)
cell n; cell n;
asm ("bsr %1, %0;":"=r"(n):"r"((cell)x)); asm ("bsr %1, %0;":"=r"(n):"r"(x));
#elif defined(FACTOR_AMD64)
cell n;
asm ("bsr %1, %0;":"=r"(n):"r"(x));
#else #else
cell n = 0; cell n = 0;
#ifdef FACTOR_64
if (x >= (u64)1 << 32) { x >>= 32; n += 32; } if (x >= (u64)1 << 32) { x >>= 32; n += 32; }
if (x >= (u64)1 << 16) { x >>= 16; n += 16; } #endif
if (x >= (u64)1 << 8) { x >>= 8; n += 8; } if (x >= (u32)1 << 16) { x >>= 16; n += 16; }
if (x >= (u64)1 << 4) { x >>= 4; n += 4; } if (x >= (u32)1 << 8) { x >>= 8; n += 8; }
if (x >= (u64)1 << 2) { x >>= 2; n += 2; } if (x >= (u32)1 << 4) { x >>= 4; n += 4; }
if (x >= (u64)1 << 1) { n += 1; } if (x >= (u32)1 << 2) { x >>= 2; n += 2; }
if (x >= (u32)1 << 1) { n += 1; }
#endif #endif
return n; return n;
} }
inline cell log2(u16 x) inline cell rightmost_clear_bit(cell x)
{
#if defined(FACTOR_X86) || defined(FACTOR_AMD64)
cell n;
asm ("bsr %1, %0;":"=r"(n):"r"((cell)x));
#else
cell n = 0;
if (x >= 1 << 8) { x >>= 8; n += 8; }
if (x >= 1 << 4) { x >>= 4; n += 4; }
if (x >= 1 << 2) { x >>= 2; n += 2; }
if (x >= 1 << 1) { n += 1; }
#endif
return n;
}
inline cell rightmost_clear_bit(u64 x)
{ {
return log2(~x & (x + 1)); return log2(~x & (x + 1));
} }
inline cell rightmost_set_bit(u64 x) inline cell rightmost_set_bit(cell x)
{ {
return log2(x & -x); return log2(x & -x);
} }
inline cell rightmost_set_bit(u16 x)
{
return log2((u16)(x & -x));
}
} }

View File

@ -2,18 +2,19 @@ namespace factor
{ {
const int block_granularity = 16; const int block_granularity = 16;
const int forwarding_granularity = 64; const int mark_bits_granularity = sizeof(cell) * 8;
const int mark_bits_mask = sizeof(cell) * 8 - 1;
template<typename Block> struct mark_bits { template<typename Block> struct mark_bits {
cell size; cell size;
cell start; cell start;
cell bits_size; cell bits_size;
u64 *marked; cell *marked;
cell *forwarding; cell *forwarding;
void clear_mark_bits() void clear_mark_bits()
{ {
memset(marked,0,bits_size * sizeof(u64)); memset(marked,0,bits_size * sizeof(cell));
} }
void clear_forwarding() void clear_forwarding()
@ -24,8 +25,8 @@ template<typename Block> struct mark_bits {
explicit mark_bits(cell size_, cell start_) : explicit mark_bits(cell size_, cell start_) :
size(size_), size(size_),
start(start_), start(start_),
bits_size(size / block_granularity / forwarding_granularity), bits_size(size / block_granularity / mark_bits_granularity),
marked(new u64[bits_size]), marked(new cell[bits_size]),
forwarding(new cell[bits_size]) forwarding(new cell[bits_size])
{ {
clear_mark_bits(); clear_mark_bits();
@ -53,15 +54,15 @@ template<typename Block> struct mark_bits {
std::pair<cell,cell> bitmap_deref(Block *address) std::pair<cell,cell> bitmap_deref(Block *address)
{ {
cell line_number = block_line(address); cell line_number = block_line(address);
cell word_index = (line_number >> 6); cell word_index = (line_number / mark_bits_granularity);
cell word_shift = (line_number & 63); cell word_shift = (line_number & mark_bits_mask);
return std::make_pair(word_index,word_shift); return std::make_pair(word_index,word_shift);
} }
bool bitmap_elt(u64 *bits, Block *address) bool bitmap_elt(cell *bits, Block *address)
{ {
std::pair<cell,cell> position = bitmap_deref(address); std::pair<cell,cell> position = bitmap_deref(address);
return (bits[position.first] & ((u64)1 << position.second)) != 0; return (bits[position.first] & ((cell)1 << position.second)) != 0;
} }
Block *next_block_after(Block *block) Block *next_block_after(Block *block)
@ -69,13 +70,13 @@ template<typename Block> struct mark_bits {
return (Block *)((cell)block + block->size()); return (Block *)((cell)block + block->size());
} }
void set_bitmap_range(u64 *bits, Block *address) void set_bitmap_range(cell *bits, Block *address)
{ {
std::pair<cell,cell> start = bitmap_deref(address); std::pair<cell,cell> start = bitmap_deref(address);
std::pair<cell,cell> end = bitmap_deref(next_block_after(address)); std::pair<cell,cell> end = bitmap_deref(next_block_after(address));
u64 start_mask = ((u64)1 << start.second) - 1; cell start_mask = ((cell)1 << start.second) - 1;
u64 end_mask = ((u64)1 << end.second) - 1; cell end_mask = ((cell)1 << end.second) - 1;
if(start.first == end.first) if(start.first == end.first)
bits[start.first] |= start_mask ^ end_mask; bits[start.first] |= start_mask ^ end_mask;
@ -87,7 +88,7 @@ template<typename Block> struct mark_bits {
bits[start.first] |= ~start_mask; bits[start.first] |= ~start_mask;
for(cell index = start.first + 1; index < end.first; index++) for(cell index = start.first + 1; index < end.first; index++)
bits[index] = (u64)-1; bits[index] = (cell)-1;
if(end_mask != 0) if(end_mask != 0)
{ {
@ -121,7 +122,8 @@ template<typename Block> struct mark_bits {
} }
} }
/* We have the popcount for every 64 entries; look up and compute the rest */ /* We have the popcount for every mark_bits_granularity entries; look
up and compute the rest */
Block *forward_block(Block *original) Block *forward_block(Block *original)
{ {
#ifdef FACTOR_DEBUG #ifdef FACTOR_DEBUG
@ -130,7 +132,7 @@ template<typename Block> struct mark_bits {
std::pair<cell,cell> position = bitmap_deref(original); std::pair<cell,cell> position = bitmap_deref(original);
cell approx_popcount = forwarding[position.first]; cell approx_popcount = forwarding[position.first];
u64 mask = ((u64)1 << position.second) - 1; cell mask = ((cell)1 << position.second) - 1;
cell new_line_number = approx_popcount + popcount(marked[position.first] & mask); cell new_line_number = approx_popcount + popcount(marked[position.first] & mask);
Block *new_block = line_block(new_line_number); Block *new_block = line_block(new_line_number);
@ -147,13 +149,13 @@ template<typename Block> struct mark_bits {
for(cell index = position.first; index < bits_size; index++) for(cell index = position.first; index < bits_size; index++)
{ {
u64 mask = ((s64)marked[index] >> bit_index); cell mask = ((fixnum)marked[index] >> bit_index);
if(~mask) if(~mask)
{ {
/* Found an unmarked block on this page. /* Found an unmarked block on this page.
Stop, it's hammer time */ Stop, it's hammer time */
cell clear_bit = rightmost_clear_bit(mask); cell clear_bit = rightmost_clear_bit(mask);
return line_block(index * 64 + bit_index + clear_bit); return line_block(index * mark_bits_granularity + bit_index + clear_bit);
} }
else else
{ {
@ -174,13 +176,13 @@ template<typename Block> struct mark_bits {
for(cell index = position.first; index < bits_size; index++) for(cell index = position.first; index < bits_size; index++)
{ {
u64 mask = (marked[index] >> bit_index); cell mask = (marked[index] >> bit_index);
if(mask) if(mask)
{ {
/* Found an marked block on this page. /* Found an marked block on this page.
Stop, it's hammer time */ Stop, it's hammer time */
cell set_bit = rightmost_set_bit(mask); cell set_bit = rightmost_set_bit(mask);
return line_block(index * 64 + bit_index + set_bit); return line_block(index * mark_bits_granularity + bit_index + set_bit);
} }
else else
{ {

View File

@ -79,11 +79,16 @@ void object_start_map::update_for_sweep(mark_bits<object> *state)
{ {
for(cell index = 0; index < state->bits_size; index++) for(cell index = 0; index < state->bits_size; index++)
{ {
u64 mask = state->marked[index]; cell mask = state->marked[index];
#ifdef FACTOR_64
update_card_for_sweep(index * 4, mask & 0xffff); update_card_for_sweep(index * 4, mask & 0xffff);
update_card_for_sweep(index * 4 + 1, (mask >> 16) & 0xffff); update_card_for_sweep(index * 4 + 1, (mask >> 16) & 0xffff);
update_card_for_sweep(index * 4 + 2, (mask >> 32) & 0xffff); update_card_for_sweep(index * 4 + 2, (mask >> 32) & 0xffff);
update_card_for_sweep(index * 4 + 3, (mask >> 48) & 0xffff); update_card_for_sweep(index * 4 + 3, (mask >> 48) & 0xffff);
#else
update_card_for_sweep(index * 2, mask & 0xffff);
update_card_for_sweep(index * 2 + 1, (mask >> 16) & 0xffff);
#endif
} }
} }