vm: speed up some bit twiddling on 32-bit
parent
72ab6ec548
commit
22c717616c
|
@ -3,7 +3,7 @@
|
||||||
USING: classes.struct alien.c-types alien.syntax ;
|
USING: classes.struct alien.c-types alien.syntax ;
|
||||||
IN: vm
|
IN: vm
|
||||||
|
|
||||||
TYPEDEF: intptr_t cell
|
TYPEDEF: uintptr_t cell
|
||||||
C-TYPE: context
|
C-TYPE: context
|
||||||
|
|
||||||
STRUCT: zone
|
STRUCT: zone
|
||||||
|
|
|
@ -3,65 +3,60 @@ namespace factor
|
||||||
|
|
||||||
/* These algorithms were snarfed from various places. I did not come up with them myself */
|
/* These algorithms were snarfed from various places. I did not come up with them myself */
|
||||||
|
|
||||||
inline cell popcount(u64 x)
|
inline cell popcount(cell x)
|
||||||
{
|
{
|
||||||
|
#ifdef FACTOR_64
|
||||||
u64 k1 = 0x5555555555555555ll;
|
u64 k1 = 0x5555555555555555ll;
|
||||||
u64 k2 = 0x3333333333333333ll;
|
u64 k2 = 0x3333333333333333ll;
|
||||||
u64 k4 = 0x0f0f0f0f0f0f0f0fll;
|
u64 k4 = 0x0f0f0f0f0f0f0f0fll;
|
||||||
u64 kf = 0x0101010101010101ll;
|
u64 kf = 0x0101010101010101ll;
|
||||||
|
cell ks = 56;
|
||||||
|
#else
|
||||||
|
u32 k1 = 0x55555555;
|
||||||
|
u32 k2 = 0x33333333;
|
||||||
|
u32 k4 = 0xf0f0f0f;
|
||||||
|
u32 kf = 0x1010101;
|
||||||
|
cell ks = 24;
|
||||||
|
#endif
|
||||||
|
|
||||||
x = x - ((x >> 1) & k1); // put count of each 2 bits into those 2 bits
|
x = x - ((x >> 1) & k1); // put count of each 2 bits into those 2 bits
|
||||||
x = (x & k2) + ((x >> 2) & k2); // put count of each 4 bits into those 4 bits
|
x = (x & k2) + ((x >> 2) & k2); // put count of each 4 bits into those 4 bits
|
||||||
x = (x + (x >> 4)) & k4 ; // put count of each 8 bits into those 8 bits
|
x = (x + (x >> 4)) & k4 ; // put count of each 8 bits into those 8 bits
|
||||||
x = (x * kf) >> 56; // returns 8 most significant bits of x + (x<<8) + (x<<16) + (x<<24) + ...
|
x = (x * kf) >> ks; // returns 8 most significant bits of x + (x<<8) + (x<<16) + (x<<24) + ...
|
||||||
|
|
||||||
return (cell)x;
|
return (cell)x;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline cell log2(u64 x)
|
inline cell log2(cell x)
|
||||||
{
|
{
|
||||||
#ifdef FACTOR_AMD64
|
#if defined(FACTOR_X86)
|
||||||
cell n;
|
cell n;
|
||||||
asm ("bsr %1, %0;":"=r"(n):"r"((cell)x));
|
asm ("bsr %1, %0;":"=r"(n):"r"(x));
|
||||||
|
#elif defined(FACTOR_AMD64)
|
||||||
|
cell n;
|
||||||
|
asm ("bsr %1, %0;":"=r"(n):"r"(x));
|
||||||
#else
|
#else
|
||||||
cell n = 0;
|
cell n = 0;
|
||||||
|
#ifdef FACTOR_64
|
||||||
if (x >= (u64)1 << 32) { x >>= 32; n += 32; }
|
if (x >= (u64)1 << 32) { x >>= 32; n += 32; }
|
||||||
if (x >= (u64)1 << 16) { x >>= 16; n += 16; }
|
#endif
|
||||||
if (x >= (u64)1 << 8) { x >>= 8; n += 8; }
|
if (x >= (u32)1 << 16) { x >>= 16; n += 16; }
|
||||||
if (x >= (u64)1 << 4) { x >>= 4; n += 4; }
|
if (x >= (u32)1 << 8) { x >>= 8; n += 8; }
|
||||||
if (x >= (u64)1 << 2) { x >>= 2; n += 2; }
|
if (x >= (u32)1 << 4) { x >>= 4; n += 4; }
|
||||||
if (x >= (u64)1 << 1) { n += 1; }
|
if (x >= (u32)1 << 2) { x >>= 2; n += 2; }
|
||||||
|
if (x >= (u32)1 << 1) { n += 1; }
|
||||||
#endif
|
#endif
|
||||||
return n;
|
return n;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline cell log2(u16 x)
|
inline cell rightmost_clear_bit(cell x)
|
||||||
{
|
|
||||||
#if defined(FACTOR_X86) || defined(FACTOR_AMD64)
|
|
||||||
cell n;
|
|
||||||
asm ("bsr %1, %0;":"=r"(n):"r"((cell)x));
|
|
||||||
#else
|
|
||||||
cell n = 0;
|
|
||||||
if (x >= 1 << 8) { x >>= 8; n += 8; }
|
|
||||||
if (x >= 1 << 4) { x >>= 4; n += 4; }
|
|
||||||
if (x >= 1 << 2) { x >>= 2; n += 2; }
|
|
||||||
if (x >= 1 << 1) { n += 1; }
|
|
||||||
#endif
|
|
||||||
return n;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline cell rightmost_clear_bit(u64 x)
|
|
||||||
{
|
{
|
||||||
return log2(~x & (x + 1));
|
return log2(~x & (x + 1));
|
||||||
}
|
}
|
||||||
|
|
||||||
inline cell rightmost_set_bit(u64 x)
|
inline cell rightmost_set_bit(cell x)
|
||||||
{
|
{
|
||||||
return log2(x & -x);
|
return log2(x & -x);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline cell rightmost_set_bit(u16 x)
|
|
||||||
{
|
|
||||||
return log2((u16)(x & -x));
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,18 +2,19 @@ namespace factor
|
||||||
{
|
{
|
||||||
|
|
||||||
const int block_granularity = 16;
|
const int block_granularity = 16;
|
||||||
const int forwarding_granularity = 64;
|
const int mark_bits_granularity = sizeof(cell) * 8;
|
||||||
|
const int mark_bits_mask = sizeof(cell) * 8 - 1;
|
||||||
|
|
||||||
template<typename Block> struct mark_bits {
|
template<typename Block> struct mark_bits {
|
||||||
cell size;
|
cell size;
|
||||||
cell start;
|
cell start;
|
||||||
cell bits_size;
|
cell bits_size;
|
||||||
u64 *marked;
|
cell *marked;
|
||||||
cell *forwarding;
|
cell *forwarding;
|
||||||
|
|
||||||
void clear_mark_bits()
|
void clear_mark_bits()
|
||||||
{
|
{
|
||||||
memset(marked,0,bits_size * sizeof(u64));
|
memset(marked,0,bits_size * sizeof(cell));
|
||||||
}
|
}
|
||||||
|
|
||||||
void clear_forwarding()
|
void clear_forwarding()
|
||||||
|
@ -24,8 +25,8 @@ template<typename Block> struct mark_bits {
|
||||||
explicit mark_bits(cell size_, cell start_) :
|
explicit mark_bits(cell size_, cell start_) :
|
||||||
size(size_),
|
size(size_),
|
||||||
start(start_),
|
start(start_),
|
||||||
bits_size(size / block_granularity / forwarding_granularity),
|
bits_size(size / block_granularity / mark_bits_granularity),
|
||||||
marked(new u64[bits_size]),
|
marked(new cell[bits_size]),
|
||||||
forwarding(new cell[bits_size])
|
forwarding(new cell[bits_size])
|
||||||
{
|
{
|
||||||
clear_mark_bits();
|
clear_mark_bits();
|
||||||
|
@ -53,15 +54,15 @@ template<typename Block> struct mark_bits {
|
||||||
std::pair<cell,cell> bitmap_deref(Block *address)
|
std::pair<cell,cell> bitmap_deref(Block *address)
|
||||||
{
|
{
|
||||||
cell line_number = block_line(address);
|
cell line_number = block_line(address);
|
||||||
cell word_index = (line_number >> 6);
|
cell word_index = (line_number / mark_bits_granularity);
|
||||||
cell word_shift = (line_number & 63);
|
cell word_shift = (line_number & mark_bits_mask);
|
||||||
return std::make_pair(word_index,word_shift);
|
return std::make_pair(word_index,word_shift);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool bitmap_elt(u64 *bits, Block *address)
|
bool bitmap_elt(cell *bits, Block *address)
|
||||||
{
|
{
|
||||||
std::pair<cell,cell> position = bitmap_deref(address);
|
std::pair<cell,cell> position = bitmap_deref(address);
|
||||||
return (bits[position.first] & ((u64)1 << position.second)) != 0;
|
return (bits[position.first] & ((cell)1 << position.second)) != 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
Block *next_block_after(Block *block)
|
Block *next_block_after(Block *block)
|
||||||
|
@ -69,13 +70,13 @@ template<typename Block> struct mark_bits {
|
||||||
return (Block *)((cell)block + block->size());
|
return (Block *)((cell)block + block->size());
|
||||||
}
|
}
|
||||||
|
|
||||||
void set_bitmap_range(u64 *bits, Block *address)
|
void set_bitmap_range(cell *bits, Block *address)
|
||||||
{
|
{
|
||||||
std::pair<cell,cell> start = bitmap_deref(address);
|
std::pair<cell,cell> start = bitmap_deref(address);
|
||||||
std::pair<cell,cell> end = bitmap_deref(next_block_after(address));
|
std::pair<cell,cell> end = bitmap_deref(next_block_after(address));
|
||||||
|
|
||||||
u64 start_mask = ((u64)1 << start.second) - 1;
|
cell start_mask = ((cell)1 << start.second) - 1;
|
||||||
u64 end_mask = ((u64)1 << end.second) - 1;
|
cell end_mask = ((cell)1 << end.second) - 1;
|
||||||
|
|
||||||
if(start.first == end.first)
|
if(start.first == end.first)
|
||||||
bits[start.first] |= start_mask ^ end_mask;
|
bits[start.first] |= start_mask ^ end_mask;
|
||||||
|
@ -87,7 +88,7 @@ template<typename Block> struct mark_bits {
|
||||||
bits[start.first] |= ~start_mask;
|
bits[start.first] |= ~start_mask;
|
||||||
|
|
||||||
for(cell index = start.first + 1; index < end.first; index++)
|
for(cell index = start.first + 1; index < end.first; index++)
|
||||||
bits[index] = (u64)-1;
|
bits[index] = (cell)-1;
|
||||||
|
|
||||||
if(end_mask != 0)
|
if(end_mask != 0)
|
||||||
{
|
{
|
||||||
|
@ -121,7 +122,8 @@ template<typename Block> struct mark_bits {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* We have the popcount for every 64 entries; look up and compute the rest */
|
/* We have the popcount for every mark_bits_granularity entries; look
|
||||||
|
up and compute the rest */
|
||||||
Block *forward_block(Block *original)
|
Block *forward_block(Block *original)
|
||||||
{
|
{
|
||||||
#ifdef FACTOR_DEBUG
|
#ifdef FACTOR_DEBUG
|
||||||
|
@ -130,7 +132,7 @@ template<typename Block> struct mark_bits {
|
||||||
std::pair<cell,cell> position = bitmap_deref(original);
|
std::pair<cell,cell> position = bitmap_deref(original);
|
||||||
|
|
||||||
cell approx_popcount = forwarding[position.first];
|
cell approx_popcount = forwarding[position.first];
|
||||||
u64 mask = ((u64)1 << position.second) - 1;
|
cell mask = ((cell)1 << position.second) - 1;
|
||||||
|
|
||||||
cell new_line_number = approx_popcount + popcount(marked[position.first] & mask);
|
cell new_line_number = approx_popcount + popcount(marked[position.first] & mask);
|
||||||
Block *new_block = line_block(new_line_number);
|
Block *new_block = line_block(new_line_number);
|
||||||
|
@ -147,13 +149,13 @@ template<typename Block> struct mark_bits {
|
||||||
|
|
||||||
for(cell index = position.first; index < bits_size; index++)
|
for(cell index = position.first; index < bits_size; index++)
|
||||||
{
|
{
|
||||||
u64 mask = ((s64)marked[index] >> bit_index);
|
cell mask = ((fixnum)marked[index] >> bit_index);
|
||||||
if(~mask)
|
if(~mask)
|
||||||
{
|
{
|
||||||
/* Found an unmarked block on this page.
|
/* Found an unmarked block on this page.
|
||||||
Stop, it's hammer time */
|
Stop, it's hammer time */
|
||||||
cell clear_bit = rightmost_clear_bit(mask);
|
cell clear_bit = rightmost_clear_bit(mask);
|
||||||
return line_block(index * 64 + bit_index + clear_bit);
|
return line_block(index * mark_bits_granularity + bit_index + clear_bit);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -174,13 +176,13 @@ template<typename Block> struct mark_bits {
|
||||||
|
|
||||||
for(cell index = position.first; index < bits_size; index++)
|
for(cell index = position.first; index < bits_size; index++)
|
||||||
{
|
{
|
||||||
u64 mask = (marked[index] >> bit_index);
|
cell mask = (marked[index] >> bit_index);
|
||||||
if(mask)
|
if(mask)
|
||||||
{
|
{
|
||||||
/* Found a marked block on this page.
|
/* Found a marked block on this page.
|
||||||
Stop, it's hammer time */
|
Stop, it's hammer time */
|
||||||
cell set_bit = rightmost_set_bit(mask);
|
cell set_bit = rightmost_set_bit(mask);
|
||||||
return line_block(index * 64 + bit_index + set_bit);
|
return line_block(index * mark_bits_granularity + bit_index + set_bit);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
|
|
@ -79,11 +79,16 @@ void object_start_map::update_for_sweep(mark_bits<object> *state)
|
||||||
{
|
{
|
||||||
for(cell index = 0; index < state->bits_size; index++)
|
for(cell index = 0; index < state->bits_size; index++)
|
||||||
{
|
{
|
||||||
u64 mask = state->marked[index];
|
cell mask = state->marked[index];
|
||||||
|
#ifdef FACTOR_64
|
||||||
update_card_for_sweep(index * 4, mask & 0xffff);
|
update_card_for_sweep(index * 4, mask & 0xffff);
|
||||||
update_card_for_sweep(index * 4 + 1, (mask >> 16) & 0xffff);
|
update_card_for_sweep(index * 4 + 1, (mask >> 16) & 0xffff);
|
||||||
update_card_for_sweep(index * 4 + 2, (mask >> 32) & 0xffff);
|
update_card_for_sweep(index * 4 + 2, (mask >> 32) & 0xffff);
|
||||||
update_card_for_sweep(index * 4 + 3, (mask >> 48) & 0xffff);
|
update_card_for_sweep(index * 4 + 3, (mask >> 48) & 0xffff);
|
||||||
|
#else
|
||||||
|
update_card_for_sweep(index * 2, mask & 0xffff);
|
||||||
|
update_card_for_sweep(index * 2 + 1, (mask >> 16) & 0xffff);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue