vm: speed up some bit twiddling on 32-bit
							parent
							
								
									72ab6ec548
								
							
						
					
					
						commit
						22c717616c
					
				| 
						 | 
					@ -3,7 +3,7 @@
 | 
				
			||||||
USING: classes.struct alien.c-types alien.syntax ;
 | 
					USING: classes.struct alien.c-types alien.syntax ;
 | 
				
			||||||
IN: vm
 | 
					IN: vm
 | 
				
			||||||
 | 
					
 | 
				
			||||||
TYPEDEF: intptr_t cell
 | 
					TYPEDEF: uintptr_t cell
 | 
				
			||||||
C-TYPE: context
 | 
					C-TYPE: context
 | 
				
			||||||
 | 
					
 | 
				
			||||||
STRUCT: zone
 | 
					STRUCT: zone
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -3,65 +3,60 @@ namespace factor
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* These algorithms were snarfed from various places. I did not come up with them myself */
 | 
					/* These algorithms were snarfed from various places. I did not come up with them myself */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
inline cell popcount(u64 x)
 | 
					inline cell popcount(cell x)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 | 
					#ifdef FACTOR_64
 | 
				
			||||||
	u64 k1 = 0x5555555555555555ll;
 | 
						u64 k1 = 0x5555555555555555ll;
 | 
				
			||||||
	u64 k2 = 0x3333333333333333ll;
 | 
						u64 k2 = 0x3333333333333333ll;
 | 
				
			||||||
	u64 k4 = 0x0f0f0f0f0f0f0f0fll;
 | 
						u64 k4 = 0x0f0f0f0f0f0f0f0fll;
 | 
				
			||||||
	u64 kf = 0x0101010101010101ll;
 | 
						u64 kf = 0x0101010101010101ll;
 | 
				
			||||||
 | 
						cell ks = 56;
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
						u32 k1 = 0x55555555;
 | 
				
			||||||
 | 
						u32 k2 = 0x33333333;
 | 
				
			||||||
 | 
						u32 k4 = 0xf0f0f0f;
 | 
				
			||||||
 | 
						u32 kf = 0x1010101;
 | 
				
			||||||
 | 
						cell ks = 24;
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	x =  x       - ((x >> 1)  & k1); // put count of each 2 bits into those 2 bits
 | 
						x =  x       - ((x >> 1)  & k1); // put count of each 2 bits into those 2 bits
 | 
				
			||||||
	x = (x & k2) + ((x >> 2)  & k2); // put count of each 4 bits into those 4 bits
 | 
						x = (x & k2) + ((x >> 2)  & k2); // put count of each 4 bits into those 4 bits
 | 
				
			||||||
	x = (x       +  (x >> 4)) & k4 ; // put count of each 8 bits into those 8 bits
 | 
						x = (x       +  (x >> 4)) & k4 ; // put count of each 8 bits into those 8 bits
 | 
				
			||||||
	x = (x * kf) >> 56; // returns 8 most significant bits of x + (x<<8) + (x<<16) + (x<<24) + ...
 | 
						x = (x * kf) >> ks; // returns 8 most significant bits of x + (x<<8) + (x<<16) + (x<<24) + ...
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	return (cell)x;
 | 
						return (cell)x;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
inline cell log2(u64 x)
 | 
					inline cell log2(cell x)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
#ifdef FACTOR_AMD64
 | 
					#if defined(FACTOR_X86)
 | 
				
			||||||
	cell n;
 | 
						cell n;
 | 
				
			||||||
	asm ("bsr %1, %0;":"=r"(n):"r"((cell)x));
 | 
						asm ("bsr %1, %0;":"=r"(n):"r"(x));
 | 
				
			||||||
 | 
					#elif defined(FACTOR_AMD64)
 | 
				
			||||||
 | 
						cell n;
 | 
				
			||||||
 | 
						asm ("bsr %1, %0;":"=r"(n):"r"(x));
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
	cell n = 0;
 | 
						cell n = 0;
 | 
				
			||||||
 | 
					#ifdef FACTOR_64
 | 
				
			||||||
	if (x >= (u64)1 << 32) { x >>= 32; n += 32; }
 | 
						if (x >= (u64)1 << 32) { x >>= 32; n += 32; }
 | 
				
			||||||
	if (x >= (u64)1 << 16) { x >>= 16; n += 16; }
 | 
					#endif
 | 
				
			||||||
	if (x >= (u64)1 <<  8) { x >>=  8; n +=  8; }
 | 
						if (x >= (u32)1 << 16) { x >>= 16; n += 16; }
 | 
				
			||||||
	if (x >= (u64)1 <<  4) { x >>=  4; n +=  4; }
 | 
						if (x >= (u32)1 <<  8) { x >>=  8; n +=  8; }
 | 
				
			||||||
	if (x >= (u64)1 <<  2) { x >>=  2; n +=  2; }
 | 
						if (x >= (u32)1 <<  4) { x >>=  4; n +=  4; }
 | 
				
			||||||
	if (x >= (u64)1 <<  1) {           n +=  1; }
 | 
						if (x >= (u32)1 <<  2) { x >>=  2; n +=  2; }
 | 
				
			||||||
 | 
						if (x >= (u32)1 <<  1) {           n +=  1; }
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
	return n;
 | 
						return n;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
inline cell log2(u16 x)
 | 
					inline cell rightmost_clear_bit(cell x)
 | 
				
			||||||
{
 | 
					 | 
				
			||||||
#if defined(FACTOR_X86) || defined(FACTOR_AMD64)
 | 
					 | 
				
			||||||
	cell n;
 | 
					 | 
				
			||||||
	asm ("bsr %1, %0;":"=r"(n):"r"((cell)x));
 | 
					 | 
				
			||||||
#else
 | 
					 | 
				
			||||||
	cell n = 0;
 | 
					 | 
				
			||||||
	if (x >= 1 << 8) { x >>=  8; n += 8; }
 | 
					 | 
				
			||||||
	if (x >= 1 << 4) { x >>=  4; n += 4; }
 | 
					 | 
				
			||||||
	if (x >= 1 << 2) { x >>=  2; n += 2; }
 | 
					 | 
				
			||||||
	if (x >= 1 << 1) {           n += 1; }
 | 
					 | 
				
			||||||
#endif
 | 
					 | 
				
			||||||
	return n;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
inline cell rightmost_clear_bit(u64 x)
 | 
					 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	return log2(~x & (x + 1));
 | 
						return log2(~x & (x + 1));
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
inline cell rightmost_set_bit(u64 x)
 | 
					inline cell rightmost_set_bit(cell x)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	return log2(x & -x);
 | 
						return log2(x & -x);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
inline cell rightmost_set_bit(u16 x)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
	return log2((u16)(x & -x));
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -2,18 +2,19 @@ namespace factor
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 | 
					
 | 
				
			||||||
const int block_granularity = 16;
 | 
					const int block_granularity = 16;
 | 
				
			||||||
const int forwarding_granularity = 64;
 | 
					const int mark_bits_granularity = sizeof(cell) * 8;
 | 
				
			||||||
 | 
					const int mark_bits_mask = sizeof(cell) * 8 - 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
template<typename Block> struct mark_bits {
 | 
					template<typename Block> struct mark_bits {
 | 
				
			||||||
	cell size;
 | 
						cell size;
 | 
				
			||||||
	cell start;
 | 
						cell start;
 | 
				
			||||||
	cell bits_size;
 | 
						cell bits_size;
 | 
				
			||||||
	u64 *marked;
 | 
						cell *marked;
 | 
				
			||||||
	cell *forwarding;
 | 
						cell *forwarding;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	void clear_mark_bits()
 | 
						void clear_mark_bits()
 | 
				
			||||||
	{
 | 
						{
 | 
				
			||||||
		memset(marked,0,bits_size * sizeof(u64));
 | 
							memset(marked,0,bits_size * sizeof(cell));
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	void clear_forwarding()
 | 
						void clear_forwarding()
 | 
				
			||||||
| 
						 | 
					@ -24,8 +25,8 @@ template<typename Block> struct mark_bits {
 | 
				
			||||||
	explicit mark_bits(cell size_, cell start_) :
 | 
						explicit mark_bits(cell size_, cell start_) :
 | 
				
			||||||
		size(size_),
 | 
							size(size_),
 | 
				
			||||||
		start(start_),
 | 
							start(start_),
 | 
				
			||||||
		bits_size(size / block_granularity / forwarding_granularity),
 | 
							bits_size(size / block_granularity / mark_bits_granularity),
 | 
				
			||||||
		marked(new u64[bits_size]),
 | 
							marked(new cell[bits_size]),
 | 
				
			||||||
		forwarding(new cell[bits_size])
 | 
							forwarding(new cell[bits_size])
 | 
				
			||||||
	{
 | 
						{
 | 
				
			||||||
		clear_mark_bits();
 | 
							clear_mark_bits();
 | 
				
			||||||
| 
						 | 
					@ -53,15 +54,15 @@ template<typename Block> struct mark_bits {
 | 
				
			||||||
	std::pair<cell,cell> bitmap_deref(Block *address)
 | 
						std::pair<cell,cell> bitmap_deref(Block *address)
 | 
				
			||||||
	{
 | 
						{
 | 
				
			||||||
		cell line_number = block_line(address);
 | 
							cell line_number = block_line(address);
 | 
				
			||||||
		cell word_index = (line_number >> 6);
 | 
							cell word_index = (line_number / mark_bits_granularity);
 | 
				
			||||||
		cell word_shift = (line_number & 63);
 | 
							cell word_shift = (line_number & mark_bits_mask);
 | 
				
			||||||
		return std::make_pair(word_index,word_shift);
 | 
							return std::make_pair(word_index,word_shift);
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	bool bitmap_elt(u64 *bits, Block *address)
 | 
						bool bitmap_elt(cell *bits, Block *address)
 | 
				
			||||||
	{
 | 
						{
 | 
				
			||||||
		std::pair<cell,cell> position = bitmap_deref(address);
 | 
							std::pair<cell,cell> position = bitmap_deref(address);
 | 
				
			||||||
		return (bits[position.first] & ((u64)1 << position.second)) != 0;
 | 
							return (bits[position.first] & ((cell)1 << position.second)) != 0;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	Block *next_block_after(Block *block)
 | 
						Block *next_block_after(Block *block)
 | 
				
			||||||
| 
						 | 
					@ -69,13 +70,13 @@ template<typename Block> struct mark_bits {
 | 
				
			||||||
		return (Block *)((cell)block + block->size());
 | 
							return (Block *)((cell)block + block->size());
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	void set_bitmap_range(u64 *bits, Block *address)
 | 
						void set_bitmap_range(cell *bits, Block *address)
 | 
				
			||||||
	{
 | 
						{
 | 
				
			||||||
		std::pair<cell,cell> start = bitmap_deref(address);
 | 
							std::pair<cell,cell> start = bitmap_deref(address);
 | 
				
			||||||
		std::pair<cell,cell> end = bitmap_deref(next_block_after(address));
 | 
							std::pair<cell,cell> end = bitmap_deref(next_block_after(address));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		u64 start_mask = ((u64)1 << start.second) - 1;
 | 
							cell start_mask = ((cell)1 << start.second) - 1;
 | 
				
			||||||
		u64 end_mask = ((u64)1 << end.second) - 1;
 | 
							cell end_mask = ((cell)1 << end.second) - 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		if(start.first == end.first)
 | 
							if(start.first == end.first)
 | 
				
			||||||
			bits[start.first] |= start_mask ^ end_mask;
 | 
								bits[start.first] |= start_mask ^ end_mask;
 | 
				
			||||||
| 
						 | 
					@ -87,7 +88,7 @@ template<typename Block> struct mark_bits {
 | 
				
			||||||
			bits[start.first] |= ~start_mask;
 | 
								bits[start.first] |= ~start_mask;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			for(cell index = start.first + 1; index < end.first; index++)
 | 
								for(cell index = start.first + 1; index < end.first; index++)
 | 
				
			||||||
				bits[index] = (u64)-1;
 | 
									bits[index] = (cell)-1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			if(end_mask != 0)
 | 
								if(end_mask != 0)
 | 
				
			||||||
			{
 | 
								{
 | 
				
			||||||
| 
						 | 
					@ -121,7 +122,8 @@ template<typename Block> struct mark_bits {
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* We have the popcount for every 64 entries; look up and compute the rest */
 | 
						/* We have the popcount for every mark_bits_granularity entries; look
 | 
				
			||||||
 | 
						up and compute the rest */
 | 
				
			||||||
	Block *forward_block(Block *original)
 | 
						Block *forward_block(Block *original)
 | 
				
			||||||
	{
 | 
						{
 | 
				
			||||||
#ifdef FACTOR_DEBUG
 | 
					#ifdef FACTOR_DEBUG
 | 
				
			||||||
| 
						 | 
					@ -130,7 +132,7 @@ template<typename Block> struct mark_bits {
 | 
				
			||||||
		std::pair<cell,cell> position = bitmap_deref(original);
 | 
							std::pair<cell,cell> position = bitmap_deref(original);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		cell approx_popcount = forwarding[position.first];
 | 
							cell approx_popcount = forwarding[position.first];
 | 
				
			||||||
		u64 mask = ((u64)1 << position.second) - 1;
 | 
							cell mask = ((cell)1 << position.second) - 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		cell new_line_number = approx_popcount + popcount(marked[position.first] & mask);
 | 
							cell new_line_number = approx_popcount + popcount(marked[position.first] & mask);
 | 
				
			||||||
		Block *new_block = line_block(new_line_number);
 | 
							Block *new_block = line_block(new_line_number);
 | 
				
			||||||
| 
						 | 
					@ -147,13 +149,13 @@ template<typename Block> struct mark_bits {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		for(cell index = position.first; index < bits_size; index++)
 | 
							for(cell index = position.first; index < bits_size; index++)
 | 
				
			||||||
		{
 | 
							{
 | 
				
			||||||
			u64 mask = ((s64)marked[index] >> bit_index);
 | 
								cell mask = ((fixnum)marked[index] >> bit_index);
 | 
				
			||||||
			if(~mask)
 | 
								if(~mask)
 | 
				
			||||||
			{
 | 
								{
 | 
				
			||||||
				/* Found an unmarked block on this page.
 | 
									/* Found an unmarked block on this page.
 | 
				
			||||||
				Stop, it's hammer time */
 | 
									Stop, it's hammer time */
 | 
				
			||||||
				cell clear_bit = rightmost_clear_bit(mask);
 | 
									cell clear_bit = rightmost_clear_bit(mask);
 | 
				
			||||||
				return line_block(index * 64 + bit_index + clear_bit);
 | 
									return line_block(index * mark_bits_granularity + bit_index + clear_bit);
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
			else
 | 
								else
 | 
				
			||||||
			{
 | 
								{
 | 
				
			||||||
| 
						 | 
					@ -174,13 +176,13 @@ template<typename Block> struct mark_bits {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		for(cell index = position.first; index < bits_size; index++)
 | 
							for(cell index = position.first; index < bits_size; index++)
 | 
				
			||||||
		{
 | 
							{
 | 
				
			||||||
			u64 mask = (marked[index] >> bit_index);
 | 
								cell mask = (marked[index] >> bit_index);
 | 
				
			||||||
			if(mask)
 | 
								if(mask)
 | 
				
			||||||
			{
 | 
								{
 | 
				
			||||||
				/* Found an marked block on this page.
 | 
									/* Found an marked block on this page.
 | 
				
			||||||
				Stop, it's hammer time */
 | 
									Stop, it's hammer time */
 | 
				
			||||||
				cell set_bit = rightmost_set_bit(mask);
 | 
									cell set_bit = rightmost_set_bit(mask);
 | 
				
			||||||
				return line_block(index * 64 + bit_index + set_bit);
 | 
									return line_block(index * mark_bits_granularity + bit_index + set_bit);
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
			else
 | 
								else
 | 
				
			||||||
			{
 | 
								{
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -79,11 +79,16 @@ void object_start_map::update_for_sweep(mark_bits<object> *state)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	for(cell index = 0; index < state->bits_size; index++)
 | 
						for(cell index = 0; index < state->bits_size; index++)
 | 
				
			||||||
	{
 | 
						{
 | 
				
			||||||
		u64 mask = state->marked[index];
 | 
							cell mask = state->marked[index];
 | 
				
			||||||
 | 
					#ifdef FACTOR_64
 | 
				
			||||||
		update_card_for_sweep(index * 4,      mask        & 0xffff);
 | 
							update_card_for_sweep(index * 4,      mask        & 0xffff);
 | 
				
			||||||
		update_card_for_sweep(index * 4 + 1, (mask >> 16) & 0xffff);
 | 
							update_card_for_sweep(index * 4 + 1, (mask >> 16) & 0xffff);
 | 
				
			||||||
		update_card_for_sweep(index * 4 + 2, (mask >> 32) & 0xffff);
 | 
							update_card_for_sweep(index * 4 + 2, (mask >> 32) & 0xffff);
 | 
				
			||||||
		update_card_for_sweep(index * 4 + 3, (mask >> 48) & 0xffff);
 | 
							update_card_for_sweep(index * 4 + 3, (mask >> 48) & 0xffff);
 | 
				
			||||||
 | 
					#else
 | 
				
			||||||
 | 
							update_card_for_sweep(index * 2,      mask        & 0xffff);
 | 
				
			||||||
 | 
							update_card_for_sweep(index * 2 + 1, (mask >> 16) & 0xffff);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue