#ifdef _MSC_VER
#pragma intrinsic(_BitScanForward)
+#pragma intrinsic(_BitScanReverse)
#if _WIN64
#pragma intrinsic(_BitScanForward64)
+ #pragma intrinsic(_BitScanReverse64)
#endif
#endif // _MSC_VER
#endif // _MSC_VER
}
+// Cross-platform wrapper for the _BitScanReverse compiler intrinsic.
+inline uint8_t BitScanReverse(uint32_t *bitIndex, uint32_t mask)
+{
+#ifdef _MSC_VER
+ return _BitScanReverse((unsigned long*)bitIndex, mask);
+#else // _MSC_VER
+ // The result of __builtin_clz is undefined when mask is zero,
+ // but it's still OK to call the intrinsic in that case (just don't use the output).
+ // Unconditionally calling the intrinsic in this way allows the compiler to
+ // emit branchless code for this function when possible (depending on how the
+ // intrinsic is implemented for the target platform).
+ int lzcount = __builtin_clz(mask);
+ *bitIndex = static_cast<uint32_t>(31 - lzcount);
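+ // For example, mask == 0x00000090 (highest set bit is bit 7) gives lzcount == 24, so *bitIndex == 31 - 24 == 7.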
+ return mask != 0 ? TRUE : FALSE;
+#endif // _MSC_VER
+}
+
+// Cross-platform wrapper for the _BitScanReverse64 compiler intrinsic.
+inline uint8_t BitScanReverse64(uint32_t *bitIndex, uint64_t mask)
+{
+#ifdef _MSC_VER
+ #if _WIN64
+ return _BitScanReverse64((unsigned long*)bitIndex, mask);
+ #else
+ // MSVC targeting a 32-bit platform does not provide this intrinsic.
+ // We can emulate it by checking whether the upper 32 bits are zero,
+ // then calling _BitScanReverse() on either the upper or lower 32 bits.
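+ // For example, mask == 0x0000000500000000 has nonzero upper bits; _BitScanReverse finds bit 2 in the upper half, so the overall index is 2 + 32 == 34.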
+ uint32_t upper = static_cast<uint32_t>(mask >> 32);
+
+ if (upper != 0)
+ {
+ uint8_t result = _BitScanReverse((unsigned long*)bitIndex, upper);
+ *bitIndex += 32;
+ return result;
+ }
+
+ return _BitScanReverse((unsigned long*)bitIndex, static_cast<uint32_t>(mask));
+ #endif // _WIN64
+#else
+ // The result of __builtin_clzll is undefined when mask is zero,
+ // but it's still OK to call the intrinsic in that case (just don't use the output).
+ // Unconditionally calling the intrinsic in this way allows the compiler to
+ // emit branchless code for this function when possible (depending on how the
+ // intrinsic is implemented for the target platform).
+ int lzcount = __builtin_clzll(mask);
+ *bitIndex = static_cast<uint32_t>(63 - lzcount);
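+ // For example, mask == 0x0000001000000000 (bit 36) gives lzcount == 27, so *bitIndex == 63 - 27 == 36.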
+ return mask != 0 ? TRUE : FALSE;
+#endif // _MSC_VER
+}
+
// Aligns a size_t to the specified alignment. Alignment must be a power
// of two.
inline size_t ALIGN_UP(size_t val, size_t alignment)
#endif // GC_STATS
-#ifdef BIT64
-#define TOTAL_TIMES_TO_SHIFT 6
-#else
-#define TOTAL_TIMES_TO_SHIFT 5
-#endif // BIT64
-
+inline
size_t round_up_power2 (size_t size)
{
- unsigned short shift = 1;
- size_t shifted = 0;
-
- size--;
- for (unsigned short i = 0; i < TOTAL_TIMES_TO_SHIFT; i++)
- {
- shifted = size | (size >> shift);
- if (shifted == size)
- {
- break;
- }
-
- size = shifted;
- shift <<= 1;
- }
- shifted++;
+ // Get the 0-based index of the most-significant bit in size-1.
+ // If the call failed (because size-1 is zero), size must be 1,
+ // so return 1 (because 1 rounds up to itself).
+ DWORD highest_set_bit_index;
+ if (0 ==
+#ifdef BIT64
+ BitScanReverse64(
+#else
+ BitScanReverse(
+#endif
+ &highest_set_bit_index, size - 1)) { return 1; }
- return shifted;
+ // The size == 0 case (in which the decrement above wraps around to SIZE_MAX)
+ // is handled below by relying on the fact that highest_set_bit_index is the maximum value
+ // (31 or 63, depending on sizeof(size_t)) and left-shifting a value >= 2 by that
+ // number of bits shifts in zeros from the right, resulting in an output of zero.
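+ // For example, round_up_power2(9) scans 8 (bit 3 set), so the result is 2 << 3 == 16,
+ // while round_up_power2(0) scans SIZE_MAX (bit 31 or 63 set) and the shift wraps the result to 0.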
+ return static_cast<size_t>(2) << highest_set_bit_index;
}
inline
size_t round_down_power2 (size_t size)
{
- size_t power2 = round_up_power2 (size);
-
- if (power2 != size)
- {
- power2 >>= 1;
- }
+ // Get the 0-based index of the most-significant bit in size.
+ // If the call failed, size must be zero, so return zero.
+ DWORD highest_set_bit_index;
+ if (0 ==
+#ifdef BIT64
+ BitScanReverse64(
+#else
+ BitScanReverse(
+#endif
+ &highest_set_bit_index, size)) { return 0; }
- return power2;
+ // Left-shift 1 by highest_set_bit_index to get back a value containing only
+ // the most-significant set bit of size, i.e. size rounded down
+ // to the next power-of-two value.
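+ // For example, round_down_power2(20) finds bit 4 (20 == 0b10100), so the result is 1 << 4 == 16.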
+ return static_cast<size_t>(1) << highest_set_bit_index;
}
-// the index starts from 0.
-int index_of_set_bit (size_t power2)
+// Get the 0-based index of the most-significant bit in the value.
+// Returns -1 if the input value is zero (i.e. has no set bits).
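+// For example, index_of_highest_set_bit(20) returns 4 (20 == 0b10100), and index_of_highest_set_bit(0) returns -1.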
+inline
+int index_of_highest_set_bit (size_t value)
{
- int low = 0;
- int high = sizeof (size_t) * 8 - 1;
- int mid;
- while (low <= high)
- {
- mid = ((low + high)/2);
- size_t temp = (size_t)1 << mid;
- if (power2 & temp)
- {
- return mid;
- }
- else if (power2 < temp)
- {
- high = mid - 1;
- }
- else
- {
- low = mid + 1;
- }
- }
-
- return -1;
+ // Get the 0-based index of the most-significant bit in the value.
+ // If the call failed (because value is zero), return -1.
+ DWORD highest_set_bit_index;
+ return (0 ==
+#ifdef BIT64
+ BitScanReverse64(
+#else
+ BitScanReverse(
+#endif
+ &highest_set_bit_index, value)) ? -1 : static_cast<int>(highest_set_bit_index);
}
inline
int relative_index_power2_plug (size_t power2)
{
- int index = index_of_set_bit (power2);
+ int index = index_of_highest_set_bit (power2);
assert (index <= MAX_INDEX_POWER2);
return ((index < MIN_INDEX_POWER2) ? 0 : (index - MIN_INDEX_POWER2));
inline
int relative_index_power2_free_space (size_t power2)
{
- int index = index_of_set_bit (power2);
+ int index = index_of_highest_set_bit (power2);
assert (index <= MAX_INDEX_POWER2);
return ((index < MIN_INDEX_POWER2) ? -1 : (index - MIN_INDEX_POWER2));
}
}
- int bucket_power2 = index_of_set_bit (round_down_power2 (size));
+ int bucket_power2 = index_of_highest_set_bit (size);
if (bucket_power2 < base_power2)
{
return;
plug_size_to_fit += (pad_in_front ? Align(min_obj_size) : 0);
#endif //SHORT_PLUGS
- int plug_power2 = index_of_set_bit (round_up_power2 (plug_size_to_fit + Align(min_obj_size)));
+ int plug_power2 = index_of_highest_set_bit (round_up_power2 (plug_size_to_fit + Align(min_obj_size)));
ptrdiff_t i;
uint8_t* new_address = 0;
(plug_size - pad),
pad,
pinned_plug (m),
- index_of_set_bit (round_down_power2 (free_space_size)),
+ index_of_highest_set_bit (free_space_size),
(pinned_plug (m) - pinned_len (m)),
- index_of_set_bit (round_down_power2 (new_free_space_size))));
+ index_of_highest_set_bit (new_free_space_size)));
#endif //SIMPLE_DPRINTF
#ifdef SHORT_PLUGS
old_loc,
new_address,
(plug_size - pad),
- index_of_set_bit (round_down_power2 (free_space_size)),
+ index_of_highest_set_bit (free_space_size),
heap_segment_plan_allocated (seg),
- index_of_set_bit (round_down_power2 (new_free_space_size))));
+ index_of_highest_set_bit (new_free_space_size)));
#endif //SIMPLE_DPRINTF
if (realign_padding_p)
(!chosen_power2) && (i < free_space_count));
}
- int new_bucket_power2 = index_of_set_bit (round_down_power2 (new_free_space_size));
+ int new_bucket_power2 = index_of_highest_set_bit (new_free_space_size);
if (new_bucket_power2 < base_power2)
{
gc_heap::min_loh_segment_size = large_seg_size;
gc_heap::min_segment_size = min (seg_size, large_seg_size);
#ifdef SEG_MAPPING_TABLE
- gc_heap::min_segment_size_shr = index_of_set_bit (gc_heap::min_segment_size);
+ gc_heap::min_segment_size_shr = index_of_highest_set_bit (gc_heap::min_segment_size);
#endif //SEG_MAPPING_TABLE
#ifdef MULTIPLE_HEAPS
return qwMask != 0 ? TRUE : FALSE;
}
+// Define BitScanReverse64 and BitScanReverse
+// Per MSDN, BitScanReverse64 will search the mask data from MSB to LSB for a set bit.
+// If one is found, its bit position is stored in the out PDWORD argument and 1 is returned.
+// Otherwise, an undefined value is stored in the out PDWORD argument and 0 is returned.
+//
+// GCC/clang don't have a directly equivalent intrinsic; they do provide the __builtin_clzll
+// intrinsic, which returns the number of leading 0-bits in x starting at the most significant
+// bit position (the result is undefined when x = 0).
+//
+// The same is true for BitScanReverse, except that the corresponding GCC/clang intrinsic is __builtin_clz.
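+//
+// For a nonzero 64-bit mask the index of the highest set bit is 63 - __builtin_clzll(mask);
+// for a nonzero 32-bit mask it is 31 - __builtin_clz(mask).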
+
+EXTERN_C
+PALIMPORT
+inline
+unsigned char
+PALAPI
+BitScanReverse(
+ IN OUT PDWORD Index,
+ IN UINT qwMask)
+{
+ // The result of __builtin_clz is undefined when qwMask is zero,
+ // but it's still OK to call the intrinsic in that case (just don't use the output).
+ // Unconditionally calling the intrinsic in this way allows the compiler to
+ // emit branchless code for this function when possible (depending on how the
+ // intrinsic is implemented for the target platform).
+ int lzcount = __builtin_clz(qwMask);
+ *Index = (DWORD)(31 - lzcount);
+ return qwMask != 0;
+}
+
+EXTERN_C
+PALIMPORT
+inline
+unsigned char
+PALAPI
+BitScanReverse64(
+ IN OUT PDWORD Index,
+ IN UINT64 qwMask)
+{
+ // The result of __builtin_clzll is undefined when qwMask is zero,
+ // but it's still OK to call the intrinsic in that case (just don't use the output).
+ // Unconditionally calling the intrinsic in this way allows the compiler to
+ // emit branchless code for this function when possible (depending on how the
+ // intrinsic is implemented for the target platform).
+ int lzcount = __builtin_clzll(qwMask);
+ *Index = (DWORD)(63 - lzcount);
+ return qwMask != 0;
+}
+
FORCEINLINE void PAL_ArmInterlockedOperationBarrier()
{
#ifdef _ARM64_