From b7cc093db28cf6d20b35092c4108bc8aa15cc011 Mon Sep 17 00:00:00 2001 From: Shawn Landden Date: Sun, 26 May 2019 13:49:58 +0000 Subject: [PATCH] [Support] make countLeadingZeros() and countTrailingZeros() return unsigned This matches countLeadingOnes() and countTrailingOnes(), and APInt's countLeadingZeros() and countTrailingZeros(). (as well as __builtin_clzll()) llvm-svn: 361724 --- llvm/include/llvm/Support/MathExtras.h | 24 ++++++++++++------------ llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 2 +- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 23 ++++++++++++----------- 3 files changed, 25 insertions(+), 24 deletions(-) diff --git a/llvm/include/llvm/Support/MathExtras.h b/llvm/include/llvm/Support/MathExtras.h index e902a72..85d5a5a 100644 --- a/llvm/include/llvm/Support/MathExtras.h +++ b/llvm/include/llvm/Support/MathExtras.h @@ -51,14 +51,14 @@ enum ZeroBehavior { namespace detail { template struct TrailingZerosCounter { - static std::size_t count(T Val, ZeroBehavior) { + static unsigned count(T Val, ZeroBehavior) { if (!Val) return std::numeric_limits::digits; if (Val & 0x1) return 0; // Bisection method. - std::size_t ZeroBits = 0; + unsigned ZeroBits = 0; T Shift = std::numeric_limits::digits >> 1; T Mask = std::numeric_limits::max() >> Shift; while (Shift) { @@ -75,7 +75,7 @@ template struct TrailingZerosCounter { #if __GNUC__ >= 4 || defined(_MSC_VER) template struct TrailingZerosCounter { - static std::size_t count(T Val, ZeroBehavior ZB) { + static unsigned count(T Val, ZeroBehavior ZB) { if (ZB != ZB_Undefined && Val == 0) return 32; @@ -91,7 +91,7 @@ template struct TrailingZerosCounter { #if !defined(_MSC_VER) || defined(_M_X64) template struct TrailingZerosCounter { - static std::size_t count(T Val, ZeroBehavior ZB) { + static unsigned count(T Val, ZeroBehavior ZB) { if (ZB != ZB_Undefined && Val == 0) return 64; @@ -116,7 +116,7 @@ template struct TrailingZerosCounter { /// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are /// valid arguments. template -std::size_t countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) { +unsigned countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) { static_assert(std::numeric_limits::is_integer && !std::numeric_limits::is_signed, "Only unsigned integral types are allowed."); @@ -125,12 +125,12 @@ std::size_t countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) { namespace detail { template struct LeadingZerosCounter { - static std::size_t count(T Val, ZeroBehavior) { + static unsigned count(T Val, ZeroBehavior) { if (!Val) return std::numeric_limits::digits; // Bisection method. - std::size_t ZeroBits = 0; + unsigned ZeroBits = 0; for (T Shift = std::numeric_limits::digits >> 1; Shift; Shift >>= 1) { T Tmp = Val >> Shift; if (Tmp) @@ -144,7 +144,7 @@ template struct LeadingZerosCounter { #if __GNUC__ >= 4 || defined(_MSC_VER) template struct LeadingZerosCounter { - static std::size_t count(T Val, ZeroBehavior ZB) { + static unsigned count(T Val, ZeroBehavior ZB) { if (ZB != ZB_Undefined && Val == 0) return 32; @@ -160,7 +160,7 @@ template struct LeadingZerosCounter { #if !defined(_MSC_VER) || defined(_M_X64) template struct LeadingZerosCounter { - static std::size_t count(T Val, ZeroBehavior ZB) { + static unsigned count(T Val, ZeroBehavior ZB) { if (ZB != ZB_Undefined && Val == 0) return 64; @@ -185,7 +185,7 @@ template struct LeadingZerosCounter { /// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are /// valid arguments. template -std::size_t countLeadingZeros(T Val, ZeroBehavior ZB = ZB_Width) { +unsigned countLeadingZeros(T Val, ZeroBehavior ZB = ZB_Width) { static_assert(std::numeric_limits::is_integer && !std::numeric_limits::is_signed, "Only unsigned integral types are allowed."); @@ -458,7 +458,7 @@ inline uint64_t ByteSwap_64(uint64_t Value) { /// \param ZB the behavior on an input of all ones. Only ZB_Width and /// ZB_Undefined are valid arguments. template -std::size_t countLeadingOnes(T Value, ZeroBehavior ZB = ZB_Width) { +unsigned countLeadingOnes(T Value, ZeroBehavior ZB = ZB_Width) { static_assert(std::numeric_limits::is_integer && !std::numeric_limits::is_signed, "Only unsigned integral types are allowed."); @@ -474,7 +474,7 @@ std::size_t countLeadingOnes(T Value, ZeroBehavior ZB = ZB_Width) { /// \param ZB the behavior on an input of all ones. Only ZB_Width and /// ZB_Undefined are valid arguments. template -std::size_t countTrailingOnes(T Value, ZeroBehavior ZB = ZB_Width) { +unsigned countTrailingOnes(T Value, ZeroBehavior ZB = ZB_Width) { static_assert(std::numeric_limits::is_integer && !std::numeric_limits::is_signed, "Only unsigned integral types are allowed."); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 0e8a517..0b0dd0a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -1147,7 +1147,7 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out, // These alignment values are specified in powers of two, so alignment = // 2^n. The minimum alignment is 2^4 = 16. - Out.kernarg_segment_alignment = std::max((size_t)4, + Out.kernarg_segment_alignment = std::max(4, countTrailingZeros(MaxKernArgAlign)); } diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 90b5520..71d592e 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -5377,8 +5377,8 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, if (MinCaseVal->isNullValue()) TableIndex = SI->getCondition(); else - TableIndex = Builder.CreateSub(SI->getCondition(), MinCaseVal, - "switch.tableidx"); + TableIndex = + Builder.CreateSub(SI->getCondition(), MinCaseVal, "switch.tableidx"); // Compute the maximum table size representable by the integer type we are // switching upon. @@ -5512,7 +5512,8 @@ static bool isSwitchDense(ArrayRef Values) { uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front(); uint64_t Range = Diff + 1; uint64_t NumCases = Values.size(); - // 40% is the default density for building a jump table in optsize/minsize mode. + // 40% is the default density for building a jump table in optsize/minsize + // mode. uint64_t MinDensity = 40; return NumCases * 100 >= Range * MinDensity; @@ -5538,11 +5539,11 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, if (SI->getNumCases() < 4) return false; - // This transform is agnostic to the signedness of the input or case values. We - // can treat the case values as signed or unsigned. We can optimize more common - // cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values - // as signed. - SmallVector Values; + // This transform is agnostic to the signedness of the input or case values. + // We can treat the case values as signed or unsigned. We can optimize more + // common cases such as a sequence crossing zero {-4,0,4,8} if we interpret + // case values as signed. + SmallVector Values; for (auto &C : SI->cases()) Values.push_back(C.getCaseValue()->getValue().getSExtValue()); llvm::sort(Values); @@ -5563,9 +5564,9 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, for (auto &V : Values) GCD = GreatestCommonDivisor64(GCD, (uint64_t)V); - // This transform can be done speculatively because it is so cheap - it results - // in a single rotate operation being inserted. This can only happen if the - // factor extracted is a power of 2. + // This transform can be done speculatively because it is so cheap - it + // results in a single rotate operation being inserted. This can only happen + // if the factor extracted is a power of 2. // FIXME: If the GCD is an odd number we can multiply by the multiplicative // inverse of GCD and then perform this transform. // FIXME: It's possible that optimizing a switch on powers of two might also -- 2.7.4