From c0645f13243cf574abd1f8d587dc22943b9dd95d Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Sun, 4 Apr 2021 11:38:09 -0400 Subject: [PATCH] [InstCombine] fold popcount of exactly one bit to shift This is discussed in https://llvm.org/PR48999 , but it does not solve that request. The difference in the vector test shows that some other logic transform is limited to scalar types. --- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 16 +++++++++++----- llvm/test/Transforms/InstCombine/ctpop.ll | 6 +++--- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index bf1c51e..762e3f3 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -522,18 +522,24 @@ static Instruction *foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC) { return CallInst::Create(F, {X, IC.Builder.getFalse()}); } + KnownBits Known(BitWidth); + IC.computeKnownBits(Op0, Known, 0, &II); + + // If all bits are zero except for exactly one fixed bit, then the result + // must be 0 or 1, and we can get that answer by shifting to LSB: + // ctpop (X & 32) --> (X & 32) >> 5 + if ((~Known.Zero).isPowerOf2()) + return BinaryOperator::CreateLShr( + Op0, ConstantInt::get(Ty, (~Known.Zero).exactLogBase2())); + // FIXME: Try to simplify vectors of integers. auto *IT = dyn_cast<IntegerType>(Ty); if (!IT) return nullptr; - KnownBits Known(BitWidth); - IC.computeKnownBits(Op0, Known, 0, &II); - + // Add range metadata since known bits can't completely reflect what we know. unsigned MinCount = Known.countMinPopulation(); unsigned MaxCount = Known.countMaxPopulation(); - - // Add range metadata since known bits can't completely reflect what we know. 
if (IT->getBitWidth() != 1 && !II.getMetadata(LLVMContext::MD_range)) { Metadata *LowAndHigh[] = { ConstantAsMetadata::get(ConstantInt::get(IT, MinCount)), diff --git a/llvm/test/Transforms/InstCombine/ctpop.ll b/llvm/test/Transforms/InstCombine/ctpop.ll index 697b210..d1c587f 100644 --- a/llvm/test/Transforms/InstCombine/ctpop.ll +++ b/llvm/test/Transforms/InstCombine/ctpop.ll @@ -96,8 +96,8 @@ define i1 @test6(i1 %arg) { define i8 @mask_one_bit(i8 %x) { ; CHECK-LABEL: @mask_one_bit( -; CHECK-NEXT: [[A:%.*]] = and i8 [[X:%.*]], 16 -; CHECK-NEXT: [[R:%.*]] = call i8 @llvm.ctpop.i8(i8 [[A]]), !range [[RNG1:![0-9]+]] +; CHECK-NEXT: [[A:%.*]] = lshr i8 [[X:%.*]], 4 +; CHECK-NEXT: [[R:%.*]] = and i8 [[A]], 1 ; CHECK-NEXT: ret i8 [[R]] ; %a = and i8 %x, 16 @@ -109,7 +109,7 @@ define <2 x i32> @mask_one_bit_splat(<2 x i32> %x, <2 x i32>* %p) { ; CHECK-LABEL: @mask_one_bit_splat( ; CHECK-NEXT: [[A:%.*]] = and <2 x i32> [[X:%.*]], ; CHECK-NEXT: store <2 x i32> [[A]], <2 x i32>* [[P:%.*]], align 8 -; CHECK-NEXT: [[R:%.*]] = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[A]]) +; CHECK-NEXT: [[R:%.*]] = lshr exact <2 x i32> [[A]], ; CHECK-NEXT: ret <2 x i32> [[R]] ; %a = and <2 x i32> %x, -- 2.7.4