From 691badc3d6a76ceb98530d58f80c11565337d767 Mon Sep 17 00:00:00 2001 From: =?utf8?q?D=C3=A1vid=20Bolvansk=C3=BD?= Date: Mon, 26 Apr 2021 14:22:14 +0200 Subject: [PATCH] [InstCombine] C - ctpop(a) - > ctpop(~a)) if C is bitwidth (PR50104) Proof: https://alive2.llvm.org/ce/z/mncA9K Solves https://bugs.llvm.org/show_bug.cgi?id=50104 Reviewed By: spatel Differential Revision: https://reviews.llvm.org/D101257 --- llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp | 7 +++++++ llvm/test/Transforms/InstCombine/ctpop-cttz.ll | 10 +++++----- llvm/test/Transforms/InstCombine/ctpop.ll | 12 ++++++------ 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 48808db..b1b77d1 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -2112,6 +2112,13 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) { return replaceInstUsesWith( I, Builder.CreateIntrinsic(Intrinsic::umin, {I.getType()}, {Op0, Y})); + // C - ctpop(X) => ctpop(~X) if C is bitwidth + if (match(Op0, m_SpecificInt(Ty->getScalarSizeInBits())) && + match(Op1, m_OneUse(m_Intrinsic<Intrinsic::ctpop>(m_Value(X))))) + return replaceInstUsesWith( + I, Builder.CreateIntrinsic(Intrinsic::ctpop, {I.getType()}, + {Builder.CreateNot(X)})); + return TryToNarrowDeduceFlags(); } diff --git a/llvm/test/Transforms/InstCombine/ctpop-cttz.ll b/llvm/test/Transforms/InstCombine/ctpop-cttz.ll index 59a45f3..88675a4 100644 --- a/llvm/test/Transforms/InstCombine/ctpop-cttz.ll +++ b/llvm/test/Transforms/InstCombine/ctpop-cttz.ll @@ -8,7 +8,7 @@ declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>) ; __builtin_popcount(i | -i) -> 32 - __builtin_cttz(i, false) define i32 @ctpop1(i32 %0) { ; CHECK-LABEL: @ctpop1( -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP0:%.*]], i1 false), !range !0 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.cttz.i32(i32 
[[TMP0:%.*]], i1 false), !range [[RNG0:![0-9]+]] ; CHECK-NEXT: ret i32 [[TMP2]] ; %2 = sub i32 0, %0 @@ -34,8 +34,8 @@ define i32 @ctpop1_multiuse(i32 %0) { ; CHECK-LABEL: @ctpop1_multiuse( ; CHECK-NEXT: [[TMP2:%.*]] = sub i32 0, [[TMP0:%.*]] ; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP2]], [[TMP0]] -; CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[TMP3]]), !range !0 -; CHECK-NEXT: [[TMP5:%.*]] = sub nuw nsw i32 32, [[TMP4]] +; CHECK-NEXT: [[TMP4:%.*]] = xor i32 [[TMP3]], -1 +; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.ctpop.i32(i32 [[TMP4]]), !range [[RNG0]] ; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[TMP5]], [[TMP3]] ; CHECK-NEXT: ret i32 [[TMP6]] ; @@ -51,7 +51,7 @@ define i32 @ctpop1_multiuse(i32 %0) { ; __builtin_popcount(~i & (i-1)) -> __builtin_cttz(i, false) define i32 @ctpop2(i32 %0) { ; CHECK-LABEL: @ctpop2( -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP0:%.*]], i1 false), !range !0 +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP0:%.*]], i1 false), !range [[RNG0]] ; CHECK-NEXT: ret i32 [[TMP2]] ; %2 = xor i32 %0, -1 @@ -78,7 +78,7 @@ define i32 @ctpop2_multiuse(i32 %0) { ; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP0:%.*]], -1 ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP0]], -1 ; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP3]], [[TMP2]] -; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP0]], i1 false), !range !0 +; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP0]], i1 false), !range [[RNG0]] ; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[TMP5]], [[TMP4]] ; CHECK-NEXT: ret i32 [[TMP6]] ; diff --git a/llvm/test/Transforms/InstCombine/ctpop.ll b/llvm/test/Transforms/InstCombine/ctpop.ll index f2141a8..bdb8e6a 100644 --- a/llvm/test/Transforms/InstCombine/ctpop.ll +++ b/llvm/test/Transforms/InstCombine/ctpop.ll @@ -292,9 +292,9 @@ declare i8 @llvm.fshr.i8(i8, i8, i8) define i8 @sub_ctpop(i8 %a) { ; CHECK-LABEL: @sub_ctpop( -; CHECK-NEXT: [[CNT:%.*]] = tail call i8 @llvm.ctpop.i8(i8 [[A:%.*]]), !range [[RNG0]] 
-; CHECK-NEXT: [[RES:%.*]] = sub nuw nsw i8 8, [[CNT]] -; CHECK-NEXT: ret i8 [[RES]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[A:%.*]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.ctpop.i8(i8 [[TMP1]]), !range [[RNG0]] +; CHECK-NEXT: ret i8 [[TMP2]] ; %cnt = tail call i8 @llvm.ctpop.i8(i8 %a) %res = sub i8 8, %cnt @@ -325,9 +325,9 @@ define i8 @sub_ctpop_unknown(i8 %a, i8 %b) { define <2 x i32> @sub_ctpop_vec(<2 x i32> %a) { ; CHECK-LABEL: @sub_ctpop_vec( -; CHECK-NEXT: [[CNT:%.*]] = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[A:%.*]]) -; CHECK-NEXT: [[RES:%.*]] = sub nuw nsw <2 x i32> <i32 32, i32 32>, [[CNT]] -; CHECK-NEXT: ret <2 x i32> [[RES]] +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[A:%.*]], <i32 -1, i32 -1> +; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[TMP1]]) +; CHECK-NEXT: ret <2 x i32> [[TMP2]] ; %cnt = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a) %res = sub <2 x i32> <i32 32, i32 32>, %cnt -- 2.7.4