From c9755d80a689658d0070fb9bcbd7dd446e6ab784 Mon Sep 17 00:00:00 2001 From: Noah Goldstein Date: Sat, 21 Jan 2023 22:00:14 -0800 Subject: [PATCH] Transform ctpop(Pow2) -> icmp ne Pow2, 0 This makes folding to 0/1 later on easier and regardless `icmp ne` is 'probably' faster on most targets (especially for vectors). Reviewed By: nikic Differential Revision: https://reviews.llvm.org/D142253 --- .../Transforms/InstCombine/InstCombineCalls.cpp | 11 +++++++ llvm/test/Transforms/InstCombine/ctpop-pow2.ll | 38 +++++++++------------- 2 files changed, 26 insertions(+), 23 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 65248ea..4648ed5 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -668,10 +668,21 @@ static Instruction *foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC) { // If all bits are zero except for exactly one fixed bit, then the result // must be 0 or 1, and we can get that answer by shifting to LSB: // ctpop (X & 32) --> (X & 32) >> 5 + // TODO: Investigate removing this as its likely unnecessary given the below + // `isKnownToBeAPowerOfTwo` check. if ((~Known.Zero).isPowerOf2()) return BinaryOperator::CreateLShr( Op0, ConstantInt::get(Ty, (~Known.Zero).exactLogBase2())); + // More generally we can also handle non-constant power of 2 patterns such as + // shl/shr(Pow2, X), (X & -X), etc... by transforming: + // ctpop(Pow2OrZero) --> icmp ne X, 0 + if (IC.isKnownToBeAPowerOfTwo(Op0, /* OrZero */ true)) + return CastInst::Create(Instruction::ZExt, + IC.Builder.CreateICmp(ICmpInst::ICMP_NE, Op0, + Constant::getNullValue(Ty)), + Ty); + // FIXME: Try to simplify vectors of integers. auto *IT = dyn_cast(Ty); if (!IT) diff --git a/llvm/test/Transforms/InstCombine/ctpop-pow2.ll b/llvm/test/Transforms/InstCombine/ctpop-pow2.ll index b05d11e..6ae5d32 100644 --- a/llvm/test/Transforms/InstCombine/ctpop-pow2.ll +++ b/llvm/test/Transforms/InstCombine/ctpop-pow2.ll @@ -13,7 +13,8 @@ define i16 @ctpop_x_and_negx(i16 %x) { ; CHECK-LABEL: @ctpop_x_and_negx( ; CHECK-NEXT: [[V0:%.*]] = sub i16 0, [[X:%.*]] ; CHECK-NEXT: [[V1:%.*]] = and i16 [[V0]], [[X]] -; CHECK-NEXT: [[CNT:%.*]] = call i16 @llvm.ctpop.i16(i16 [[V1]]), !range [[RNG0:![0-9]+]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i16 [[V1]], 0 +; CHECK-NEXT: [[CNT:%.*]] = zext i1 [[TMP1]] to i16 ; CHECK-NEXT: ret i16 [[CNT]] ; %v0 = sub i16 0, %x @@ -24,11 +25,7 @@ define i16 @ctpop_x_and_negx(i16 %x) { define i8 @ctpop_x_nz_and_negx(i8 %x) { ; CHECK-LABEL: @ctpop_x_nz_and_negx( -; CHECK-NEXT: [[X1:%.*]] = or i8 [[X:%.*]], 1 -; CHECK-NEXT: [[V0:%.*]] = sub nsw i8 0, [[X1]] -; CHECK-NEXT: [[V1:%.*]] = and i8 [[X1]], [[V0]] -; CHECK-NEXT: [[CNT:%.*]] = call i8 @llvm.ctpop.i8(i8 [[V1]]), !range [[RNG1:![0-9]+]] -; CHECK-NEXT: ret i8 [[CNT]] +; CHECK-NEXT: ret i8 1 ; %x1 = or i8 %x, 1 %v0 = sub i8 0, %x1 @@ -39,8 +36,8 @@ define i8 @ctpop_x_nz_and_negx(i8 %x) { define i32 @ctpop_2_shl(i32 %x) { ; CHECK-LABEL: @ctpop_2_shl( -; CHECK-NEXT: [[V:%.*]] = shl i32 2, [[X:%.*]] -; CHECK-NEXT: [[CNT:%.*]] = call i32 @llvm.ctpop.i32(i32 [[V]]), !range [[RNG2:![0-9]+]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[X:%.*]], 31 +; CHECK-NEXT: [[CNT:%.*]] = zext i1 [[TMP1]] to i32 ; CHECK-NEXT: ret i32 [[CNT]] ; %v = shl i32 2, %x @@ -50,10 +47,7 @@ define i32 @ctpop_2_shl(i32 %x) { define i32 @ctpop_2_shl_nz(i32 %x) { ; CHECK-LABEL: @ctpop_2_shl_nz( -; CHECK-NEXT: [[XA30:%.*]] = and i32 [[X:%.*]], 30 -; CHECK-NEXT: [[V:%.*]] = shl i32 2, [[XA30]] -; CHECK-NEXT: [[CNT:%.*]] = call i32 @llvm.ctpop.i32(i32 [[V]]), !range [[RNG3:![0-9]+]] -; CHECK-NEXT: ret i32 [[CNT]] +; CHECK-NEXT: ret i32 1 ; %xa30 = and i32 30, %x %v = shl i32 2, %xa30 @@ -66,7 +60,7 @@ define i8 @ctpop_imin_plus1_lshr_nz(i8 %x) { ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8 [[X:%.*]], 0 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) ; CHECK-NEXT: [[V:%.*]] = lshr i8 -127, [[X]] -; CHECK-NEXT: [[CNT:%.*]] = call i8 @llvm.ctpop.i8(i8 [[V]]), !range [[RNG4:![0-9]+]] +; CHECK-NEXT: [[CNT:%.*]] = call i8 @llvm.ctpop.i8(i8 [[V]]), !range [[RNG0:![0-9]+]] ; CHECK-NEXT: ret i8 [[CNT]] ; %cmp = icmp ne i8 %x, 0 @@ -83,8 +77,7 @@ define i64 @ctpop_x_and_negx_nz(i64 %x) { ; CHECK-NEXT: [[V1:%.*]] = and i64 [[V0]], [[X]] ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i64 [[V1]], 0 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) -; CHECK-NEXT: [[CNT:%.*]] = call i64 @llvm.ctpop.i64(i64 [[V1]]), !range [[RNG5:![0-9]+]] -; CHECK-NEXT: ret i64 [[CNT]] +; CHECK-NEXT: ret i64 1 ; %v0 = sub i64 0, %x %v1 = and i64 %x, %v0 @@ -97,7 +90,8 @@ define i64 @ctpop_x_and_negx_nz(i64 %x) { define <2 x i32> @ctpop_shl2_1_vec(<2 x i32> %x) { ; CHECK-LABEL: @ctpop_shl2_1_vec( ; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> , [[X:%.*]] -; CHECK-NEXT: [[CNT:%.*]] = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[SHL]]) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i32> [[SHL]], zeroinitializer +; CHECK-NEXT: [[CNT:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[CNT]] ; %shl = shl <2 x i32> , %x @@ -124,7 +118,8 @@ define <2 x i32> @ctpop_shl2_1_vec_nz(<2 x i32> %x) { ; CHECK-LABEL: @ctpop_shl2_1_vec_nz( ; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X:%.*]], ; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> , [[AND]] -; CHECK-NEXT: [[CNT:%.*]] = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[SHL]]) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i32> [[SHL]], zeroinitializer +; CHECK-NEXT: [[CNT:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[CNT]] ; %and = and <2 x i32> %x, @@ -137,7 +132,8 @@ define <2 x i64> @ctpop_x_and_negx_vec(<2 x i64> %x) { ; CHECK-LABEL: @ctpop_x_and_negx_vec( ; CHECK-NEXT: [[SUB:%.*]] = sub <2 x i64> zeroinitializer, [[X:%.*]] ; CHECK-NEXT: [[AND:%.*]] = and <2 x i64> [[SUB]], [[X]] -; CHECK-NEXT: [[CNT:%.*]] = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> [[AND]]) +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i64> [[AND]], zeroinitializer +; CHECK-NEXT: [[CNT:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[CNT]] ; %sub = sub <2 x i64> , %x @@ -148,11 +144,7 @@ define <2 x i64> @ctpop_x_and_negx_vec(<2 x i64> %x) { define <2 x i32> @ctpop_x_and_negx_vec_nz(<2 x i32> %x) { ; CHECK-LABEL: @ctpop_x_and_negx_vec_nz( -; CHECK-NEXT: [[X1:%.*]] = or <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[SUB:%.*]] = sub nsw <2 x i32> zeroinitializer, [[X1]] -; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X1]], [[SUB]] -; CHECK-NEXT: [[CNT:%.*]] = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[AND]]) -; CHECK-NEXT: ret <2 x i32> [[CNT]] +; CHECK-NEXT: ret <2 x i32> ; %x1 = or <2 x i32> %x, %sub = sub <2 x i32> , %x1 -- 2.7.4