From bfde8619355a06d9f1a199e9cb3f8d41aef5c05b Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Fri, 17 Jun 2022 10:41:00 -0400 Subject: [PATCH] [InstCombine] convert mask and shift of power-of-2 to cmp+select When the mask is a power-of-2 constant and op0 is a shifted-power-of-2 constant, test if the shift amount equals the offset bit index: (ShiftC << X) & C --> X == (log2(C) - log2(ShiftC)) ? C : 0 (ShiftC >> X) & C --> X == (log2(ShiftC) - log2(C)) ? C : 0 This is an alternate to D127610 with a more general pattern. We match only shift+and instead of the trailing xor, so we see a few more tests diffs. I think we discussed this initially in D126617. Here are proofs for shifts in both directions: https://alive2.llvm.org/ce/z/CFrLs4 The test diffs look equal or better for IR, and this makes the patterns more uniform in IR. The backend can partially invert this in both cases if that is profitable. It is not trivially reversible, however, so if we find perf regressions that are not easy to undo, then we may want to revert this. Differential Revision: https://reviews.llvm.org/D127801 --- .../Transforms/InstCombine/InstCombineAndOrXor.cpp | 17 +++++++++++ llvm/test/Transforms/InstCombine/and.ll | 26 +++++++---------- llvm/test/Transforms/InstCombine/icmp-and-shift.ll | 33 ++++++++++------------ .../InstCombine/lshr-and-signbit-icmpeq-zero.ll | 5 ++-- 4 files changed, 44 insertions(+), 37 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index af0efe1..cc06e2f 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1914,6 +1914,23 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) { } } + // When the mask is a power-of-2 constant and op0 is a shifted-power-of-2 + // constant, test if the shift amount equals the offset bit index: + // (ShiftC << X) & C --> X == (log2(C) - log2(ShiftC)) ? 
C : 0 + // (ShiftC >> X) & C --> X == (log2(ShiftC) - log2(C)) ? C : 0 + if (C->isPowerOf2() && + match(Op0, m_OneUse(m_LogicalShift(m_Power2(ShiftC), m_Value(X))))) { + int Log2ShiftC = ShiftC->exactLogBase2(); + int Log2C = C->exactLogBase2(); + bool IsShiftLeft = + cast<BinaryOperator>(Op0)->getOpcode() == Instruction::Shl; + int BitNum = IsShiftLeft ? Log2C - Log2ShiftC : Log2ShiftC - Log2C; + assert(BitNum >= 0 && "Expected demanded bits to handle impossible mask"); + Value *Cmp = Builder.CreateICmpEQ(X, ConstantInt::get(Ty, BitNum)); + return SelectInst::Create(Cmp, ConstantInt::get(Ty, *C), + ConstantInt::getNullValue(Ty)); + } + Constant *C1, *C2; const APInt *C3 = C; Value *X; diff --git a/llvm/test/Transforms/InstCombine/and.ll b/llvm/test/Transforms/InstCombine/and.ll index fcf1db2..6143e61 100644 --- a/llvm/test/Transforms/InstCombine/and.ll +++ b/llvm/test/Transforms/InstCombine/and.ll @@ -1876,8 +1876,8 @@ define <3 x i16> @shl_lshr_pow2_const_case1_undef3_vec(<3 x i16> %x) { define i16 @shl_lshr_pow2_const_case2(i16 %x) { ; CHECK-LABEL: @shl_lshr_pow2_const_case2( -; CHECK-NEXT: [[TMP1:%.*]] = shl i16 2, [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = and i16 [[TMP1]], 8 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i16 [[X:%.*]], 2 +; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i16 8, i16 0 ; CHECK-NEXT: ret i16 [[R]] ; %shl = shl i16 16, %x @@ -1886,13 +1886,10 @@ define i16 @shl_lshr_pow2_const_case2(i16 %x) { ret i16 %r } -; TODO: this pattern can be transform to icmp+select - define i16 @shl_lshr_pow2_not_const_case2(i16 %x) { ; CHECK-LABEL: @shl_lshr_pow2_not_const_case2( -; CHECK-NEXT: [[TMP1:%.*]] = shl i16 2, [[X:%.*]] -; CHECK-NEXT: [[AND:%.*]] = and i16 [[TMP1]], 8 -; CHECK-NEXT: [[R:%.*]] = xor i16 [[AND]], 8 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i16 [[X:%.*]], 2 +; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i16 0, i16 8 ; CHECK-NEXT: ret i16 [[R]] ; %shl = shl i16 16, %x @@ -1969,8 +1966,8 @@ define i16 @shl_lshr_pow2_const_negative_nopow2_2(i16 %x) { define i16 
@lshr_lshr_pow2_const(i16 %x) { ; CHECK-LABEL: @lshr_lshr_pow2_const( -; CHECK-NEXT: [[LSHR2:%.*]] = lshr i16 32, [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = and i16 [[LSHR2]], 4 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i16 [[X:%.*]], 3 +; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i16 4, i16 0 ; CHECK-NEXT: ret i16 [[R]] ; %lshr1 = lshr i16 2048, %x @@ -2032,8 +2029,8 @@ define i16 @lshr_lshr_pow2_const_negative_overflow(i16 %x) { define i16 @lshr_shl_pow2_const_case1(i16 %x) { ; CHECK-LABEL: @lshr_shl_pow2_const_case1( -; CHECK-NEXT: [[TMP1:%.*]] = lshr i16 1024, [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = and i16 [[TMP1]], 8 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i16 [[X:%.*]], 7 +; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i16 8, i16 0 ; CHECK-NEXT: ret i16 [[R]] ; %lshr1 = lshr i16 256, %x @@ -2042,13 +2039,10 @@ define i16 @lshr_shl_pow2_const_case1(i16 %x) { ret i16 %r } -; TODO: this pattern can be transform to icmp+select - define i16 @lshr_shl_pow2_const_xor(i16 %x) { ; CHECK-LABEL: @lshr_shl_pow2_const_xor( -; CHECK-NEXT: [[TMP1:%.*]] = lshr i16 1024, [[X:%.*]] -; CHECK-NEXT: [[AND:%.*]] = and i16 [[TMP1]], 8 -; CHECK-NEXT: [[R:%.*]] = xor i16 [[AND]], 8 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i16 [[X:%.*]], 7 +; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i16 0, i16 8 ; CHECK-NEXT: ret i16 [[R]] ; %lshr1 = lshr i16 256, %x diff --git a/llvm/test/Transforms/InstCombine/icmp-and-shift.ll b/llvm/test/Transforms/InstCombine/icmp-and-shift.ll index 87b5942..a67017f 100644 --- a/llvm/test/Transforms/InstCombine/icmp-and-shift.ll +++ b/llvm/test/Transforms/InstCombine/icmp-and-shift.ll @@ -329,9 +329,9 @@ define i32 @icmp_ne_and1_lshr_pow2(i32 %0) { define <2 x i32> @icmp_ne_and1_lshr_pow2_vec(<2 x i32> %0) { ; CHECK-LABEL: @icmp_ne_and1_lshr_pow2_vec( -; CHECK-NEXT: [[AND:%.*]] = lshr <2 x i32> , [[TMP0:%.*]] -; CHECK-NEXT: [[AND_LOBIT:%.*]] = and <2 x i32> [[AND]], -; CHECK-NEXT: ret <2 x i32> [[AND_LOBIT]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP0:%.*]], +; 
CHECK-NEXT: [[CONV:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[CONV]] ; %lshr = lshr <2 x i32> <i32 8, i32 8>, %0 %and = and <2 x i32> %lshr, <i32 1, i32 1> @@ -342,10 +342,9 @@ define <2 x i32> @icmp_ne_and1_lshr_pow2_vec(<2 x i32> %0) { define i32 @icmp_eq_and_pow2_lshr_pow2(i32 %0) { ; CHECK-LABEL: @icmp_eq_and_pow2_lshr_pow2( -; CHECK-NEXT: [[AND:%.*]] = lshr i32 2, [[TMP0:%.*]] -; CHECK-NEXT: [[AND_LOBIT:%.*]] = and i32 [[AND]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[AND_LOBIT]], 1 -; CHECK-NEXT: ret i32 [[TMP2]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP0:%.*]], 1 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[TMP2]] to i32 +; CHECK-NEXT: ret i32 [[CONV]] ; %lshr = lshr i32 8, %0 %and = and i32 %lshr, 4 @@ -367,10 +366,9 @@ define i32 @icmp_eq_and_pow2_lshr_pow2_case2(i32 %0) { define <2 x i32> @icmp_eq_and_pow2_lshr_pow2_vec(<2 x i32> %0) { ; CHECK-LABEL: @icmp_eq_and_pow2_lshr_pow2_vec( -; CHECK-NEXT: [[AND:%.*]] = lshr <2 x i32> <i32 2, i32 2>, [[TMP0:%.*]] -; CHECK-NEXT: [[AND_LOBIT:%.*]] = and <2 x i32> [[AND]], <i32 1, i32 1> -; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i32> [[AND_LOBIT]], <i32 1, i32 1> -; CHECK-NEXT: ret <2 x i32> [[TMP2]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP0:%.*]], <i32 1, i32 1> +; CHECK-NEXT: [[CONV:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[CONV]] ; %lshr = lshr <2 x i32> <i32 8, i32 8>, %0 %and = and <2 x i32> %lshr, <i32 4, i32 4> @@ -381,10 +379,9 @@ define <2 x i32> @icmp_eq_and_pow2_lshr_pow2_vec(<2 x i32> %0) { define i32 @icmp_ne_and_pow2_lshr_pow2(i32 %0) { ; CHECK-LABEL: @icmp_ne_and_pow2_lshr_pow2( -; CHECK-NEXT: [[AND:%.*]] = lshr i32 2, [[TMP0:%.*]] -; CHECK-NEXT: [[AND_LOBIT:%.*]] = and i32 [[AND]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[AND_LOBIT]], 1 -; CHECK-NEXT: ret i32 [[TMP2]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP0:%.*]], 1 +; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[TMP2]] to i32 +; CHECK-NEXT: ret i32 [[CONV]] ; %lshr = lshr i32 8, %0 %and = and i32 %lshr, 4 @@ -406,9 +403,9 @@ define i32 @icmp_ne_and_pow2_lshr_pow2_case2(i32 %0) { define 
<2 x i32> @icmp_ne_and_pow2_lshr_pow2_vec(<2 x i32> %0) { ; CHECK-LABEL: @icmp_ne_and_pow2_lshr_pow2_vec( -; CHECK-NEXT: [[AND:%.*]] = lshr <2 x i32> <i32 2, i32 2>, [[TMP0:%.*]] -; CHECK-NEXT: [[AND_LOBIT:%.*]] = and <2 x i32> [[AND]], <i32 1, i32 1> -; CHECK-NEXT: ret <2 x i32> [[AND_LOBIT]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP0:%.*]], <i32 1, i32 1> +; CHECK-NEXT: [[CONV:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32> +; CHECK-NEXT: ret <2 x i32> [[CONV]] ; %lshr = lshr <2 x i32> <i32 8, i32 8>, %0 %and = and <2 x i32> %lshr, <i32 4, i32 4> diff --git a/llvm/test/Transforms/InstCombine/lshr-and-signbit-icmpeq-zero.ll b/llvm/test/Transforms/InstCombine/lshr-and-signbit-icmpeq-zero.ll index 1388e5a..6686098 100644 --- a/llvm/test/Transforms/InstCombine/lshr-and-signbit-icmpeq-zero.ll +++ b/llvm/test/Transforms/InstCombine/lshr-and-signbit-icmpeq-zero.ll @@ -193,9 +193,8 @@ define i1 @scalar_i32_lshr_and_signbit_eq_X_is_constant1(i32 %y) { define i1 @scalar_i32_lshr_and_negC_eq_X_is_constant2(i32 %y) { ; CHECK-LABEL: @scalar_i32_lshr_and_negC_eq_X_is_constant2( -; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 -2147483648, [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp sgt i32 [[LSHR]], -1 -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[Y:%.*]], 0 +; CHECK-NEXT: ret i1 [[TMP1]] ; %lshr = lshr i32 2147483648, %y %and = and i32 %lshr, 2147483648 -- 2.7.4