From 008a89037a49ca0d9ed85bb8548a048991ff133b Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Wed, 12 Oct 2022 10:58:03 -0400 Subject: [PATCH] [InstCombine] fold udiv with common shl amount in operands (X << Z) / (Y << Z) --> X / Y https://alive2.llvm.org/ce/z/E5eaxU This fixes the motivating example from issue #58137, but it is not the most general transform. We should probably also convert left-shift in the divisor to right-shift in the dividend for that, but that exposes another missed canonicalization for shifts and adds. --- .../InstCombine/InstCombineMulDivRem.cpp | 53 ++++++++++++++-------- llvm/test/Transforms/InstCombine/div-shift.ll | 18 ++++++-- .../PhaseOrdering/reassociate-instcombine.ll | 9 +--- 3 files changed, 49 insertions(+), 31 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index b861406..dfa7a6b 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -826,28 +826,43 @@ static Instruction *foldIDivShl(BinaryOperator &I, Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); Type *Ty = I.getType(); + Instruction *Ret = nullptr; + Value *X, *Y, *Z; + // With appropriate no-wrap constraints, remove a common factor in the // dividend and divisor that is disguised as a left-shifted value. - Value *X, *Y, *Z; - if (!match(Op1, m_Shl(m_Value(X), m_Value(Z))) || - !match(Op0, m_c_Mul(m_Specific(X), m_Value(Y)))) - return nullptr; + if (match(Op1, m_Shl(m_Value(X), m_Value(Z))) && + match(Op0, m_c_Mul(m_Specific(X), m_Value(Y)))) { + // Both operands must have the matching no-wrap for this kind of division. + auto *Mul = cast(Op0); + auto *Shl = cast(Op1); + bool HasNUW = Mul->hasNoUnsignedWrap() && Shl->hasNoUnsignedWrap(); + bool HasNSW = Mul->hasNoSignedWrap() && Shl->hasNoSignedWrap(); + + // (X * Y) u/ (X << Z) --> Y u>> Z + if (!IsSigned && HasNUW) + Ret = BinaryOperator::CreateLShr(Y, Z); + + // (X * Y) s/ (X << Z) --> Y s/ (1 << Z) + if (IsSigned && HasNSW && (Op0->hasOneUse() || Op1->hasOneUse())) { + Value *Shl = Builder.CreateShl(ConstantInt::get(Ty, 1), Z); + Ret = BinaryOperator::CreateSDiv(Y, Shl); + } + } - // Both operands must have the matching no-wrap for this kind of division. - Instruction *Ret = nullptr; - auto *Mul = cast(Op0); - auto *Shl = cast(Op1); - bool HasNUW = Mul->hasNoUnsignedWrap() && Shl->hasNoUnsignedWrap(); - bool HasNSW = Mul->hasNoSignedWrap() && Shl->hasNoSignedWrap(); - - // (X * Y) u/ (X << Z) --> Y u>> Z - if (!IsSigned && HasNUW) - Ret = BinaryOperator::CreateLShr(Y, Z); - - // (X * Y) s/ (X << Z) --> Y s/ (1 << Z) - if (IsSigned && HasNSW && (Op0->hasOneUse() || Op1->hasOneUse())) { - Value *Shl = Builder.CreateShl(ConstantInt::get(Ty, 1), Z); - Ret = BinaryOperator::CreateSDiv(Y, Shl); + // With appropriate no-wrap constraints, remove a common factor in the + // dividend and divisor that is disguised as a left-shift amount. + if (match(Op0, m_Shl(m_Value(X), m_Value(Z))) && + match(Op1, m_Shl(m_Value(Y), m_Specific(Z)))) { + auto *Shl0 = cast(Op0); + auto *Shl1 = cast(Op1); + + // For unsigned div, we need 'nuw' on both shifts. + // (X << Z) / (Y << Z) --> X / Y + if (!IsSigned && Shl0->hasNoUnsignedWrap() && Shl1->hasNoUnsignedWrap()) + Ret = BinaryOperator::CreateUDiv(X, Y); + + // TODO: Handle sdiv. } if (!Ret) diff --git a/llvm/test/Transforms/InstCombine/div-shift.ll b/llvm/test/Transforms/InstCombine/div-shift.ll index bac752a..075fb51 100644 --- a/llvm/test/Transforms/InstCombine/div-shift.ll +++ b/llvm/test/Transforms/InstCombine/div-shift.ll @@ -742,6 +742,8 @@ define i8 @sdiv_lshr_mul_nsw(i8 %x, i8 %y, i8 %z) { ret i8 %div } +; TODO: (X << Z) / (Y << Z) --> X / Y + define i8 @sdiv_shl_shl_nsw2_nuw(i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: @sdiv_shl_shl_nsw2_nuw( ; CHECK-NEXT: [[XZ:%.*]] = shl nsw i8 [[X:%.*]], [[Z:%.*]] @@ -809,11 +811,11 @@ define i8 @sdiv_shl_shl_nuw_nsw2(i8 %x, i8 %y, i8 %z) { ret i8 %d } +; (X << Z) / (Y << Z) --> X / Y + define <2 x i8> @udiv_shl_shl_nuw2(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z) { ; CHECK-LABEL: @udiv_shl_shl_nuw2( -; CHECK-NEXT: [[XZ:%.*]] = shl nuw <2 x i8> [[X:%.*]], [[Z:%.*]] -; CHECK-NEXT: [[YZ:%.*]] = shl nuw <2 x i8> [[Y:%.*]], [[Z]] -; CHECK-NEXT: [[D:%.*]] = udiv <2 x i8> [[XZ]], [[YZ]] +; CHECK-NEXT: [[D:%.*]] = udiv <2 x i8> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: ret <2 x i8> [[D]] ; %xz = shl nuw <2 x i8> %x, %z @@ -822,13 +824,15 @@ define <2 x i8> @udiv_shl_shl_nuw2(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z) { ret <2 x i8> %d } +; extra uses are ok and 'exact' propagates + define i8 @udiv_shl_shl_nuw2_exact_use2(i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: @udiv_shl_shl_nuw2_exact_use2( ; CHECK-NEXT: [[XZ:%.*]] = shl nuw i8 [[X:%.*]], [[Z:%.*]] ; CHECK-NEXT: call void @use(i8 [[XZ]]) ; CHECK-NEXT: [[YZ:%.*]] = shl nuw i8 [[Y:%.*]], [[Z]] ; CHECK-NEXT: call void @use(i8 [[YZ]]) -; CHECK-NEXT: [[D:%.*]] = udiv exact i8 [[XZ]], [[YZ]] +; CHECK-NEXT: [[D:%.*]] = udiv exact i8 [[X]], [[Y]] ; CHECK-NEXT: ret i8 [[D]] ; %xz = shl nuw i8 %x, %z @@ -839,6 +843,8 @@ define i8 @udiv_shl_shl_nuw2_exact_use2(i8 %x, i8 %y, i8 %z) { ret i8 %d } +; negative test - wrong wrap + define i8 @udiv_shl_shl_nuw_nsw(i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: @udiv_shl_shl_nuw_nsw( ; CHECK-NEXT: [[XZ:%.*]] = shl nuw i8 [[X:%.*]], [[Z:%.*]] @@ -852,6 +858,8 @@ define i8 @udiv_shl_shl_nuw_nsw(i8 %x, i8 %y, i8 %z) { ret i8 %d } +; negative test - wrong wrap + define i8 @udiv_shl_shl_nsw_nuw(i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: @udiv_shl_shl_nsw_nuw( ; CHECK-NEXT: [[XZ:%.*]] = shl nsw i8 [[X:%.*]], [[Z:%.*]] @@ -865,6 +873,8 @@ define i8 @udiv_shl_shl_nsw_nuw(i8 %x, i8 %y, i8 %z) { ret i8 %d } +; TODO: This could fold. + define i8 @udiv_shl_shl_nuw_nsw2(i8 %x, i8 %y, i8 %z) { ; CHECK-LABEL: @udiv_shl_shl_nuw_nsw2( ; CHECK-NEXT: [[XZ:%.*]] = shl nuw nsw i8 [[X:%.*]], [[Z:%.*]] diff --git a/llvm/test/Transforms/PhaseOrdering/reassociate-instcombine.ll b/llvm/test/Transforms/PhaseOrdering/reassociate-instcombine.ll index 281d5c7..7e958e8 100644 --- a/llvm/test/Transforms/PhaseOrdering/reassociate-instcombine.ll +++ b/llvm/test/Transforms/PhaseOrdering/reassociate-instcombine.ll @@ -50,14 +50,7 @@ define i32 @PR58137_sdiv(i32 %a, i32 %b) { define i32 @PR58137_udiv(i32 %x, i32 %y) { ; CHECK-LABEL: @PR58137_udiv( -; CHECK-NEXT: [[ZX:%.*]] = zext i32 [[X:%.*]] to i64 -; CHECK-NEXT: [[ZY:%.*]] = zext i32 [[Y:%.*]] to i64 -; CHECK-NEXT: [[M1:%.*]] = shl nuw nsw i64 [[ZX]], 2 -; CHECK-NEXT: [[M2:%.*]] = mul i64 [[M1]], [[ZY]] -; CHECK-NEXT: [[M3:%.*]] = shl nuw nsw i64 [[ZY]], 2 -; CHECK-NEXT: [[D:%.*]] = udiv i64 [[M2]], [[M3]] -; CHECK-NEXT: [[T:%.*]] = trunc i64 [[D]] to i32 -; CHECK-NEXT: ret i32 [[T]] +; CHECK-NEXT: ret i32 [[X:%.*]] ; %zx = zext i32 %x to i64 %zy = zext i32 %y to i64 -- 2.7.4