From 0cfc6510323fbb5a56a5de23cbc65f7cc30fd34c Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Wed, 24 Aug 2022 11:39:24 -0400 Subject: [PATCH] [InstCombine] ease use constraint in tryFactorization() The stronger one-use checks prevented transforms like this: (x * y) + x --> x * (y + 1) (x * y) - x --> x * (y - 1) https://alive2.llvm.org/ce/z/eMhvQa This is one of the IR transforms suggested in issue #57255. This should be better in IR because it removes a use of a variable operand (we already fold the case with a constant multiply operand). The backend should be able to re-distribute the multiply if that's better for the target. Differential Revision: https://reviews.llvm.org/D132412 --- .../InstCombine/InstructionCombining.cpp | 8 +++---- llvm/test/Transforms/InstCombine/add.ll | 20 ++++++++++------- llvm/test/Transforms/InstCombine/and-or.ll | 12 +++++------ llvm/test/Transforms/InstCombine/ctpop.ll | 12 +++++------ llvm/test/Transforms/InstCombine/sub.ll | 25 +++++++++++++++------- 5 files changed, 45 insertions(+), 32 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 0fd21ad..07f8c68 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -657,9 +657,9 @@ Value *InstCombinerImpl::tryFactorization(BinaryOperator &I, // If "B op D" simplifies then it can be formed with no cost. V = simplifyBinOp(TopLevelOpcode, B, D, SQ.getWithInstruction(&I)); - // If "B op D" doesn't simplify then only go on if both of the existing + // If "B op D" doesn't simplify then only go on if one of the existing // operations "A op' B" and "C op' D" will be zapped as no longer used. - if (!V && LHS->hasOneUse() && RHS->hasOneUse()) + if (!V && (LHS->hasOneUse() || RHS->hasOneUse())) V = Builder.CreateBinOp(TopLevelOpcode, B, D, RHS->getName()); if (V) RetVal = Builder.CreateBinOp(InnerOpcode, A, V); @@ -677,9 +677,9 @@ Value *InstCombinerImpl::tryFactorization(BinaryOperator &I, // If "A op C" simplifies then it can be formed with no cost. V = simplifyBinOp(TopLevelOpcode, A, C, SQ.getWithInstruction(&I)); - // If "A op C" doesn't simplify then only go on if both of the existing + // If "A op C" doesn't simplify then only go on if one of the existing // operations "A op' B" and "C op' D" will be zapped as no longer used. - if (!V && LHS->hasOneUse() && RHS->hasOneUse()) + if (!V && (LHS->hasOneUse() || RHS->hasOneUse())) V = Builder.CreateBinOp(TopLevelOpcode, A, C, LHS->getName()); if (V) RetVal = Builder.CreateBinOp(InnerOpcode, V, B); diff --git a/llvm/test/Transforms/InstCombine/add.ll b/llvm/test/Transforms/InstCombine/add.ll index 1289ebe..68b394f 100644 --- a/llvm/test/Transforms/InstCombine/add.ll +++ b/llvm/test/Transforms/InstCombine/add.ll @@ -1756,10 +1756,12 @@ define i32 @add_add_add_commute3(i32 %A, i32 %B, i32 %C, i32 %D) { ret i32 %G } +; x * y + x --> (y + 1) * x + define i8 @mul_add_common_factor_commute1(i8 %x, i8 %y) { ; CHECK-LABEL: @mul_add_common_factor_commute1( -; CHECK-NEXT: [[M:%.*]] = mul nsw i8 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[A:%.*]] = add nsw i8 [[M]], [[X]] +; CHECK-NEXT: [[X1:%.*]] = add i8 [[Y:%.*]], 1 +; CHECK-NEXT: [[A:%.*]] = mul i8 [[X1]], [[X:%.*]] ; CHECK-NEXT: ret i8 [[A]] ; %m = mul nsw i8 %x, %y @@ -1769,8 +1771,8 @@ define i8 @mul_add_common_factor_commute1(i8 %x, i8 %y) { define <2 x i8> @mul_add_common_factor_commute2(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @mul_add_common_factor_commute2( -; CHECK-NEXT: [[M:%.*]] = mul nuw <2 x i8> [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[A:%.*]] = add nuw <2 x i8> [[M]], [[X]] +; CHECK-NEXT: [[M1:%.*]] = add <2 x i8> [[Y:%.*]], +; CHECK-NEXT: [[A:%.*]] = mul nuw <2 x i8> [[M1]], [[X:%.*]] ; CHECK-NEXT: ret <2 x i8> [[A]] ; %m = mul nuw <2 x i8> %y, %x @@ -1781,8 +1783,8 @@ define <2 x i8> @mul_add_common_factor_commute2(<2 x i8> %x, <2 x i8> %y) { define i8 @mul_add_common_factor_commute3(i8 %p, i8 %y) { ; CHECK-LABEL: @mul_add_common_factor_commute3( ; CHECK-NEXT: [[X:%.*]] = mul i8 [[P:%.*]], [[P]] -; CHECK-NEXT: [[M:%.*]] = mul nuw i8 [[X]], [[Y:%.*]] -; CHECK-NEXT: [[A:%.*]] = add nsw i8 [[X]], [[M]] +; CHECK-NEXT: [[M1:%.*]] = add i8 [[Y:%.*]], 1 +; CHECK-NEXT: [[A:%.*]] = mul i8 [[X]], [[M1]] ; CHECK-NEXT: ret i8 [[A]] ; %x = mul i8 %p, %p ; thwart complexity-based canonicalization @@ -1795,8 +1797,8 @@ define i8 @mul_add_common_factor_commute4(i8 %p, i8 %q) { ; CHECK-LABEL: @mul_add_common_factor_commute4( ; CHECK-NEXT: [[X:%.*]] = mul i8 [[P:%.*]], [[P]] ; CHECK-NEXT: [[Y:%.*]] = mul i8 [[Q:%.*]], [[Q]] -; CHECK-NEXT: [[M:%.*]] = mul nsw i8 [[Y]], [[X]] -; CHECK-NEXT: [[A:%.*]] = add nuw i8 [[X]], [[M]] +; CHECK-NEXT: [[M1:%.*]] = add i8 [[Y]], 1 +; CHECK-NEXT: [[A:%.*]] = mul i8 [[X]], [[M1]] ; CHECK-NEXT: ret i8 [[A]] ; %x = mul i8 %p, %p ; thwart complexity-based canonicalization @@ -1806,6 +1808,8 @@ define i8 @mul_add_common_factor_commute4(i8 %p, i8 %q) { ret i8 %a } +; negative test - uses + define i8 @mul_add_common_factor_use(i8 %x, i8 %y) { ; CHECK-LABEL: @mul_add_common_factor_use( ; CHECK-NEXT: [[M:%.*]] = mul i8 [[X:%.*]], [[Y:%.*]] diff --git a/llvm/test/Transforms/InstCombine/and-or.ll b/llvm/test/Transforms/InstCombine/and-or.ll index 2b1d453..7a37af4 100644 --- a/llvm/test/Transforms/InstCombine/and-or.ll +++ b/llvm/test/Transforms/InstCombine/and-or.ll @@ -671,11 +671,11 @@ define i32 @or_or_and_noOneUse_fail1(i32 %a, i32 %b) { ; CHECK-NEXT: [[SHR:%.*]] = ashr i32 [[A:%.*]], 23 ; CHECK-NEXT: [[AND:%.*]] = and i32 [[SHR]], 157 ; CHECK-NEXT: call void @use2(i32 [[AND]]) -; CHECK-NEXT: [[AND3:%.*]] = and i32 [[SHR]], [[B:%.*]] +; CHECK-NEXT: [[AND1:%.*]] = or i32 [[B:%.*]], 157 +; CHECK-NEXT: [[OR:%.*]] = and i32 [[SHR]], [[AND1]] ; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[B]], 23 ; CHECK-NEXT: [[AND9:%.*]] = and i32 [[TMP1]], 157 -; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[AND3]], [[AND9]] -; CHECK-NEXT: [[R:%.*]] = or i32 [[TMP2]], [[AND]] +; CHECK-NEXT: [[R:%.*]] = or i32 [[OR]], [[AND9]] ; CHECK-NEXT: ret i32 [[R]] ; %shr = ashr i32 %a, 23 @@ -700,9 +700,9 @@ define { i1, i1, i1, i1, i1 } @or_or_and_noOneUse_fail2(i1 %a_0, i1 %a_1, i1 %a_ ; CHECK-NEXT: [[TMP3:%.*]] = and i1 [[A_1:%.*]], [[B_1:%.*]] ; CHECK-NEXT: [[TMP4:%.*]] = xor i1 [[TMP3]], true ; CHECK-NEXT: [[TMP5:%.*]] = and i1 [[TMP0]], [[A_1]] -; CHECK-NEXT: [[TMP6:%.*]] = and i1 [[TMP2]], [[B_1]] -; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP6]], [[TMP5]] -; CHECK-NEXT: [[D:%.*]] = or i1 [[TMP7]], [[TMP3]] +; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP2]], [[A_1]] +; CHECK-NEXT: [[TMP7:%.*]] = and i1 [[TMP6]], [[B_1]] +; CHECK-NEXT: [[D:%.*]] = or i1 [[TMP7]], [[TMP5]] ; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP1]], [[TMP3]] ; CHECK-NEXT: [[TMP9:%.*]] = insertvalue { i1, i1, i1, i1, i1 } zeroinitializer, i1 [[D]], 0 ; CHECK-NEXT: [[TMP10:%.*]] = insertvalue { i1, i1, i1, i1, i1 } [[TMP9]], i1 [[TMP4]], 1 diff --git a/llvm/test/Transforms/InstCombine/ctpop.ll b/llvm/test/Transforms/InstCombine/ctpop.ll index d714d624..a7b0fb4 100644 --- a/llvm/test/Transforms/InstCombine/ctpop.ll +++ b/llvm/test/Transforms/InstCombine/ctpop.ll @@ -448,9 +448,9 @@ define i32 @parity_xor_extra_use(i32 %arg, i32 %arg1) { ; CHECK-NEXT: [[I:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[ARG:%.*]]), !range [[RNG1]] ; CHECK-NEXT: [[I2:%.*]] = and i32 [[I]], 1 ; CHECK-NEXT: tail call void @use(i32 [[I2]]) -; CHECK-NEXT: [[I3:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[ARG1:%.*]]), !range [[RNG1]] -; CHECK-NEXT: [[I4:%.*]] = and i32 [[I3]], 1 -; CHECK-NEXT: [[I5:%.*]] = xor i32 [[I4]], [[I2]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[ARG1:%.*]], [[ARG]] +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ctpop.i32(i32 [[TMP1]]), !range [[RNG1]] +; CHECK-NEXT: [[I5:%.*]] = and i32 [[TMP2]], 1 ; CHECK-NEXT: ret i32 [[I5]] ; %i = tail call i32 @llvm.ctpop.i32(i32 %arg) @@ -467,9 +467,9 @@ define i32 @parity_xor_extra_use2(i32 %arg, i32 %arg1) { ; CHECK-NEXT: [[I:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[ARG1:%.*]]), !range [[RNG1]] ; CHECK-NEXT: [[I2:%.*]] = and i32 [[I]], 1 ; CHECK-NEXT: tail call void @use(i32 [[I2]]) -; CHECK-NEXT: [[I3:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[ARG:%.*]]), !range [[RNG1]] -; CHECK-NEXT: [[I4:%.*]] = and i32 [[I3]], 1 -; CHECK-NEXT: [[I5:%.*]] = xor i32 [[I2]], [[I4]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[ARG1]], [[ARG:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.ctpop.i32(i32 [[TMP1]]), !range [[RNG1]] +; CHECK-NEXT: [[I5:%.*]] = and i32 [[TMP2]], 1 ; CHECK-NEXT: ret i32 [[I5]] ; %i = tail call i32 @llvm.ctpop.i32(i32 %arg1) diff --git a/llvm/test/Transforms/InstCombine/sub.ll b/llvm/test/Transforms/InstCombine/sub.ll index 871489f..dd1471c 100644 --- a/llvm/test/Transforms/InstCombine/sub.ll +++ b/llvm/test/Transforms/InstCombine/sub.ll @@ -2003,10 +2003,13 @@ define i16 @urem_zext_noundef(i8 noundef %x, i8 %y) { ret i16 %z } +; x * y - x --> (y - 1) * x +; TODO: The mul could retain nsw. + define i8 @mul_sub_common_factor_commute1(i8 %x, i8 %y) { ; CHECK-LABEL: @mul_sub_common_factor_commute1( -; CHECK-NEXT: [[M:%.*]] = mul nsw i8 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[A:%.*]] = sub nsw i8 [[M]], [[X]] +; CHECK-NEXT: [[X1:%.*]] = add i8 [[Y:%.*]], -1 +; CHECK-NEXT: [[A:%.*]] = mul i8 [[X1]], [[X:%.*]] ; CHECK-NEXT: ret i8 [[A]] ; %m = mul nsw i8 %x, %y @@ -2014,10 +2017,12 @@ define i8 @mul_sub_common_factor_commute1(i8 %x, i8 %y) { ret i8 %a } +; TODO: The mul could retain nuw. + define <2 x i8> @mul_sub_common_factor_commute2(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @mul_sub_common_factor_commute2( -; CHECK-NEXT: [[M:%.*]] = mul nuw <2 x i8> [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[A:%.*]] = sub nuw <2 x i8> [[M]], [[X]] +; CHECK-NEXT: [[M1:%.*]] = add <2 x i8> [[Y:%.*]], +; CHECK-NEXT: [[A:%.*]] = mul <2 x i8> [[M1]], [[X:%.*]] ; CHECK-NEXT: ret <2 x i8> [[A]] ; %m = mul nuw <2 x i8> %y, %x @@ -2025,10 +2030,12 @@ define <2 x i8> @mul_sub_common_factor_commute2(<2 x i8> %x, <2 x i8> %y) { ret <2 x i8> %a } +; x - (x * y) --> (1 - y) * x + define i8 @mul_sub_common_factor_commute3(i8 %x, i8 %y) { ; CHECK-LABEL: @mul_sub_common_factor_commute3( -; CHECK-NEXT: [[M:%.*]] = mul nuw i8 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[A:%.*]] = sub nsw i8 [[X]], [[M]] +; CHECK-NEXT: [[M1:%.*]] = sub i8 1, [[Y:%.*]] +; CHECK-NEXT: [[A:%.*]] = mul i8 [[M1]], [[X:%.*]] ; CHECK-NEXT: ret i8 [[A]] ; %m = mul nuw i8 %x, %y @@ -2038,8 +2045,8 @@ define i8 @mul_sub_common_factor_commute3(i8 %x, i8 %y) { define i8 @mul_sub_common_factor_commute4(i8 %x, i8 %y) { ; CHECK-LABEL: @mul_sub_common_factor_commute4( -; CHECK-NEXT: [[M:%.*]] = mul nsw i8 [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[A:%.*]] = sub nuw i8 [[X]], [[M]] +; CHECK-NEXT: [[M1:%.*]] = sub i8 1, [[Y:%.*]] +; CHECK-NEXT: [[A:%.*]] = mul i8 [[M1]], [[X:%.*]] ; CHECK-NEXT: ret i8 [[A]] ; %m = mul nsw i8 %y, %x @@ -2047,6 +2054,8 @@ define i8 @mul_sub_common_factor_commute4(i8 %x, i8 %y) { ret i8 %a } +; negative test - uses + define i8 @mul_sub_common_factor_use(i8 %x, i8 %y) { ; CHECK-LABEL: @mul_sub_common_factor_use( ; CHECK-NEXT: [[M:%.*]] = mul i8 [[X:%.*]], [[Y:%.*]] -- 2.7.4