From 68c197f07eeae71b9b772c9e0c3b846c7025b332 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 17 Jan 2023 14:37:18 -0500 Subject: [PATCH] [InstCombine] factor difference-of-squares to reduce multiplication (X * X) - (Y * Y) --> (X + Y) * (X - Y) https://alive2.llvm.org/ce/z/BAuRCf The no-wrap propagation could be relaxed in some cases, but there does not seem to be an obvious rule for that. --- .../Transforms/InstCombine/InstCombineAddSub.cpp | 16 ++++++++ llvm/test/Transforms/InstCombine/sub.ll | 44 ++++++++++++++-------- 2 files changed, 45 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 21bb916..c458878 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -2372,6 +2372,22 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) { I, Builder.CreateIntrinsic(Intrinsic::ctpop, {I.getType()}, {Builder.CreateNot(X)})); + // Reduce multiplies for difference-of-squares by factoring: + // (X * X) - (Y * Y) --> (X + Y) * (X - Y) + if (match(Op0, m_OneUse(m_Mul(m_Value(X), m_Deferred(X)))) && + match(Op1, m_OneUse(m_Mul(m_Value(Y), m_Deferred(Y))))) { + auto *OBO0 = cast(Op0); + auto *OBO1 = cast(Op1); + bool PropagateNSW = I.hasNoSignedWrap() && OBO0->hasNoSignedWrap() && + OBO1->hasNoSignedWrap(); + bool PropagateNUW = I.hasNoUnsignedWrap() && OBO0->hasNoUnsignedWrap() && + OBO1->hasNoUnsignedWrap(); + Value *Add = Builder.CreateAdd(X, Y, "add", PropagateNUW, PropagateNSW); + Value *Sub = Builder.CreateSub(X, Y, "sub", PropagateNUW, PropagateNSW); + Value *Mul = Builder.CreateMul(Add, Sub, "", PropagateNUW, PropagateNSW); + return replaceInstUsesWith(I, Mul); + } + return TryToNarrowDeduceFlags(); } diff --git a/llvm/test/Transforms/InstCombine/sub.ll b/llvm/test/Transforms/InstCombine/sub.ll index 4a6a133..120036a 100644 --- a/llvm/test/Transforms/InstCombine/sub.ll +++ b/llvm/test/Transforms/InstCombine/sub.ll @@ -2390,11 +2390,13 @@ define <2 x i8> @sub_to_and_vector4(<2 x i8> %x) { ret <2 x i8> %r } +; (X * X) - (Y * Y) --> (X + Y) * (X - Y) + define i8 @diff_of_squares(i8 %x, i8 %y) { ; CHECK-LABEL: @diff_of_squares( -; CHECK-NEXT: [[X2:%.*]] = mul i8 [[X:%.*]], [[X]] -; CHECK-NEXT: [[Y2:%.*]] = mul i8 [[Y:%.*]], [[Y]] -; CHECK-NEXT: [[R:%.*]] = sub i8 [[X2]], [[Y2]] +; CHECK-NEXT: [[ADD:%.*]] = add i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[SUB:%.*]] = sub i8 [[X]], [[Y]] +; CHECK-NEXT: [[R:%.*]] = mul i8 [[ADD]], [[SUB]] ; CHECK-NEXT: ret i8 [[R]] ; %x2 = mul i8 %x, %x @@ -2403,11 +2405,13 @@ define i8 @diff_of_squares(i8 %x, i8 %y) { ret i8 %r } +; All-or-nothing for propagation of no-wrap flags (possibly conservative) + define i5 @diff_of_squares_nuw(i5 %x, i5 %y) { ; CHECK-LABEL: @diff_of_squares_nuw( -; CHECK-NEXT: [[X2:%.*]] = mul nuw i5 [[X:%.*]], [[X]] -; CHECK-NEXT: [[Y2:%.*]] = mul nuw i5 [[Y:%.*]], [[Y]] -; CHECK-NEXT: [[R:%.*]] = sub nuw i5 [[X2]], [[Y2]] +; CHECK-NEXT: [[ADD:%.*]] = add nuw i5 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[SUB:%.*]] = sub nuw i5 [[X]], [[Y]] +; CHECK-NEXT: [[R:%.*]] = mul nuw i5 [[ADD]], [[SUB]] ; CHECK-NEXT: ret i5 [[R]] ; %x2 = mul nuw i5 %x, %x @@ -2416,11 +2420,13 @@ define i5 @diff_of_squares_nuw(i5 %x, i5 %y) { ret i5 %r } +; All-or-nothing for propagation of no-wrap flags (possibly conservative) + define i5 @diff_of_squares_partial_nuw(i5 %x, i5 %y) { ; CHECK-LABEL: @diff_of_squares_partial_nuw( -; CHECK-NEXT: [[X2:%.*]] = mul nuw i5 [[X:%.*]], [[X]] -; CHECK-NEXT: [[Y2:%.*]] = mul nuw i5 [[Y:%.*]], [[Y]] -; CHECK-NEXT: [[R:%.*]] = sub i5 [[X2]], [[Y2]] +; CHECK-NEXT: [[ADD:%.*]] = add i5 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[SUB:%.*]] = sub i5 [[X]], [[Y]] +; CHECK-NEXT: [[R:%.*]] = mul i5 [[ADD]], [[SUB]] ; CHECK-NEXT: ret i5 [[R]] ; %x2 = mul nuw i5 %x, %x @@ -2429,11 +2435,13 @@ define i5 @diff_of_squares_partial_nuw(i5 %x, i5 %y) { ret i5 %r } +; All-or-nothing for propagation of no-wrap flags (possibly conservative) + define <2 x i5> @diff_of_squares_nsw(<2 x i5> %x, <2 x i5> %y) { ; CHECK-LABEL: @diff_of_squares_nsw( -; CHECK-NEXT: [[X2:%.*]] = mul nsw <2 x i5> [[X:%.*]], [[X]] -; CHECK-NEXT: [[Y2:%.*]] = mul nsw <2 x i5> [[Y:%.*]], [[Y]] -; CHECK-NEXT: [[R:%.*]] = sub nsw <2 x i5> [[X2]], [[Y2]] +; CHECK-NEXT: [[ADD:%.*]] = add nsw <2 x i5> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[SUB:%.*]] = sub nsw <2 x i5> [[X]], [[Y]] +; CHECK-NEXT: [[R:%.*]] = mul nsw <2 x i5> [[ADD]], [[SUB]] ; CHECK-NEXT: ret <2 x i5> [[R]] ; %x2 = mul nsw <2 x i5> %x, %x @@ -2442,11 +2450,13 @@ define <2 x i5> @diff_of_squares_nsw(<2 x i5> %x, <2 x i5> %y) { ret <2 x i5> %r } +; All-or-nothing for propagation of no-wrap flags (possibly conservative) + define <2 x i5> @diff_of_squares_partial_nsw(<2 x i5> %x, <2 x i5> %y) { ; CHECK-LABEL: @diff_of_squares_partial_nsw( -; CHECK-NEXT: [[X2:%.*]] = mul nsw <2 x i5> [[X:%.*]], [[X]] -; CHECK-NEXT: [[Y2:%.*]] = mul <2 x i5> [[Y:%.*]], [[Y]] -; CHECK-NEXT: [[R:%.*]] = sub nsw <2 x i5> [[X2]], [[Y2]] +; CHECK-NEXT: [[ADD:%.*]] = add <2 x i5> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[SUB:%.*]] = sub <2 x i5> [[X]], [[Y]] +; CHECK-NEXT: [[R:%.*]] = mul <2 x i5> [[ADD]], [[SUB]] ; CHECK-NEXT: ret <2 x i5> [[R]] ; %x2 = mul nsw <2 x i5> %x, %x @@ -2455,6 +2465,8 @@ define <2 x i5> @diff_of_squares_partial_nsw(<2 x i5> %x, <2 x i5> %y) { ret <2 x i5> %r } +; negative test + define i8 @diff_of_squares_use1(i8 %x, i8 %y) { ; CHECK-LABEL: @diff_of_squares_use1( ; CHECK-NEXT: [[X2:%.*]] = mul i8 [[X:%.*]], [[X]] @@ -2470,6 +2482,8 @@ define i8 @diff_of_squares_use1(i8 %x, i8 %y) { ret i8 %r } +; negative test + define i8 @diff_of_squares_use2(i8 %x, i8 %y) { ; CHECK-LABEL: @diff_of_squares_use2( ; CHECK-NEXT: [[X2:%.*]] = mul i8 [[X:%.*]], [[X]] -- 2.7.4