From 230c8c56f21cfe4e23a24793f3137add1af1d1f0 Mon Sep 17 00:00:00 2001
From: Warren Ristow <warren.ristow@sony.com>
Date: Thu, 14 Jul 2022 08:21:04 -0700
Subject: [PATCH] [Reassociate] Cleanup minor missed optimizations

In analyzing issue #56483, it was noticed that running `opt` with
`-reassociate` was missing some minor optimizations. For example,
there were cases where the running `opt` on IR with floating-point
instructions that have the `fast` flags applied, sometimes resulted in
less efficient code than the input IR (things like dead instructions
left behind, and missed reassociations). These were sometimes noted
in the test-files with TODOs, to investigate further. This commit
fixes some of these problems, removing some TODOs in the process.

FTR, I refer to these as "minor" missed optimizations, because when
running a full clang/llvm compilation, these inefficiencies are not
happening, as other passes clean that residue up. Regardless, having
cleaner IR produced by `opt`, makes assessing the quality of fixes done
in `opt` easier.
---
 llvm/lib/Transforms/Scalar/Reassociate.cpp         | 38 +++++++-----
 .../Reassociate/fast-ReassociateVector.ll          |  6 +-
 llvm/test/Transforms/Reassociate/fast-basictest.ll | 68 ++++++++++------------
 llvm/test/Transforms/Reassociate/pr42349.ll        |  1 -
 4 files changed, 58 insertions(+), 55 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/Reassociate.cpp b/llvm/lib/Transforms/Scalar/Reassociate.cpp
index 75f0896..63351dd 100644
--- a/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -449,7 +449,8 @@ using RepeatedValue = std::pair<Value*, APInt>;
 /// of the expression) if it can turn them into binary operators of the right
 /// type and thus make the expression bigger.
 static bool LinearizeExprTree(Instruction *I,
-                              SmallVectorImpl<RepeatedValue> &Ops) {
+                              SmallVectorImpl<RepeatedValue> &Ops,
+                              ReassociatePass::OrderedSet &ToRedo) {
   assert((isa<UnaryOperator>(I) || isa<BinaryOperator>(I)) &&
          "Expected a UnaryOperator or BinaryOperator!");
   LLVM_DEBUG(dbgs() << "LINEARIZE: " << *I << '\n');
@@ -577,18 +578,27 @@ static bool LinearizeExprTree(Instruction *I,
       assert(Op->hasOneUse() && "Has uses outside the expression tree!");
 
       // If this is a multiply expression, turn any internal negations into
-      // multiplies by -1 so they can be reassociated.
-      if (Instruction *Tmp = dyn_cast<Instruction>(Op))
-        if ((Opcode == Instruction::Mul && match(Tmp, m_Neg(m_Value()))) ||
-            (Opcode == Instruction::FMul && match(Tmp, m_FNeg(m_Value())))) {
-          LLVM_DEBUG(dbgs()
-                     << "MORPH LEAF: " << *Op << " (" << Weight << ") TO ");
-          Tmp = LowerNegateToMultiply(Tmp);
-          LLVM_DEBUG(dbgs() << *Tmp << '\n');
-          Worklist.push_back(std::make_pair(Tmp, Weight));
-          Changed = true;
-          continue;
+      // multiplies by -1 so they can be reassociated.  Add any users of the
+      // newly created multiplication by -1 to the redo list, so any
+      // reassociation opportunities that are exposed will be reassociated
+      // further.
+      Instruction *Neg;
+      if (((Opcode == Instruction::Mul && match(Op, m_Neg(m_Value()))) ||
+           (Opcode == Instruction::FMul && match(Op, m_FNeg(m_Value())))) &&
+           match(Op, m_Instruction(Neg))) {
+        LLVM_DEBUG(dbgs()
+                   << "MORPH LEAF: " << *Op << " (" << Weight << ") TO ");
+        Instruction *Mul = LowerNegateToMultiply(Neg);
+        LLVM_DEBUG(dbgs() << *Mul << '\n');
+        Worklist.push_back(std::make_pair(Mul, Weight));
+        for (User *U : Mul->users()) {
+          if (BinaryOperator *UserBO = dyn_cast<BinaryOperator>(U))
+            ToRedo.insert(UserBO);
         }
+        ToRedo.insert(Neg);
+        Changed = true;
+        continue;
+      }
 
       // Failed to morph into an expression of the right type.  This really is
       // a leaf.
@@ -1141,7 +1151,7 @@ Value *ReassociatePass::RemoveFactorFromExpression(Value *V, Value *Factor) {
     return nullptr;
 
   SmallVector<RepeatedValue, 8> Tree;
-  MadeChange |= LinearizeExprTree(BO, Tree);
+  MadeChange |= LinearizeExprTree(BO, Tree, RedoInsts);
   SmallVector<ValueEntry, 8> Factors;
   Factors.reserve(Tree.size());
   for (unsigned i = 0, e = Tree.size(); i != e; ++i) {
@@ -2320,7 +2330,7 @@ void ReassociatePass::ReassociateExpression(BinaryOperator *I) {
   // First, walk the expression tree, linearizing the tree, collecting the
   // operand information.
   SmallVector<RepeatedValue, 8> Tree;
-  MadeChange |= LinearizeExprTree(I, Tree);
+  MadeChange |= LinearizeExprTree(I, Tree, RedoInsts);
   SmallVector<ValueEntry, 8> Ops;
   Ops.reserve(Tree.size());
   for (const RepeatedValue &E : Tree)
diff --git a/llvm/test/Transforms/Reassociate/fast-ReassociateVector.ll b/llvm/test/Transforms/Reassociate/fast-ReassociateVector.ll
index 339884b..9a96afa 100644
--- a/llvm/test/Transforms/Reassociate/fast-ReassociateVector.ll
+++ b/llvm/test/Transforms/Reassociate/fast-ReassociateVector.ll
@@ -281,11 +281,10 @@ define <2 x double> @test9_reassoc_unary_fneg(<2 x double> %b, <2 x double> %a)
 
 define <2 x float> @test10(<2 x float> %a, <2 x float> %b, <2 x float> %z) {
 ; CHECK-LABEL: @test10(
-; CHECK-NEXT:    [[TMP1:%.*]] = fsub fast <2 x float> zeroinitializer, zeroinitializer
 ; CHECK-NEXT:    [[C:%.*]] = fmul fast <2 x float> [[A:%.*]], <float 4.000000e+01, float 4.000000e+01>
 ; CHECK-NEXT:    [[E:%.*]] = fmul fast <2 x float> [[C]], [[Z:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = fadd fast <2 x float> [[E]], zeroinitializer
-; CHECK-NEXT:    ret <2 x float> [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fadd fast <2 x float> [[E]], zeroinitializer
+; CHECK-NEXT:    ret <2 x float> [[TMP1]]
 ;
   %d = fmul fast <2 x float> %z, <float 4.000000e+01, float 4.000000e+01>
   %c = fsub fast <2 x float> <float 0.000000e+00, float 0.000000e+00>, %d
@@ -296,7 +295,6 @@ define <2 x float> @test10(<2 x float> %a, <2 x float> %b, <2 x float> %z) {
 
 define <2 x float> @test10_unary_fneg(<2 x float> %a, <2 x float> %b, <2 x float> %z) {
 ; CHECK-LABEL: @test10_unary_fneg(
-; CHECK-NEXT:    [[TMP1:%.*]] = fneg fast <2 x float> zeroinitializer
 ; CHECK-NEXT:    [[E:%.*]] = fmul fast <2 x float> [[A:%.*]], <float 4.000000e+01, float 4.000000e+01>
 ; CHECK-NEXT:    [[F:%.*]] = fmul fast <2 x float> [[E]], [[Z:%.*]]
 ; CHECK-NEXT:    ret <2 x float> [[F]]
diff --git a/llvm/test/Transforms/Reassociate/fast-basictest.ll b/llvm/test/Transforms/Reassociate/fast-basictest.ll
index 038c8c8..c6cb63d 100644
--- a/llvm/test/Transforms/Reassociate/fast-basictest.ll
+++ b/llvm/test/Transforms/Reassociate/fast-basictest.ll
@@ -181,16 +181,12 @@ define float @test6_reassoc(float %A, float %B, float %C) {
 }
 
 ; (-X)*Y + Z -> Z-X*Y
-; TODO: check why IR transformation of test7 with 'fast' math flag
-; is worse than without it (and even without transformation)
 
 define float @test7(float %X, float %Y, float %Z) {
 ; CHECK-LABEL: @test7(
-; CHECK-NEXT:    [[TMP1:%.*]] = fsub fast float 0.000000e+00, 0.000000e+00
-; CHECK-NEXT:    [[A:%.*]] = fmul fast float [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT:    [[B:%.*]] = fmul fast float [[A]], 1.000000e+00
-; CHECK-NEXT:    [[TMP2:%.*]] = fsub fast float [[Z:%.*]], [[B]]
-; CHECK-NEXT:    ret float [[TMP2]]
+; CHECK-NEXT:    [[B:%.*]] = fmul fast float [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fsub fast float [[Z:%.*]], [[B]]
+; CHECK-NEXT:    ret float [[TMP1]]
 ;
   %A = fsub fast float 0.0, %X
   %B = fmul fast float %A, %Y
@@ -200,11 +196,9 @@ define float @test7(float %X, float %Y, float %Z) {
 
 define float @test7_unary_fneg(float %X, float %Y, float %Z) {
 ; CHECK-LABEL: @test7_unary_fneg(
-; CHECK-NEXT:    [[TMP1:%.*]] = fneg fast float 0.000000e+00
-; CHECK-NEXT:    [[A:%.*]] = fmul fast float [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT:    [[B:%.*]] = fmul fast float [[A]], 1.000000e+00
-; CHECK-NEXT:    [[TMP2:%.*]] = fsub fast float [[Z:%.*]], [[B]]
-; CHECK-NEXT:    ret float [[TMP2]]
+; CHECK-NEXT:    [[B:%.*]] = fmul fast float [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fsub fast float [[Z:%.*]], [[B]]
+; CHECK-NEXT:    ret float [[TMP1]]
 ;
   %A = fneg fast float %X
   %B = fmul fast float %A, %Y
@@ -239,6 +233,22 @@ define float @test7_reassoc(float %X, float %Y, float %Z) {
   ret float %C
 }
 
+; Integer version of:
+;   (-X)*Y + Z -> Z-X*Y
+; TODO: check if we can change the mul of -1 and the add to a sub.
+define i32 @test7_int(i32 %X, i32 %Y, i32 %Z) {
+; CHECK-LABEL: @test7_int(
+; CHECK-NEXT:    [[A:%.*]] = mul i32 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = mul i32 [[A]], -1
+; CHECK-NEXT:    [[C:%.*]] = add i32 [[B]], [[Z:%.*]]
+; CHECK-NEXT:    ret i32 [[C]]
+;
+  %A = sub i32 0, %X
+  %B = mul i32 %A, %Y
+  %C = add i32 %B, %Z
+  ret i32 %C
+}
+
 define float @test8(float %X) {
 ; CHECK-LABEL: @test8(
 ; CHECK-NEXT:    [[FACTOR:%.*]] = fmul fast float [[X:%.*]], 9.400000e+01
@@ -276,7 +286,6 @@ define float @test10(float %W) {
 
 define float @test11(float %X) {
 ; CHECK-LABEL: @test11(
-; CHECK-NEXT:    [[TMP1:%.*]] = fneg fast float 0.000000e+00
 ; CHECK-NEXT:    [[FACTOR:%.*]] = fmul fast float [[X:%.*]], -3.000000e+00
 ; CHECK-NEXT:    [[Z:%.*]] = fadd fast float [[FACTOR]], 6.000000e+00
 ; CHECK-NEXT:    ret float [[Z]]
@@ -289,17 +298,12 @@ define float @test11(float %X) {
   ret float %Z
 }
 
-; TODO: check why IR transformation of test12 with 'fast' math flag
-; is worse than without it (and even without transformation)
-
 define float @test12(float %X1, float %X2, float %X3) {
 ; CHECK-LABEL: @test12(
-; CHECK-NEXT:    [[TMP1:%.*]] = fsub fast float 0.000000e+00, 0.000000e+00
-; CHECK-NEXT:    [[A:%.*]] = fmul fast float [[X2:%.*]], [[X1:%.*]]
-; CHECK-NEXT:    [[B:%.*]] = fmul fast float [[A]], 1.000000e+00
+; CHECK-NEXT:    [[B:%.*]] = fmul fast float [[X2:%.*]], [[X1:%.*]]
 ; CHECK-NEXT:    [[C:%.*]] = fmul fast float [[X3:%.*]], [[X1]]
-; CHECK-NEXT:    [[TMP2:%.*]] = fsub fast float [[C]], [[B]]
-; CHECK-NEXT:    ret float [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fsub fast float [[C]], [[B]]
+; CHECK-NEXT:    ret float [[TMP1]]
 ;
   %A = fsub fast float 0.000000e+00, %X1
   %B = fmul fast float %A, %X2   ; -X1*X2
@@ -310,12 +314,10 @@ define float @test12(float %X1, float %X2, float %X3) {
 
 define float @test12_unary_fneg(float %X1, float %X2, float %X3) {
 ; CHECK-LABEL: @test12_unary_fneg(
-; CHECK-NEXT:    [[TMP1:%.*]] = fneg fast float 0.000000e+00
-; CHECK-NEXT:    [[A:%.*]] = fmul fast float [[X2:%.*]], [[X1:%.*]]
-; CHECK-NEXT:    [[B:%.*]] = fmul fast float [[A]], 1.000000e+00
+; CHECK-NEXT:    [[B:%.*]] = fmul fast float [[X2:%.*]], [[X1:%.*]]
 ; CHECK-NEXT:    [[C:%.*]] = fmul fast float [[X3:%.*]], [[X1]]
-; CHECK-NEXT:    [[TMP2:%.*]] = fsub fast float [[C]], [[B]]
-; CHECK-NEXT:    ret float [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fsub fast float [[C]], [[B]]
+; CHECK-NEXT:    ret float [[TMP1]]
 ;
   %A = fneg fast float %X1
   %B = fmul fast float %A, %X2   ; -X1*X2
@@ -490,12 +492,11 @@ define float @test15_reassoc(float %b, float %a) {
 
 define float @test16(float %a, float %b, float %z) {
 ; CHECK-LABEL: @test16(
-; CHECK-NEXT:    [[TMP1:%.*]] = fsub fast float 0.000000e+00, 0.000000e+00
 ; CHECK-NEXT:    [[C:%.*]] = fmul fast float [[A:%.*]], 1.234500e+04
 ; CHECK-NEXT:    [[E:%.*]] = fmul fast float [[C]], [[B:%.*]]
 ; CHECK-NEXT:    [[F:%.*]] = fmul fast float [[E]], [[Z:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = fadd fast float [[F]], 0.000000e+00
-; CHECK-NEXT:    ret float [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fadd fast float [[F]], 0.000000e+00
+; CHECK-NEXT:    ret float [[TMP1]]
 ;
   %c = fsub fast float 0.000000e+00, %z
   %d = fmul fast float %a, %b
@@ -507,7 +508,6 @@ define float @test16(float %a, float %b, float %z) {
 
 define float @test16_unary_fneg(float %a, float %b, float %z) {
 ; CHECK-LABEL: @test16_unary_fneg(
-; CHECK-NEXT:    [[TMP1:%.*]] = fneg fast float 0.000000e+00
 ; CHECK-NEXT:    [[E:%.*]] = fmul fast float [[A:%.*]], 1.234500e+04
 ; CHECK-NEXT:    [[F:%.*]] = fmul fast float [[E]], [[B:%.*]]
 ; CHECK-NEXT:    [[G:%.*]] = fmul fast float [[F]], [[Z:%.*]]
@@ -539,16 +539,14 @@ define float @test16_reassoc(float %a, float %b, float %z) {
 }
 
 ; TODO: check if we can remove:
-; - fsub fast 0, 0
 ; - fadd fast x, 0
 ; ... as 'fast' implies 'nsz'
 define float @test17(float %a, float %b, float %z) {
 ; CHECK-LABEL: @test17(
-; CHECK-NEXT:    [[TMP1:%.*]] = fsub fast float 0.000000e+00, 0.000000e+00
 ; CHECK-NEXT:    [[C:%.*]] = fmul fast float [[A:%.*]], 4.000000e+01
 ; CHECK-NEXT:    [[E:%.*]] = fmul fast float [[C]], [[Z:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = fadd fast float [[E]], 0.000000e+00
-; CHECK-NEXT:    ret float [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fadd fast float [[E]], 0.000000e+00
+; CHECK-NEXT:    ret float [[TMP1]]
 ;
   %d = fmul fast float %z, 4.000000e+01
   %c = fsub fast float 0.000000e+00, %d
@@ -557,10 +555,8 @@ define float @test17(float %a, float %b, float %z) {
   ret float %f
 }
 
-; TODO: check if we can remove fneg fast 0 as 'fast' implies 'nsz'
 define float @test17_unary_fneg(float %a, float %b, float %z) {
 ; CHECK-LABEL: @test17_unary_fneg(
-; CHECK-NEXT:    [[TMP1:%.*]] = fneg fast float 0.000000e+00
 ; CHECK-NEXT:    [[E:%.*]] = fmul fast float [[A:%.*]], 4.000000e+01
 ; CHECK-NEXT:    [[F:%.*]] = fmul fast float [[E]], [[Z:%.*]]
 ; CHECK-NEXT:    ret float [[F]]
diff --git a/llvm/test/Transforms/Reassociate/pr42349.ll b/llvm/test/Transforms/Reassociate/pr42349.ll
index ce008c3..d888f68 100644
--- a/llvm/test/Transforms/Reassociate/pr42349.ll
+++ b/llvm/test/Transforms/Reassociate/pr42349.ll
@@ -5,7 +5,6 @@ define  float @wibble(float %tmp6) #0 {
 ; CHECK-LABEL: @wibble(
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    [[TMP7:%.*]] = fmul float [[TMP6:%.*]], -1.000000e+00
-; CHECK-NEXT:    [[TMP0:%.*]] = fsub float -0.000000e+00, 0.000000e+00
 ; CHECK-NEXT:    [[TMP9:%.*]] = fmul fast float [[TMP6]], 0xFFF0000000000000
 ; CHECK-NEXT:    ret float [[TMP9]]
 ;
-- 
2.7.4