From c6507930493bf57d88bcb1f7d83da1b3c08ab02a Mon Sep 17 00:00:00 2001
From: Warren Ristow <warren.ristow@sony.com>
Date: Fri, 15 Jul 2022 11:44:35 -0700
Subject: [PATCH] [Reassociate] Enable FP reassociation via 'reassoc' and 'nsz'

Compiling with '-ffast-math' turns on all the FastMathFlags (FMF), as
expected, and that enables FP reassociation. Only the two FMF flags
'reassoc' and 'nsz' are technically required to perform reassociation,
but disabling other unrelated FMF bits is needlessly suppressing the
optimization.

This patch fixes that needless suppression, and makes appropriate
adjustments to test-cases, fixing some outstanding TODOs in the process.

Fixes: #56483

Reviewed By: spatel

Differential Revision: https://reviews.llvm.org/D129523
---
 llvm/lib/Transforms/Scalar/Reassociate.cpp         | 20 +++++++++++----
 .../Transforms/PhaseOrdering/fast-basictest.ll     | 29 +++++++++++-----------
 llvm/test/Transforms/Reassociate/fast-basictest.ll | 26 ++++++++-----------
 3 files changed, 40 insertions(+), 35 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/Reassociate.cpp b/llvm/lib/Transforms/Scalar/Reassociate.cpp
index 63351dd..240fb5e 100644
--- a/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -142,12 +142,21 @@ XorOpnd::XorOpnd(Value *V) {
   isOr = true;
 }
 
+/// Return true if I is an instruction with the FastMathFlags that are needed
+/// for general reassociation set.  This is not the same as testing
+/// Instruction::isAssociative() because it includes operations like fsub.
+/// (This routine is only intended to be called for floating-point operations.)
+static bool hasFPAssociativeFlags(Instruction *I) {
+  assert(I && I->getType()->isFPOrFPVectorTy() && "Should only check FP ops");
+  return I->hasAllowReassoc() && I->hasNoSignedZeros();
+}
+
 /// Return true if V is an instruction of the specified opcode and if it
 /// only has one use.
 static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) {
   auto *I = dyn_cast<Instruction>(V);
   if (I && I->hasOneUse() && I->getOpcode() == Opcode)
-    if (!isa<FPMathOperator>(I) || I->isFast())
+    if (!isa<FPMathOperator>(I) || hasFPAssociativeFlags(I))
       return cast<BinaryOperator>(I);
   return nullptr;
 }
@@ -157,7 +166,7 @@ static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode1,
   auto *I = dyn_cast<Instruction>(V);
   if (I && I->hasOneUse() &&
       (I->getOpcode() == Opcode1 || I->getOpcode() == Opcode2))
-    if (!isa<FPMathOperator>(I) || I->isFast())
+    if (!isa<FPMathOperator>(I) || hasFPAssociativeFlags(I))
       return cast<BinaryOperator>(I);
   return nullptr;
 }
@@ -573,7 +582,7 @@ static bool LinearizeExprTree(Instruction *I,
       assert((!isa<Instruction>(Op) ||
               cast<Instruction>(Op)->getOpcode() != Opcode
               || (isa<FPMathOperator>(Op) &&
-                  !cast<Instruction>(Op)->isFast())) &&
+                  !hasFPAssociativeFlags(cast<Instruction>(Op)))) &&
              "Should have been handled above!");
       assert(Op->hasOneUse() && "Has uses outside the expression tree!");
 
@@ -2216,8 +2225,9 @@ void ReassociatePass::OptimizeInst(Instruction *I) {
   if (Instruction *Res = canonicalizeNegFPConstants(I))
     I = Res;
 
-  // Don't optimize floating-point instructions unless they are 'fast'.
-  if (I->getType()->isFPOrFPVectorTy() && !I->isFast())
+  // Don't optimize floating-point instructions unless they have the
+  // appropriate FastMathFlags for reassociation enabled.
+  if (I->getType()->isFPOrFPVectorTy() && !hasFPAssociativeFlags(I))
     return;
 
   // Do not reassociate boolean (i1) expressions.  We want to preserve the
diff --git a/llvm/test/Transforms/PhaseOrdering/fast-basictest.ll b/llvm/test/Transforms/PhaseOrdering/fast-basictest.ll
index f44ee73..a5780dd 100644
--- a/llvm/test/Transforms/PhaseOrdering/fast-basictest.ll
+++ b/llvm/test/Transforms/PhaseOrdering/fast-basictest.ll
@@ -122,14 +122,10 @@ define float @test15_unary_fneg(float %b, float %a) {
   ret float %4
 }
 
-; TODO: check if it is possible to perform the optimization without 'fast'
-; with 'reassoc' and 'nsz' only.
 define float @test15_reassoc_nsz(float %b, float %a) {
 ; CHECK-LABEL: @test15_reassoc_nsz(
-; CHECK-NEXT:    [[TMP1:%.*]] = fadd reassoc nsz float [[A:%.*]], 1.234000e+03
-; CHECK-NEXT:    [[TMP2:%.*]] = fadd reassoc nsz float [[TMP1]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP3:%.*]] = fsub reassoc nsz float [[TMP2]], [[A]]
-; CHECK-NEXT:    ret float [[TMP3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fadd reassoc nsz float [[B:%.*]], 1.234000e+03
+; CHECK-NEXT:    ret float [[TMP1]]
 ;
   %1 = fadd reassoc nsz float %a, 1234.0
   %2 = fadd reassoc nsz float %b, %1
@@ -197,15 +193,18 @@ define float @test16_unary_fneg(float %a, float %b, float %z) {
   ret float %g
 }
 
-; TODO: check if it is possible to perform the optimization without 'fast'
-; with 'reassoc' and 'nsz' only.
 define float @test16_reassoc_nsz(float %a, float %b, float %z) {
-; CHECK-LABEL: @test16_reassoc_nsz(
-; CHECK-NEXT:    [[C:%.*]] = fneg reassoc nsz float [[Z:%.*]]
-; CHECK-NEXT:    [[D:%.*]] = fmul reassoc nsz float [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[E:%.*]] = fmul reassoc nsz float [[D]], [[C]]
-; CHECK-NEXT:    [[G:%.*]] = fmul reassoc nsz float [[E]], -1.234500e+04
-; CHECK-NEXT:    ret float [[G]]
+; REASSOC_AND_IC-LABEL: @test16_reassoc_nsz(
+; REASSOC_AND_IC-NEXT:    [[C:%.*]] = fmul reassoc nsz float [[A:%.*]], 1.234500e+04
+; REASSOC_AND_IC-NEXT:    [[E:%.*]] = fmul reassoc nsz float [[C]], [[B:%.*]]
+; REASSOC_AND_IC-NEXT:    [[F:%.*]] = fmul reassoc nsz float [[E]], [[Z:%.*]]
+; REASSOC_AND_IC-NEXT:    ret float [[F]]
+;
+; O2-LABEL: @test16_reassoc_nsz(
+; O2-NEXT:    [[D:%.*]] = fmul reassoc nsz float [[A:%.*]], 1.234500e+04
+; O2-NEXT:    [[E:%.*]] = fmul reassoc nsz float [[D]], [[B:%.*]]
+; O2-NEXT:    [[G:%.*]] = fmul reassoc nsz float [[E]], [[Z:%.*]]
+; O2-NEXT:    ret float [[G]]
 ;
   %c = fsub reassoc nsz float 0.000000e+00, %z
   %d = fmul reassoc nsz float %a, %b
@@ -282,7 +281,7 @@ define float @test19(float %a, float %b, float %c) nounwind  {
 
 define float @test19_reassoc_nsz(float %a, float %b, float %c) nounwind  {
 ; CHECK-LABEL: @test19_reassoc_nsz(
-; CHECK-NEXT:    [[TMP1:%.*]] = fadd reassoc nsz float [[B:%.*]], [[C:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fadd reassoc nsz float [[C:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[T7:%.*]] = fneg reassoc nsz float [[TMP1]]
 ; CHECK-NEXT:    ret float [[T7]]
 ;
diff --git a/llvm/test/Transforms/Reassociate/fast-basictest.ll b/llvm/test/Transforms/Reassociate/fast-basictest.ll
index c6cb63d..189613c 100644
--- a/llvm/test/Transforms/Reassociate/fast-basictest.ll
+++ b/llvm/test/Transforms/Reassociate/fast-basictest.ll
@@ -181,7 +181,6 @@ define float @test6_reassoc(float %A, float %B, float %C) {
 }
 
 ; (-X)*Y + Z -> Z-X*Y
-
 define float @test7(float %X, float %Y, float %Z) {
 ; CHECK-LABEL: @test7(
 ; CHECK-NEXT:    [[B:%.*]] = fmul fast float [[Y:%.*]], [[X:%.*]]
@@ -208,10 +207,9 @@ define float @test7_unary_fneg(float %X, float %Y, float %Z) {
 
 define float @test7_reassoc_nsz(float %X, float %Y, float %Z) {
 ; CHECK-LABEL: @test7_reassoc_nsz(
-; CHECK-NEXT:    [[A:%.*]] = fsub reassoc nsz float 0.000000e+00, [[X:%.*]]
-; CHECK-NEXT:    [[B:%.*]] = fmul reassoc nsz float [[A]], [[Y:%.*]]
-; CHECK-NEXT:    [[C:%.*]] = fadd reassoc nsz float [[B]], [[Z:%.*]]
-; CHECK-NEXT:    ret float [[C]]
+; CHECK-NEXT:    [[B:%.*]] = fmul reassoc nsz float [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fsub reassoc nsz float [[Z:%.*]], [[B]]
+; CHECK-NEXT:    ret float [[TMP1]]
 ;
   %A = fsub reassoc nsz float 0.0, %X
   %B = fmul reassoc nsz float %A, %Y
@@ -328,11 +326,10 @@ define float @test12_unary_fneg(float %X1, float %X2, float %X3) {
 
 define float @test12_reassoc_nsz(float %X1, float %X2, float %X3) {
 ; CHECK-LABEL: @test12_reassoc_nsz(
-; CHECK-NEXT:    [[A:%.*]] = fsub reassoc nsz float 0.000000e+00, [[X1:%.*]]
-; CHECK-NEXT:    [[B:%.*]] = fmul reassoc nsz float [[A]], [[X2:%.*]]
-; CHECK-NEXT:    [[C:%.*]] = fmul reassoc nsz float [[X1]], [[X3:%.*]]
-; CHECK-NEXT:    [[D:%.*]] = fadd reassoc nsz float [[B]], [[C]]
-; CHECK-NEXT:    ret float [[D]]
+; CHECK-NEXT:    [[B:%.*]] = fmul reassoc nsz float [[X2:%.*]], [[X1:%.*]]
+; CHECK-NEXT:    [[C:%.*]] = fmul reassoc nsz float [[X3:%.*]], [[X1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fsub reassoc nsz float [[C]], [[B]]
+; CHECK-NEXT:    ret float [[TMP1]]
 ;
   %A = fsub reassoc nsz float 0.000000e+00, %X1
   %B = fmul reassoc nsz float %A, %X2   ; -X1*X2
@@ -456,13 +453,12 @@ define float @test15_unary_fneg(float %b, float %a) {
   ret float %4
 }
 
+; TODO: check if we can remove dead fsub.
 define float @test15_reassoc_nsz(float %b, float %a) {
 ; CHECK-LABEL: @test15_reassoc_nsz(
-; CHECK-NEXT:    [[TMP1:%.*]] = fadd reassoc nsz float [[A:%.*]], 1.234000e+03
-; CHECK-NEXT:    [[TMP2:%.*]] = fadd reassoc nsz float [[B:%.*]], [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = fsub reassoc nsz float 0.000000e+00, [[A]]
-; CHECK-NEXT:    [[TMP4:%.*]] = fadd reassoc nsz float [[TMP3]], [[TMP2]]
-; CHECK-NEXT:    ret float [[TMP4]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fsub reassoc nsz float 0.000000e+00, [[A:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = fadd reassoc nsz float [[B:%.*]], 1.234000e+03
+; CHECK-NEXT:    ret float [[TMP2]]
 ;
   %1 = fadd reassoc nsz float %a, 1234.0
   %2 = fadd reassoc nsz float %b, %1
-- 
2.7.4