From 230c8c56f21cfe4e23a24793f3137add1af1d1f0 Mon Sep 17 00:00:00 2001 From: Warren Ristow Date: Thu, 14 Jul 2022 08:21:04 -0700 Subject: [PATCH] [Reassociate] Cleanup minor missed optimizations In analyzing issue #56483, it was noticed that running `opt` with `-reassociate` was missing some minor optimizations. For example, there were cases where the running `opt` on IR with floating-point instructions that have the `fast` flags applied, sometimes resulted in less efficient code than the input IR (things like dead instructions left behind, and missed reassociations). These were sometimes noted in the test-files with TODOs, to investigate further. This commit fixes some of these problems, removing some TODOs in the process. FTR, I refer to these as "minor" missed optimizations, because when running a full clang/llvm compilation, these inefficiencies are not happening, as other passes clean that residue up. Regardless, having cleaner IR produced by `opt`, makes assessing the quality of fixes done in `opt` easier. --- llvm/lib/Transforms/Scalar/Reassociate.cpp | 38 +++++++----- .../Reassociate/fast-ReassociateVector.ll | 6 +- llvm/test/Transforms/Reassociate/fast-basictest.ll | 68 ++++++++++------------ llvm/test/Transforms/Reassociate/pr42349.ll | 1 - 4 files changed, 58 insertions(+), 55 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/Reassociate.cpp b/llvm/lib/Transforms/Scalar/Reassociate.cpp index 75f0896..63351dd 100644 --- a/llvm/lib/Transforms/Scalar/Reassociate.cpp +++ b/llvm/lib/Transforms/Scalar/Reassociate.cpp @@ -449,7 +449,8 @@ using RepeatedValue = std::pair; /// of the expression) if it can turn them into binary operators of the right /// type and thus make the expression bigger. static bool LinearizeExprTree(Instruction *I, - SmallVectorImpl &Ops) { + SmallVectorImpl &Ops, + ReassociatePass::OrderedSet &ToRedo) { assert((isa(I) || isa(I)) && "Expected a UnaryOperator or BinaryOperator!"); LLVM_DEBUG(dbgs() << "LINEARIZE: " << *I << '\n'); @@ -577,18 +578,27 @@ static bool LinearizeExprTree(Instruction *I, assert(Op->hasOneUse() && "Has uses outside the expression tree!"); // If this is a multiply expression, turn any internal negations into - // multiplies by -1 so they can be reassociated. - if (Instruction *Tmp = dyn_cast(Op)) - if ((Opcode == Instruction::Mul && match(Tmp, m_Neg(m_Value()))) || - (Opcode == Instruction::FMul && match(Tmp, m_FNeg(m_Value())))) { - LLVM_DEBUG(dbgs() - << "MORPH LEAF: " << *Op << " (" << Weight << ") TO "); - Tmp = LowerNegateToMultiply(Tmp); - LLVM_DEBUG(dbgs() << *Tmp << '\n'); - Worklist.push_back(std::make_pair(Tmp, Weight)); - Changed = true; - continue; + // multiplies by -1 so they can be reassociated. Add any users of the + // newly created multiplication by -1 to the redo list, so any + // reassociation opportunities that are exposed will be reassociated + // further. + Instruction *Neg; + if (((Opcode == Instruction::Mul && match(Op, m_Neg(m_Value()))) || + (Opcode == Instruction::FMul && match(Op, m_FNeg(m_Value())))) && + match(Op, m_Instruction(Neg))) { + LLVM_DEBUG(dbgs() + << "MORPH LEAF: " << *Op << " (" << Weight << ") TO "); + Instruction *Mul = LowerNegateToMultiply(Neg); + LLVM_DEBUG(dbgs() << *Mul << '\n'); + Worklist.push_back(std::make_pair(Mul, Weight)); + for (User *U : Mul->users()) { + if (BinaryOperator *UserBO = dyn_cast(U)) + ToRedo.insert(UserBO); } + ToRedo.insert(Neg); + Changed = true; + continue; + } // Failed to morph into an expression of the right type. This really is // a leaf. @@ -1141,7 +1151,7 @@ Value *ReassociatePass::RemoveFactorFromExpression(Value *V, Value *Factor) { return nullptr; SmallVector Tree; - MadeChange |= LinearizeExprTree(BO, Tree); + MadeChange |= LinearizeExprTree(BO, Tree, RedoInsts); SmallVector Factors; Factors.reserve(Tree.size()); for (unsigned i = 0, e = Tree.size(); i != e; ++i) { @@ -2320,7 +2330,7 @@ void ReassociatePass::ReassociateExpression(BinaryOperator *I) { // First, walk the expression tree, linearizing the tree, collecting the // operand information. SmallVector Tree; - MadeChange |= LinearizeExprTree(I, Tree); + MadeChange |= LinearizeExprTree(I, Tree, RedoInsts); SmallVector Ops; Ops.reserve(Tree.size()); for (const RepeatedValue &E : Tree) diff --git a/llvm/test/Transforms/Reassociate/fast-ReassociateVector.ll b/llvm/test/Transforms/Reassociate/fast-ReassociateVector.ll index 339884b..9a96afa 100644 --- a/llvm/test/Transforms/Reassociate/fast-ReassociateVector.ll +++ b/llvm/test/Transforms/Reassociate/fast-ReassociateVector.ll @@ -281,11 +281,10 @@ define <2 x double> @test9_reassoc_unary_fneg(<2 x double> %b, <2 x double> %a) define <2 x float> @test10(<2 x float> %a, <2 x float> %b, <2 x float> %z) { ; CHECK-LABEL: @test10( -; CHECK-NEXT: [[TMP1:%.*]] = fsub fast <2 x float> zeroinitializer, zeroinitializer ; CHECK-NEXT: [[C:%.*]] = fmul fast <2 x float> [[A:%.*]], ; CHECK-NEXT: [[E:%.*]] = fmul fast <2 x float> [[C]], [[Z:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = fadd fast <2 x float> [[E]], zeroinitializer -; CHECK-NEXT: ret <2 x float> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = fadd fast <2 x float> [[E]], zeroinitializer +; CHECK-NEXT: ret <2 x float> [[TMP1]] ; %d = fmul fast <2 x float> %z, %c = fsub fast <2 x float> , %d @@ -296,7 +295,6 @@ define <2 x float> @test10(<2 x float> %a, <2 x float> %b, <2 x float> %z) { define <2 x float> @test10_unary_fneg(<2 x float> %a, <2 x float> %b, <2 x float> %z) { ; CHECK-LABEL: @test10_unary_fneg( -; CHECK-NEXT: [[TMP1:%.*]] = fneg fast <2 x float> zeroinitializer ; CHECK-NEXT: [[E:%.*]] = fmul fast <2 x float> [[A:%.*]], ; CHECK-NEXT: [[F:%.*]] = fmul fast <2 x float> [[E]], [[Z:%.*]] ; CHECK-NEXT: ret <2 x float> [[F]] diff --git a/llvm/test/Transforms/Reassociate/fast-basictest.ll b/llvm/test/Transforms/Reassociate/fast-basictest.ll index 038c8c8..c6cb63d 100644 --- a/llvm/test/Transforms/Reassociate/fast-basictest.ll +++ b/llvm/test/Transforms/Reassociate/fast-basictest.ll @@ -181,16 +181,12 @@ define float @test6_reassoc(float %A, float %B, float %C) { } ; (-X)*Y + Z -> Z-X*Y -; TODO: check why IR transformation of test7 with 'fast' math flag -; is worse than without it (and even without transformation) define float @test7(float %X, float %Y, float %Z) { ; CHECK-LABEL: @test7( -; CHECK-NEXT: [[TMP1:%.*]] = fsub fast float 0.000000e+00, 0.000000e+00 -; CHECK-NEXT: [[A:%.*]] = fmul fast float [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[B:%.*]] = fmul fast float [[A]], 1.000000e+00 -; CHECK-NEXT: [[TMP2:%.*]] = fsub fast float [[Z:%.*]], [[B]] -; CHECK-NEXT: ret float [[TMP2]] +; CHECK-NEXT: [[B:%.*]] = fmul fast float [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = fsub fast float [[Z:%.*]], [[B]] +; CHECK-NEXT: ret float [[TMP1]] ; %A = fsub fast float 0.0, %X %B = fmul fast float %A, %Y @@ -200,11 +196,9 @@ define float @test7(float %X, float %Y, float %Z) { define float @test7_unary_fneg(float %X, float %Y, float %Z) { ; CHECK-LABEL: @test7_unary_fneg( -; CHECK-NEXT: [[TMP1:%.*]] = fneg fast float 0.000000e+00 -; CHECK-NEXT: [[A:%.*]] = fmul fast float [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[B:%.*]] = fmul fast float [[A]], 1.000000e+00 -; CHECK-NEXT: [[TMP2:%.*]] = fsub fast float [[Z:%.*]], [[B]] -; CHECK-NEXT: ret float [[TMP2]] +; CHECK-NEXT: [[B:%.*]] = fmul fast float [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = fsub fast float [[Z:%.*]], [[B]] +; CHECK-NEXT: ret float [[TMP1]] ; %A = fneg fast float %X %B = fmul fast float %A, %Y @@ -239,6 +233,22 @@ define float @test7_reassoc(float %X, float %Y, float %Z) { ret float %C } +; Integer version of: +; (-X)*Y + Z -> Z-X*Y +; TODO: check if we can change the mul of -1 and the add to a sub. +define i32 @test7_int(i32 %X, i32 %Y, i32 %Z) { +; CHECK-LABEL: @test7_int( +; CHECK-NEXT: [[A:%.*]] = mul i32 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[B:%.*]] = mul i32 [[A]], -1 +; CHECK-NEXT: [[C:%.*]] = add i32 [[B]], [[Z:%.*]] +; CHECK-NEXT: ret i32 [[C]] +; + %A = sub i32 0, %X + %B = mul i32 %A, %Y + %C = add i32 %B, %Z + ret i32 %C +} + define float @test8(float %X) { ; CHECK-LABEL: @test8( ; CHECK-NEXT: [[FACTOR:%.*]] = fmul fast float [[X:%.*]], 9.400000e+01 @@ -276,7 +286,6 @@ define float @test10(float %W) { define float @test11(float %X) { ; CHECK-LABEL: @test11( -; CHECK-NEXT: [[TMP1:%.*]] = fneg fast float 0.000000e+00 ; CHECK-NEXT: [[FACTOR:%.*]] = fmul fast float [[X:%.*]], -3.000000e+00 ; CHECK-NEXT: [[Z:%.*]] = fadd fast float [[FACTOR]], 6.000000e+00 ; CHECK-NEXT: ret float [[Z]] @@ -289,17 +298,12 @@ define float @test11(float %X) { ret float %Z } -; TODO: check why IR transformation of test12 with 'fast' math flag -; is worse than without it (and even without transformation) - define float @test12(float %X1, float %X2, float %X3) { ; CHECK-LABEL: @test12( -; CHECK-NEXT: [[TMP1:%.*]] = fsub fast float 0.000000e+00, 0.000000e+00 -; CHECK-NEXT: [[A:%.*]] = fmul fast float [[X2:%.*]], [[X1:%.*]] -; CHECK-NEXT: [[B:%.*]] = fmul fast float [[A]], 1.000000e+00 +; CHECK-NEXT: [[B:%.*]] = fmul fast float [[X2:%.*]], [[X1:%.*]] ; CHECK-NEXT: [[C:%.*]] = fmul fast float [[X3:%.*]], [[X1]] -; CHECK-NEXT: [[TMP2:%.*]] = fsub fast float [[C]], [[B]] -; CHECK-NEXT: ret float [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = fsub fast float [[C]], [[B]] +; CHECK-NEXT: ret float [[TMP1]] ; %A = fsub fast float 0.000000e+00, %X1 %B = fmul fast float %A, %X2 ; -X1*X2 @@ -310,12 +314,10 @@ define float @test12(float %X1, float %X2, float %X3) { define float @test12_unary_fneg(float %X1, float %X2, float %X3) { ; CHECK-LABEL: @test12_unary_fneg( -; CHECK-NEXT: [[TMP1:%.*]] = fneg fast float 0.000000e+00 -; CHECK-NEXT: [[A:%.*]] = fmul fast float [[X2:%.*]], [[X1:%.*]] -; CHECK-NEXT: [[B:%.*]] = fmul fast float [[A]], 1.000000e+00 +; CHECK-NEXT: [[B:%.*]] = fmul fast float [[X2:%.*]], [[X1:%.*]] ; CHECK-NEXT: [[C:%.*]] = fmul fast float [[X3:%.*]], [[X1]] -; CHECK-NEXT: [[TMP2:%.*]] = fsub fast float [[C]], [[B]] -; CHECK-NEXT: ret float [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = fsub fast float [[C]], [[B]] +; CHECK-NEXT: ret float [[TMP1]] ; %A = fneg fast float %X1 %B = fmul fast float %A, %X2 ; -X1*X2 @@ -490,12 +492,11 @@ define float @test15_reassoc(float %b, float %a) { define float @test16(float %a, float %b, float %z) { ; CHECK-LABEL: @test16( -; CHECK-NEXT: [[TMP1:%.*]] = fsub fast float 0.000000e+00, 0.000000e+00 ; CHECK-NEXT: [[C:%.*]] = fmul fast float [[A:%.*]], 1.234500e+04 ; CHECK-NEXT: [[E:%.*]] = fmul fast float [[C]], [[B:%.*]] ; CHECK-NEXT: [[F:%.*]] = fmul fast float [[E]], [[Z:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = fadd fast float [[F]], 0.000000e+00 -; CHECK-NEXT: ret float [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = fadd fast float [[F]], 0.000000e+00 +; CHECK-NEXT: ret float [[TMP1]] ; %c = fsub fast float 0.000000e+00, %z %d = fmul fast float %a, %b @@ -507,7 +508,6 @@ define float @test16(float %a, float %b, float %z) { define float @test16_unary_fneg(float %a, float %b, float %z) { ; CHECK-LABEL: @test16_unary_fneg( -; CHECK-NEXT: [[TMP1:%.*]] = fneg fast float 0.000000e+00 ; CHECK-NEXT: [[E:%.*]] = fmul fast float [[A:%.*]], 1.234500e+04 ; CHECK-NEXT: [[F:%.*]] = fmul fast float [[E]], [[B:%.*]] ; CHECK-NEXT: [[G:%.*]] = fmul fast float [[F]], [[Z:%.*]] @@ -539,16 +539,14 @@ define float @test16_reassoc(float %a, float %b, float %z) { } ; TODO: check if we can remove: -; - fsub fast 0, 0 ; - fadd fast x, 0 ; ... as 'fast' implies 'nsz' define float @test17(float %a, float %b, float %z) { ; CHECK-LABEL: @test17( -; CHECK-NEXT: [[TMP1:%.*]] = fsub fast float 0.000000e+00, 0.000000e+00 ; CHECK-NEXT: [[C:%.*]] = fmul fast float [[A:%.*]], 4.000000e+01 ; CHECK-NEXT: [[E:%.*]] = fmul fast float [[C]], [[Z:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = fadd fast float [[E]], 0.000000e+00 -; CHECK-NEXT: ret float [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = fadd fast float [[E]], 0.000000e+00 +; CHECK-NEXT: ret float [[TMP1]] ; %d = fmul fast float %z, 4.000000e+01 %c = fsub fast float 0.000000e+00, %d @@ -557,10 +555,8 @@ define float @test17(float %a, float %b, float %z) { ret float %f } -; TODO: check if we can remove fneg fast 0 as 'fast' implies 'nsz' define float @test17_unary_fneg(float %a, float %b, float %z) { ; CHECK-LABEL: @test17_unary_fneg( -; CHECK-NEXT: [[TMP1:%.*]] = fneg fast float 0.000000e+00 ; CHECK-NEXT: [[E:%.*]] = fmul fast float [[A:%.*]], 4.000000e+01 ; CHECK-NEXT: [[F:%.*]] = fmul fast float [[E]], [[Z:%.*]] ; CHECK-NEXT: ret float [[F]] diff --git a/llvm/test/Transforms/Reassociate/pr42349.ll b/llvm/test/Transforms/Reassociate/pr42349.ll index ce008c3..d888f68 100644 --- a/llvm/test/Transforms/Reassociate/pr42349.ll +++ b/llvm/test/Transforms/Reassociate/pr42349.ll @@ -5,7 +5,6 @@ define float @wibble(float %tmp6) #0 { ; CHECK-LABEL: @wibble( ; CHECK-NEXT: bb: ; CHECK-NEXT: [[TMP7:%.*]] = fmul float [[TMP6:%.*]], -1.000000e+00 -; CHECK-NEXT: [[TMP0:%.*]] = fsub float -0.000000e+00, 0.000000e+00 ; CHECK-NEXT: [[TMP9:%.*]] = fmul fast float [[TMP6]], 0xFFF0000000000000 ; CHECK-NEXT: ret float [[TMP9]] ; -- 2.7.4