From b84ab1f7c9b87cb0a0bea26dc4299c4fd19df548 Mon Sep 17 00:00:00 2001 From: "chenglin.bi" Date: Wed, 11 Jan 2023 01:24:34 +0800 Subject: [PATCH] Revert "[LSR] Hoist IVInc to loop header if its all uses are in the loop header" The original commit seems to cause a regression in numba test. This reverts commit b1b4758e7f4b2ffe1faa28b00eb037832e5d26a7. --- llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp | 37 +--------- .../LoopStrengthReduce/AArch64/pr53625.ll | 79 ++-------------------- 2 files changed, 8 insertions(+), 108 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 6285298..7325694 100644 --- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -5643,36 +5643,6 @@ void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF, DeadInsts.emplace_back(OperandIsInstr); } -// Trying to hoist the IVInc to loop header if all IVInc users are in -// the loop header. It will help backend to generate post index load/store -// when the latch block is different from loop header block. -static bool canHoistIVInc(const TargetTransformInfo &TTI, const LSRFixup &Fixup, - const LSRUse &LU, Instruction *IVIncInsertPos, - Loop *L) { - if (LU.Kind != LSRUse::Address) - return false; - - // For now this code do the conservative optimization, only work for - // the header block. Later we can hoist the IVInc to the block post - // dominate all users. - BasicBlock *LHeader = L->getHeader(); - if (IVIncInsertPos->getParent() == LHeader) - return false; - - if (!Fixup.OperandValToReplace || - any_of(Fixup.OperandValToReplace->users(), [&LHeader](User *U) { - Instruction *UI = cast(U); - return UI->getParent() != LHeader; - })) - return false; - - Instruction *I = Fixup.UserInst; - return (isa(I) && - TTI.isIndexedLoadLegal(TTI.MIM_PostInc, I->getType())) || - (isa(I) && - TTI.isIndexedStoreLegal(TTI.MIM_PostInc, I->getType())); -} - /// Rewrite all the fixup locations with new values, following the chosen /// solution. void LSRInstance::ImplementSolution( @@ -5681,6 +5651,8 @@ void LSRInstance::ImplementSolution( // we can remove them after we are done working. SmallVector DeadInsts; + Rewriter.setIVIncInsertPos(L, IVIncInsertPos); + // Mark phi nodes that terminate chains so the expander tries to reuse them. for (const IVChain &Chain : IVChainVec) { if (PHINode *PN = dyn_cast(Chain.tailUserInst())) @@ -5690,11 +5662,6 @@ void LSRInstance::ImplementSolution( // Expand the new value definitions and update the users. for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) for (const LSRFixup &Fixup : Uses[LUIdx].Fixups) { - Instruction *InsertPos = - canHoistIVInc(TTI, Fixup, Uses[LUIdx], IVIncInsertPos, L) - ? L->getHeader()->getTerminator() - : IVIncInsertPos; - Rewriter.setIVIncInsertPos(L, InsertPos); Rewrite(Uses[LUIdx], Fixup, *Solution[LUIdx], DeadInsts); Changed = true; } diff --git a/llvm/test/Transforms/LoopStrengthReduce/AArch64/pr53625.ll b/llvm/test/Transforms/LoopStrengthReduce/AArch64/pr53625.ll index 7f7e009..515de76 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/AArch64/pr53625.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/AArch64/pr53625.ll @@ -13,10 +13,11 @@ define i32 @test(i32 %c, ptr %a, ptr %b) { ; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: .LBB0_2: // %for.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldr w9, [x1], #4 +; CHECK-NEXT: ldr w9, [x1] ; CHECK-NEXT: cbnz w9, .LBB0_5 ; CHECK-NEXT: // %bb.3: // %for.cond ; CHECK-NEXT: // in Loop: Header=BB0_2 Depth=1 +; CHECK-NEXT: add x1, x1, #4 ; CHECK-NEXT: subs x8, x8, #1 ; CHECK-NEXT: b.ne .LBB0_2 ; CHECK-NEXT: .LBB0_4: @@ -34,13 +35,13 @@ for.body.preheader: ; preds = %entry br label %for.body for.cond: ; preds = %for.body - %iv.next = add nuw nsw i64 %iv, 1 - %exitcond.not = icmp eq i64 %iv.next, %wide.trip.count + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count br i1 %exitcond.not, label %return, label %for.body for.body: ; preds = %for.body.preheader, %for.cond - %iv = phi i64 [ 0, %for.body.preheader ], [ %iv.next, %for.cond ] - %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv + %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.cond ] + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv %val = load i32, ptr %arrayidx, align 4 %tobool3.not = icmp eq i32 %val, 0 br i1 %tobool3.not, label %for.cond, label %return @@ -49,71 +50,3 @@ return: ; preds = %for.cond, %for.body %retval.1 = phi i32 [ 0, %entry ], [ 0, %for.cond ], [ 1, %for.body ] ret i32 %retval.1 } - -; negative case: %arrayidx.b is not in header - -define i64 @IVIncHoist_not_all_user_in_header(i32 %c, ptr %a, ptr %b) { -; CHECK-LABEL: IVIncHoist_not_all_user_in_header: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: cmp w0, #1 -; CHECK-NEXT: b.lt .LBB1_5 -; CHECK-NEXT: // %bb.1: // %for.body.preheader -; CHECK-NEXT: mov x8, xzr -; CHECK-NEXT: mov w9, w0 -; CHECK-NEXT: add x10, x1, #4 -; CHECK-NEXT: add x11, x2, #8 -; CHECK-NEXT: mov w0, #1 -; CHECK-NEXT: .LBB1_2: // %for.body -; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldr w12, [x10, x8, lsl #2] -; CHECK-NEXT: cbnz w12, .LBB1_7 -; CHECK-NEXT: // %bb.3: // %if.then -; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1 -; CHECK-NEXT: ldr w12, [x11, x8, lsl #2] -; CHECK-NEXT: cbnz w12, .LBB1_6 -; CHECK-NEXT: // %bb.4: // %for.cond -; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1 -; CHECK-NEXT: add x8, x8, #1 -; CHECK-NEXT: cmp x9, x8 -; CHECK-NEXT: b.ne .LBB1_2 -; CHECK-NEXT: .LBB1_5: -; CHECK-NEXT: mov x0, xzr -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB1_6: // %if.then.return.loopexit_crit_edge -; CHECK-NEXT: add x0, x8, #3 -; CHECK-NEXT: .LBB1_7: // %return -; CHECK-NEXT: ret -entry: - %cmp13 = icmp sgt i32 %c, 0 - br i1 %cmp13, label %for.body.preheader, label %return - -for.body.preheader: ; preds = %entry - %wide.trip.count = zext i32 %c to i64 - br label %for.body - -for.cond: ; preds = %for.body - %iv.next = add nuw nsw i64 %iv, 1 - %exitcond.not = icmp eq i64 %iv.next, %wide.trip.count - br i1 %exitcond.not, label %return, label %for.body - -for.body: ; preds = %for.body.preheader, %for.cond - %iv = phi i64 [ 0, %for.body.preheader ], [ %iv.next, %for.cond ] - %iv.a = phi i64 [ 1, %for.body.preheader ], [ %iv.next.a, %for.cond ] - %iv.b = phi i64 [ 2, %for.body.preheader ], [ %iv.next.b, %for.cond ] - %arrayidx.a = getelementptr inbounds i32, ptr %a, i64 %iv.a - %iv.next.a = add nuw nsw i64 %iv.a, 1 - %val.a = load i32, ptr %arrayidx.a, align 4 - %tobool3.not = icmp eq i32 %val.a, 0 - br i1 %tobool3.not, label %if.then, label %return - -if.then: - %arrayidx.b = getelementptr inbounds i32, ptr %b, i64 %iv.b - %iv.next.b = add nuw nsw i64 %iv.b, 1 - %val.b = load i32, ptr %arrayidx.b, align 4 - %tobool4.not = icmp eq i32 %val.b, 0 - br i1 %tobool4.not, label %for.cond, label %return - -return: ; preds = %for.cond, %for.body, %entry - %retval.1 = phi i64 [ 0, %entry ], [ 0, %for.cond ], [ 1, %for.body ], [ %iv.next.b, %if.then ] - ret i64 %retval.1 -} -- 2.7.4