void deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE,
LoopInfo *LI, MemorySSA *MSSA = nullptr);
+/// Remove the backedge of the specified loop and erase the loop from \p LI.
+/// Handles loop nests and general loop structures, provided the loop has no
+/// parent loop and a single latch block. Preserves all listed analyses.
+void breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
+ LoopInfo &LI, MemorySSA *MSSA); // MSSA may be null.
+
/// Try to promote memory values to scalars by sinking stores out of
/// the loop and moving loads to before the loop. We do this by looping over
/// the stores in the loop, looking for stores to Must pointers which are
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
+
using namespace llvm;
#define DEBUG_TYPE "loop-delete"
Deleted,
};
+static LoopDeletionResult merge(LoopDeletionResult A, LoopDeletionResult B) {
+ if (A == LoopDeletionResult::Deleted || B == LoopDeletionResult::Deleted)
+ return LoopDeletionResult::Deleted; // Deleted takes precedence over all.
+ if (A == LoopDeletionResult::Modified || B == LoopDeletionResult::Modified)
+ return LoopDeletionResult::Modified; // Otherwise report any modification.
+ return LoopDeletionResult::Unmodified; // Neither input reported a change.
+}
+
/// Determines if a loop is dead.
///
/// This assumes that we've already checked for unique exit and exiting blocks,
return true;
}
+/// If we can prove the backedge is untaken, remove it. This destroys the
+/// loop, but leaves the (now trivially loop invariant) control flow and
+/// side effects (if any) in place.
+static LoopDeletionResult
+breakBackedgeIfNotTaken(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
+ LoopInfo &LI, MemorySSA *MSSA,
+ OptimizationRemarkEmitter &ORE) { // NOTE: ORE is not used in this body.
+ assert(L->isLCSSAForm(DT) && "Expected LCSSA!");
+
+ if (!L->getLoopLatch()) // breakLoopBackedge requires a single latch.
+ return LoopDeletionResult::Unmodified;
+
+ auto *BTC = SE.getBackedgeTakenCount(L);
+ if (!BTC->isZero()) // Bail unless the backedge-taken count is known zero.
+ return LoopDeletionResult::Unmodified;
+
+ // For non-outermost loops, the tricky case is that we can drop blocks
+ // out of both inner and outer loops at the same time. This results in
+ // a new exiting block for the outer loop appearing, and possibly needing
+ // an lcssa phi inserted. (See loop_nest_lcssa test case in zero-btc.ll)
+ // TODO: We can handle a bunch of cases here without much work, revisit.
+ if (!L->isOutermost())
+ return LoopDeletionResult::Unmodified;
+
+ breakLoopBackedge(L, DT, SE, LI, MSSA); // Also erases L from LI.
+ return LoopDeletionResult::Deleted; // L no longer exists as a loop.
+}
+
/// Remove a loop if it is dead.
///
/// A loop is considered dead either if it does not impact the observable
// but ORE cannot be preserved (see comment before the pass definition).
OptimizationRemarkEmitter ORE(L.getHeader()->getParent());
auto Result = deleteLoopIfDead(&L, AR.DT, AR.SE, AR.LI, AR.MSSA, ORE);
+
+ // If we can prove the backedge isn't taken, just break it and be done. This
+ // leaves the control flow in place, which means it can handle dispatching
+ // to the right exit based on whatever loop invariant structure remains.
+ if (Result != LoopDeletionResult::Deleted)
+ Result = merge(Result, breakBackedgeIfNotTaken(&L, AR.DT, AR.SE, AR.LI,
+ AR.MSSA, ORE));
+
if (Result == LoopDeletionResult::Unmodified)
return PreservedAnalyses::all();
LoopDeletionResult Result = deleteLoopIfDead(L, DT, SE, LI, MSSA, ORE);
+ // If we can prove the backedge isn't taken, just break it and be done. This
+ // leaves the control flow in place, which means it can handle dispatching
+ // to the right exit based on whatever loop invariant structure remains.
+ if (Result != LoopDeletionResult::Deleted)
+ Result = merge(Result, breakBackedgeIfNotTaken(L, DT, SE, LI, MSSA, ORE));
+
if (Result == LoopDeletionResult::Deleted)
LPM.markLoopAsDeleted(*L);
}
}
+void llvm::breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
+ LoopInfo &LI, MemorySSA *MSSA) {
+ // Make the latch->header backedge unreachable, then erase L from LoopInfo.
+ assert(L->isOutermost() && "Can't yet preserve LCSSA for this case");
+ auto *Latch = L->getLoopLatch();
+ assert(Latch && "multiple latches not yet supported");
+ auto *Header = L->getHeader();
+
+ SE.forgetLoop(L); // Drop SCEV's cached facts about L before changing it.
+
+ // Note: By splitting the backedge, and then explicitly making it unreachable
+ // we gracefully handle corner cases such as non-bottom tested loops and the
+ // like. We also have the benefit of being able to reuse existing well tested
+ // code. It might be worth special casing the common bottom tested case at
+ // some point to avoid code churn.
+
+ std::unique_ptr<MemorySSAUpdater> MSSAU;
+ if (MSSA) // MSSA is optional; MSSAU stays null when it is absent.
+ MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
+
+ auto *BackedgeBB = SplitEdge(Latch, Header, &DT, &LI, MSSAU.get());
+
+ DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
+ (void)changeToUnreachable(BackedgeBB->getTerminator(), /*UseTrap*/false,
+ /*PreserveLCSSA*/true, &DTU, MSSAU.get());
+
+ // Erase (and destroy) this loop instance. Handles relinking sub-loops
+ // and blocks within the loop as needed.
+ LI.erase(L); // L must not be used after this point.
+}
+
+
/// Checks if \p L has single exit through latch block except possibly
/// "deoptimizing" exits. Returns branch instruction terminating the loop
/// latch if above check is successful, nullptr otherwise.
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[UNKNOWN_NEXT:%.*]] = load volatile i32, i32* [[UNKNOWN_MEM:%.*]]
-; CHECK-NEXT: br i1 false, label [[LOOP]], label [[LEAVE:%.*]]
+; CHECK-NEXT: [[UNKNOWN_NEXT:%.*]] = load volatile i32, i32* [[UNKNOWN_MEM:%.*]], align 4
+; CHECK-NEXT: br i1 false, label [[LOOP_LOOP_CRIT_EDGE:%.*]], label [[LEAVE:%.*]]
+; CHECK: loop.loop_crit_edge:
+; CHECK-NEXT: unreachable
; CHECK: leave:
; CHECK-NEXT: ret i32 [[UNKNOWN_INIT:%.*]]
;
%conv10 = zext i1 %cmp9 to i32
%and = and i32 %conv10, %g.138
%inc = add i32 %h.039, 1
- br i1 undef, label %for.inc11, label %for.body6
+ %exit = icmp eq i32 %inc, 20000
+ br i1 %exit, label %for.inc11, label %for.body6
for.inc11: ; preds = %for.body6
%and.lcssa = phi i32 [ %and, %for.body6 ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: store i32 0, i32* @G, align 4
-; CHECK-NEXT: br i1 false, label [[LOOP]], label [[EXIT:%.*]]
+; CHECK-NEXT: br i1 false, label [[LOOP_LOOP_CRIT_EDGE:%.*]], label [[EXIT:%.*]]
+; CHECK: loop.loop_crit_edge:
+; CHECK-NEXT: unreachable
; CHECK: exit:
; CHECK-NEXT: ret void
;
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_INC:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: store i32 0, i32* @G, align 4
-; CHECK-NEXT: [[IV_INC]] = add i32 [[IV]], 1
+; CHECK-NEXT: [[IV_INC:%.*]] = add i32 [[IV]], 1
; CHECK-NEXT: [[BE_TAKEN:%.*]] = icmp ne i32 [[IV_INC]], 1
-; CHECK-NEXT: br i1 [[BE_TAKEN]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK-NEXT: br i1 [[BE_TAKEN]], label [[LOOP_LOOP_CRIT_EDGE:%.*]], label [[EXIT:%.*]]
+; CHECK: loop.loop_crit_edge:
+; CHECK-NEXT: unreachable
; CHECK: exit:
; CHECK-NEXT: ret void
;
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_INC:%.*]], [[LATCH:%.*]] ]
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: store i32 0, i32* @G, align 4
-; CHECK-NEXT: [[IV_INC]] = add i32 [[IV]], 1
+; CHECK-NEXT: [[IV_INC:%.*]] = add i32 [[IV]], 1
; CHECK-NEXT: [[BE_TAKEN:%.*]] = icmp ne i32 [[IV_INC]], 1
-; CHECK-NEXT: br i1 [[BE_TAKEN]], label [[LATCH]], label [[EXIT:%.*]]
+; CHECK-NEXT: br i1 [[BE_TAKEN]], label [[LATCH:%.*]], label [[EXIT:%.*]]
; CHECK: latch:
-; CHECK-NEXT: br label [[LOOP]]
+; CHECK-NEXT: br label [[LATCH_SPLIT:%.*]]
+; CHECK: latch.split:
+; CHECK-NEXT: unreachable
; CHECK: exit:
; CHECK-NEXT: ret void
;
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_INC:%.*]], [[LATCH:%.*]] ]
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: store i32 0, i32* @G, align 4
-; CHECK-NEXT: [[IV_INC]] = add i32 [[IV]], 1
+; CHECK-NEXT: [[IV_INC:%.*]] = add i32 [[IV]], 1
; CHECK-NEXT: [[BE_TAKEN:%.*]] = icmp ne i32 [[IV_INC]], 1
-; CHECK-NEXT: br i1 [[BE_TAKEN]], label [[LATCH]], label [[EXIT:%.*]]
+; CHECK-NEXT: br i1 [[BE_TAKEN]], label [[LATCH:%.*]], label [[EXIT:%.*]]
; CHECK: latch:
; CHECK-NEXT: store i32 1, i32* @G, align 4
; CHECK-NEXT: [[COND2:%.*]] = icmp ult i32 [[IV_INC]], 30
-; CHECK-NEXT: br i1 [[COND2]], label [[LOOP]], label [[EXIT]]
+; CHECK-NEXT: br i1 [[COND2]], label [[LATCH_LOOP_CRIT_EDGE:%.*]], label [[EXIT]]
+; CHECK: latch.loop_crit_edge:
+; CHECK-NEXT: unreachable
; CHECK: exit:
; CHECK-NEXT: ret void
;
; CHECK-NEXT: br i1 true, label [[LATCH:%.*]], label [[EXIT:%.*]]
; CHECK: latch:
; CHECK-NEXT: store i32 1, i32* @G, align 4
-; CHECK-NEXT: br i1 false, label [[LOOP]], label [[EXIT]]
+; CHECK-NEXT: br i1 false, label [[LATCH_LOOP_CRIT_EDGE:%.*]], label [[EXIT]]
+; CHECK: latch.loop_crit_edge:
+; CHECK-NEXT: unreachable
; CHECK: exit:
; CHECK-NEXT: ret void
;
; CHECK-NEXT: br i1 true, label [[LATCH:%.*]], label [[EXIT1:%.*]]
; CHECK: latch:
; CHECK-NEXT: store i32 1, i32* @G, align 4
-; CHECK-NEXT: br i1 false, label [[LOOP]], label [[EXIT2:%.*]]
+; CHECK-NEXT: br i1 false, label [[LATCH_LOOP_CRIT_EDGE:%.*]], label [[EXIT2:%.*]]
+; CHECK: latch.loop_crit_edge:
+; CHECK-NEXT: unreachable
; CHECK: exit1:
; CHECK-NEXT: ret void
; CHECK: exit2:
; CHECK-NEXT: [[CND:%.*]] = icmp ult i32 [[IV_INC]], 200
; CHECK-NEXT: br i1 [[CND]], label [[INNER]], label [[LATCH:%.*]]
; CHECK: latch:
-; CHECK-NEXT: br i1 false, label [[LOOP]], label [[EXIT:%.*]]
+; CHECK-NEXT: br i1 false, label [[LATCH_LOOP_CRIT_EDGE:%.*]], label [[EXIT:%.*]]
+; CHECK: latch.loop_crit_edge:
+; CHECK-NEXT: unreachable
; CHECK: exit:
; CHECK-NEXT: ret void
;
exit:
ret void
}
+
+; Key point is that inner_latch drops out of the outer loop when the inner
+; loop is deleted, and thus the lcssa phi needs to be in the inner_latch block
+; to preserve LCSSA. We either have to insert the LCSSA phi, or not break the
+; inner backedge; the checks below expect the backedge to be left intact.
+define void @loop_nest_lcssa() {
+; CHECK-LABEL: @loop_nest_lcssa(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = add i32 1, 2
+; CHECK-NEXT: br label [[OUTER_HEADER:%.*]]
+; CHECK: outer_header:
+; CHECK-NEXT: br label [[INNER_HEADER:%.*]]
+; CHECK: inner_header:
+; CHECK-NEXT: br i1 false, label [[INNER_LATCH:%.*]], label [[OUTER_LATCH:%.*]]
+; CHECK: inner_latch:
+; CHECK-NEXT: br i1 false, label [[INNER_HEADER]], label [[LOOPEXIT:%.*]]
+; CHECK: outer_latch:
+; CHECK-NEXT: br label [[OUTER_HEADER]]
+; CHECK: loopexit:
+; CHECK-NEXT: [[DOTLCSSA32:%.*]] = phi i32 [ [[TMP0]], [[INNER_LATCH]] ]
+; CHECK-NEXT: unreachable
+;
+entry:
+ br label %outer_header
+
+outer_header:
+ %0 = add i32 1, 2
+ br label %inner_header
+
+inner_header:
+ br i1 false, label %inner_latch, label %outer_latch
+
+inner_latch:
+ br i1 false, label %inner_header, label %loopexit
+
+outer_latch:
+ br label %outer_header
+
+loopexit:
+ %.lcssa32 = phi i32 [ %0, %inner_latch ]
+ unreachable
+}