STATISTIC(NotRotated, "Candidate is not rotated");
STATISTIC(OnlySecondCandidateIsGuarded,
"The second candidate is guarded while the first one is not");
+STATISTIC(NumHoistedInsts, "Number of hoisted preheader instructions.");
+STATISTIC(NumSunkInsts, "Number of hoisted preheader instructions.");
enum FusionDependenceAnalysisChoice {
FUSION_DEPENDENCE_ANALYSIS_SCEV,
OptimizationRemarkEmitter &ORE;
- FusionCandidate(Loop *L, DominatorTree &DT,
- const PostDominatorTree *PDT, OptimizationRemarkEmitter &ORE,
- TTI::PeelingPreferences PP)
+ FusionCandidate(Loop *L, DominatorTree &DT, const PostDominatorTree *PDT,
+ OptimizationRemarkEmitter &ORE, TTI::PeelingPreferences PP)
: Preheader(L->getLoopPreheader()), Header(L->getHeader()),
ExitingBlock(L->getExitingBlock()), ExitBlock(L->getExitBlock()),
Latch(L->getLoopLatch()), L(L), Valid(true),
PostDominatorTree &PDT;
OptimizationRemarkEmitter &ORE;
AssumptionCache ∾
-
const TargetTransformInfo &TTI;
public:
}
// Cannot modify the predecessors inside the above loop as it will cause
// the iterators to be nullptrs, causing memory errors.
- for (Instruction *CurrentBranch: WorkList) {
+ for (Instruction *CurrentBranch : WorkList) {
BasicBlock *Succ = CurrentBranch->getSuccessor(0);
if (Succ == BB)
Succ = CurrentBranch->getSuccessor(1);
continue;
}
- if (!isSafeToMoveBefore(*FC1->Preheader,
- *FC0->Preheader->getTerminator(), DT, &PDT,
- &DI)) {
- LLVM_DEBUG(dbgs() << "Fusion candidate contains unsafe "
- "instructions in preheader. Not fusing.\n");
- reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
- NonEmptyPreheader);
- continue;
- }
-
if (FC0->GuardBranch) {
assert(FC1->GuardBranch && "Expecting valid FC1 guard branch");
continue;
}
+ // If the second loop has instructions in the pre-header, attempt to
+ // hoist them up to the first loop's pre-header or sink them into the
+ // body of the second loop.
+ SmallVector<Instruction *, 4> SafeToHoist;
+ SmallVector<Instruction *, 4> SafeToSink;
+ // At this point, this is the last remaining legality check.
+ // Which means if we can make this pre-header empty, we can fuse
+ // these loops
+ if (!isEmptyPreheader(*FC1)) {
+ LLVM_DEBUG(dbgs() << "Fusion candidate does not have empty "
+ "preheader.\n");
+
+ // If it is not safe to hoist/sink all instructions in the
+ // pre-header, we cannot fuse these loops.
+ if (!collectMovablePreheaderInsts(*FC0, *FC1, SafeToHoist,
+ SafeToSink)) {
+ LLVM_DEBUG(dbgs() << "Could not hoist/sink all instructions in "
+ "Fusion Candidate Pre-header.\n"
+ << "Not Fusing.\n");
+ reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
+ NonEmptyPreheader);
+ continue;
+ }
+ }
+
bool BeneficialToFuse = isBeneficialFusion(*FC0, *FC1);
LLVM_DEBUG(dbgs()
<< "\tFusion appears to be "
// and profitable. At this point, start transforming the code and
// perform fusion.
+ // Execute the hoist/sink operations on preheader instructions
+ movePreheaderInsts(*FC0, *FC1, SafeToHoist, SafeToSink);
+
LLVM_DEBUG(dbgs() << "\tFusion is performed: " << *FC0 << " and "
<< *FC1 << "\n");
return Fused;
}
+ /// Collect instructions in the \p FC1 Preheader that can be hoisted
+ /// to the \p FC0 Preheader or sunk into the \p FC1 Body
+ bool collectMovablePreheaderInsts(
+ const FusionCandidate &FC0, const FusionCandidate &FC1,
+ SmallVector<Instruction *, 4> &SafeToHoist,
+ SmallVector<Instruction *, 4> &SafeToSink) const {
+ BasicBlock *FC1Preheader = FC1.Preheader;
+ for (Instruction &I : *FC1Preheader) {
+ // Can't move a branch
+ if (&I == FC1Preheader->getTerminator())
+ continue;
+ // If the instruction has side-effects, give up.
+ // TODO: The case of mayReadFromMemory we can handle but requires
+ // additional work with a dependence analysis so for now we give
+ // up on memory reads.
+ if (I.mayHaveSideEffects() || I.mayReadFromMemory()) {
+ LLVM_DEBUG(dbgs() << "Inst: " << I << " may have side-effects.\n");
+ return false;
+ }
+
+ LLVM_DEBUG(dbgs() << "Checking Inst: " << I << "\n");
+
+ // First check if can be hoisted
+ // If the operands of this instruction dominate the FC0 Preheader
+ // target block, then it is safe to move them to the end of the FC0
+ const BasicBlock *FC0PreheaderTarget =
+ FC0.Preheader->getSingleSuccessor();
+ assert(FC0PreheaderTarget &&
+ "Expected single successor for loop preheader.");
+ bool CanHoistInst = true;
+ for (Use &Op : I.operands()) {
+ if (auto *OpInst = dyn_cast<Instruction>(Op)) {
+ bool OpHoisted = is_contained(SafeToHoist, OpInst);
+ // Check if we have already decided to hoist this operand. In this
+ // case, it does not dominate FC0 *yet*, but will after we hoist it.
+ if (!(OpHoisted || DT.dominates(OpInst, FC0PreheaderTarget))) {
+ CanHoistInst = false;
+ break;
+ }
+ }
+ }
+ if (CanHoistInst) {
+ SafeToHoist.push_back(&I);
+ LLVM_DEBUG(dbgs() << "\tSafe to hoist.\n");
+ } else {
+ LLVM_DEBUG(dbgs() << "\tCould not hoist. Trying to sink...\n");
+
+ for (User *U : I.users()) {
+ if (auto *UI{dyn_cast<Instruction>(U)}) {
+ // Cannot sink if user in loop
+ // If FC1 has phi users of this value, we cannot sink it into FC1.
+ if (FC1.L->contains(UI)) {
+ // Cannot hoist or sink this instruction. No hoisting/sinking
+ // should take place, loops should not fuse
+ LLVM_DEBUG(dbgs() << "\tCould not sink.\n");
+ return false;
+ }
+ }
+ }
+ SafeToSink.push_back(&I);
+ LLVM_DEBUG(dbgs() << "\tSafe to sink.\n");
+ }
+ }
+ LLVM_DEBUG(
+ dbgs() << "All preheader instructions could be sunk or hoisted!\n");
+ return true;
+ }
+
/// Rewrite all additive recurrences in a SCEV to use a new loop.
class AddRecLoopReplacer : public SCEVRewriteVisitor<AddRecLoopReplacer> {
public:
return FC0.ExitBlock == FC1.getEntryBlock();
}
+ bool isEmptyPreheader(const FusionCandidate &FC) const {
+ return FC.Preheader->size() == 1;
+ }
+
+ /// Hoist \p FC1 Preheader instructions to \p FC0 Preheader
+ /// and sink others into the body of \p FC1.
+ void movePreheaderInsts(const FusionCandidate &FC0,
+ const FusionCandidate &FC1,
+ SmallVector<Instruction *, 4> &HoistInsts,
+ SmallVector<Instruction *, 4> &SinkInsts) const {
+
+ // All preheader instructions except the branch must be hoisted or sunk
+ assert(HoistInsts.size() + SinkInsts.size() == FC1.Preheader->size() - 1 &&
+ "Attempting to sink and hoist preheader instructions, but not all "
+ "the preheader instructions are accounted for.");
+
+ NumHoistedInsts += HoistInsts.size();
+ NumSunkInsts += SinkInsts.size();
+
+ LLVM_DEBUG(if (VerboseFusionDebugging) {
+ if (!HoistInsts.empty())
+ dbgs() << "Hoisting: \n";
+ for (Instruction *I : HoistInsts)
+ dbgs() << *I << "\n";
+ if (!SinkInsts.empty())
+ dbgs() << "Sinking: \n";
+ for (Instruction *I : SinkInsts)
+ dbgs() << *I << "\n";
+ });
+
+ for (Instruction *I : HoistInsts) {
+ assert(I->getParent() == FC1.Preheader);
+ I->moveBefore(FC0.Preheader->getTerminator());
+ }
+ // insert instructions in reverse order to maintain dominance relationship
+ for (Instruction *I : reverse(SinkInsts)) {
+ assert(I->getParent() == FC1.Preheader);
+ I->moveBefore(&*FC1.ExitBlock->getFirstInsertionPt());
+ }
+ }
+
/// Determine if two fusion candidates have identical guards
///
/// This method will determine if two fusion candidates have the same guards.
bool runOnFunction(Function &F) override {
if (skipFunction(F))
return false;
+
auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto &DI = getAnalysis<DependenceAnalysisWrapperPass>().getDI();
--- /dev/null
+; RUN: opt -S -loop-fusion < %s | FileCheck %s
+
+define void @hoist_preheader(i32 %N) {
+
+; CHECK:pre1:
+; CHECK-NEXT: %hoistme = add i32 1, %N
+; CHECK-NEXT: %hoistme2 = add i32 1, %hoistme
+; CHECK-NEXT: br label %body1
+pre1:
+ br label %body1
+
+; CHECK: body1:
+; CHECK-NOT: %hoistme
+body1: ; preds = %pre1, %body1
+ %i = phi i32 [%i_next, %body1], [0, %pre1]
+ %i_next = add i32 1, %i
+ %cond = icmp ne i32 %i, %N
+ br i1 %cond, label %body1, label %pre2
+
+pre2:
+ %hoistme = add i32 1, %N
+ %hoistme2 = add i32 1, %hoistme
+ br label %body2
+
+body2: ; preds = %pre2, %body2
+ %i2 = phi i32 [%i_next2, %body2], [0, %pre2]
+ %i_next2 = add i32 1, %i2
+ %cond2 = icmp ne i32 %i2, %N
+ br i1 %cond2, label %body2, label %exit
+
+exit:
+ ret void
+}
--- /dev/null
+; RUN: opt -S -loop-simplify -loop-fusion -debug-only=loop-fusion < %s 2>&1 | FileCheck %s
+; REQUIRES: asserts
+; CHECK: Could not hoist/sink all instructions
+
+define void @sink_preheader(i32 %N) {
+; CHECK:pre1:
+; CHECK-NEXT: br label %body1
+pre1:
+ br label %body1
+
+; CHECK:body1:
+; CHECK-NOT: %stay =
+body1: ; preds = %pre1, %body1
+ %i = phi i32 [%i_next, %body1], [0, %pre1]
+ %i_next = add i32 1, %i
+ %cond = icmp ne i32 %i, %N
+ %barrier = add i32 1, %N
+ br i1 %cond, label %body1, label %pre2
+
+; CHECK:pre2:
+; CHECK: %stay =
+pre2:
+ %stay = add i32 1, %barrier
+ br label %body2
+
+; CHECK: body2:
+; CHECK-NOT: %stay =
+body2: ; preds = %pre2, %body2
+ %i2 = phi i32 [%i_next2, %body2], [0, %pre2]
+ %i_next2 = add i32 1, %i2
+ %cond2 = icmp ne i32 %i2, %N
+ %barrier2 = add i32 1, %stay
+ br i1 %cond2, label %body2, label %exit
+
+; CHECK: exit:
+; CHECK-NOT: %stay =
+exit:
+ ret void
+}
--- /dev/null
+; RUN: opt -S -loop-fusion -debug-only=loop-fusion < %s 2>&1 | FileCheck %s
+; REQUIRES: asserts
+
+; CHECK: Could not hoist/sink all instructions
+
+define void @sink_preheader(i32 %N) {
+; CHECK:pre1:
+; CHECK-NEXT: br label %body1
+pre1:
+ br label %body1
+
+; CHECK:body1:
+; CHECK-NOT: %no_hoist =
+; CHECK-NOT: %no_hoist_sink =
+; CHECK-NOT: %no_sink =
+body1: ; preds = %pre1, %body1
+ %i = phi i32 [%i_next, %body1], [0, %pre1]
+ %i_next = add i32 1, %i
+ %cond = icmp ne i32 %i, %N
+ %barrier = add i32 1, %N
+ br i1 %cond, label %body1, label %pre2
+
+; CHECK:pre2:
+; CHECK: %no_hoist =
+; CHECK: %no_hoist_sink =
+; CHECK: %no_sink =
+pre2:
+ %no_hoist = add i32 1, %barrier
+ %no_hoist_sink = add i32 1, %no_hoist
+ %no_sink = add i32 1, %no_hoist_sink
+ br label %body2
+
+; CHECK: body2:
+; CHECK-NOT: %no_hoist =
+; CHECK-NOT: %no_hoist_sink =
+; CHECK-NOT: %no_sink =
+body2: ; preds = %pre2, %body2
+ %i2 = phi i32 [%i_next2, %body2], [0, %pre2]
+ %i_next2 = add i32 1, %i2
+ %cond2 = icmp ne i32 %i2, %N
+ %barrier2 = add i32 1, %no_sink
+ br i1 %cond2, label %body2, label %exit
+
+; CHECK: exit:
+; CHECK-NOT: %stay =
+exit:
+ ret void
+}
--- /dev/null
+; RUN: opt -S -loop-simplify -loop-fusion -debug-only=loop-fusion < %s 2>&1 | FileCheck %s
+; REQUIRES: asserts
+; CHECK: have side-effects.
+; CHECK: Could not hoist/sink all instructions
+
+@A = common global [100 x i32] zeroinitializer, align 16
+define void @sink_preheader(i32 %N) {
+; CHECK:pre1:
+; CHECK-NEXT: %ptr = alloca i32, align 4
+; CHECK-NEXT: br label %body1
+pre1:
+ %ptr = alloca i32
+ br label %body1
+
+; CHECK:body1:
+; CHECK-NOT: %stay =
+body1: ; preds = %pre1, %body1
+ %i = phi i32 [%i_next, %body1], [0, %pre1]
+ %i_next = add i32 1, %i
+ %cond = icmp ne i32 %i, %N
+ br i1 %cond, label %body1, label %pre2
+
+; CHECK:pre2:
+; CHECK-NEXT: store i32 3, i32* %ptr
+pre2:
+ store i32 3, i32* %ptr
+ br label %body2
+
+; CHECK: body2:
+; CHECK-NOT: %stay =
+body2: ; preds = %pre2, %body2
+ %i2 = phi i32 [%i_next2, %body2], [0, %pre2]
+ %i_next2 = add i32 1, %i2
+ %cond2 = icmp ne i32 %i2, %N
+ br i1 %cond2, label %body2, label %exit
+
+; CHECK: exit:
+; CHECK-NOT: %stay =
+exit:
+ ret void
+}
--- /dev/null
+; RUN: opt -S -loop-simplify -loop-fusion -debug-only=loop-fusion < %s 2>&1 | FileCheck %s
+; REQUIRES: asserts
+; CHECK: may have side-effects
+; CHECK: Could not hoist/sink all instructions
+
+declare void @unknown_func()
+
+define void @sink_preheader(i32 %N) {
+; CHECK:pre1:
+; CHECK-NEXT: br label %body1
+pre1:
+ br label %body1
+
+; CHECK:body1:
+; CHECK-NOT: %stay =
+body1: ; preds = %pre1, %body1
+ %i = phi i32 [%i_next, %body1], [0, %pre1]
+ %i_next = add i32 1, %i
+ %cond = icmp ne i32 %i, %N
+ br i1 %cond, label %body1, label %pre2
+
+; CHECK:pre2:
+; CHECK-NEXT: call void @unknown_func()
+pre2:
+ call void @unknown_func()
+ br label %body2
+
+; CHECK: body2:
+; CHECK-NOT: %stay =
+body2: ; preds = %pre2, %body2
+ %i2 = phi i32 [%i_next2, %body2], [0, %pre2]
+ %i_next2 = add i32 1, %i2
+ %cond2 = icmp ne i32 %i2, %N
+ br i1 %cond2, label %body2, label %exit
+
+; CHECK: exit:
+; CHECK-NOT: %stay =
+exit:
+ ret void
+}
}
; Test that `%add` cannot be moved to basic block entry, as it uses %i, which
-; defined after basic block entry. And the two loops for.first and for.second
-; are not fused.
+; defined after basic block entry. It also cannot be moved to for.second.exit
+; since it is used in for.second. Check also that the two loops for.first and
+; for.second are not fused.
define i64 @unsafe_preheader(i32* %A, i64 %x) {
; CHECK-LABEL: @unsafe_preheader(
; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[X:%.*]], [[I]]
; CHECK-NEXT: br label [[FOR_SECOND:%.*]]
; CHECK: for.second:
-; CHECK-NEXT: [[J:%.*]] = phi i64 [ 0, [[FOR_FIRST_EXIT]] ], [ [[INC_J:%.*]], [[FOR_SECOND]] ]
+; CHECK-NEXT: [[J:%.*]] = phi i64 [ [[ADD]], [[FOR_FIRST_EXIT]] ], [ [[INC_J:%.*]], [[FOR_SECOND]] ]
; CHECK-NEXT: [[AJ:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[J]]
; CHECK-NEXT: store i32 2, i32* [[AJ]], align 4
; CHECK-NEXT: [[INC_J]] = add nsw i64 [[J]], 1
br label %for.second
for.second:
- %j = phi i64 [ 0, %for.first.exit ], [ %inc.j, %for.second ]
+ %j = phi i64 [ %add, %for.first.exit ], [ %inc.j, %for.second ]
%Aj = getelementptr inbounds i32, i32* %A, i64 %j
store i32 2, i32* %Aj, align 4
%inc.j = add nsw i64 %j, 1
--- /dev/null
+; RUN: opt -S -loop-fusion < %s | FileCheck %s
+
+define void @sink_preheader(i32 %N) {
+; CHECK-LABEL: @sink_preheader(
+; CHECK-NEXT: pre1:
+; CHECK-NEXT: br label [[BODY1:%.*]]
+; CHECK: body1:
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[BODY1]] ], [ 0, [[PRE1:%.*]] ]
+; CHECK-NEXT: [[I2:%.*]] = phi i32 [ [[I_NEXT2:%.*]], [[BODY1]] ], [ 0, [[PRE1]] ]
+; CHECK-NEXT: [[I_NEXT]] = add i32 1, [[I]]
+; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[I]], [[N:%.*]]
+; CHECK-NEXT: [[BARRIER:%.*]] = add i32 1, [[N]]
+; CHECK-NEXT: [[I_NEXT2]] = add i32 1, [[I2]]
+; CHECK-NEXT: [[COND2:%.*]] = icmp ne i32 [[I2]], [[N]]
+; CHECK-NEXT: br i1 [[COND2]], label [[BODY1]], label [[EXIT:%.*]]
+; CHECK: exit:
+; CHECK-NEXT: [[SINKME:%.*]] = add i32 1, [[BARRIER]]
+; CHECK-NEXT: [[SINKME2:%.*]] = add i32 1, [[BARRIER]]
+; CHECK-NEXT: [[SINKME3:%.*]] = add i32 1, [[SINKME2]]
+; CHECK-NEXT: ret void
+;
+pre1:
+ br label %body1
+
+body1: ; preds = %pre1, %body1
+ %i = phi i32 [%i_next, %body1], [0, %pre1]
+ %i_next = add i32 1, %i
+ %cond = icmp ne i32 %i, %N
+ %barrier = add i32 1, %N
+ br i1 %cond, label %body1, label %pre2
+
+pre2:
+ %sinkme = add i32 1, %barrier
+ %sinkme2 = add i32 1, %barrier
+ %sinkme3 = add i32 1, %sinkme2
+ br label %body2
+
+body2: ; preds = %pre2, %body2
+ %i2 = phi i32 [%i_next2, %body2], [0, %pre2]
+ %i_next2 = add i32 1, %i2
+ %cond2 = icmp ne i32 %i2, %N
+ br i1 %cond2, label %body2, label %exit
+
+exit:
+ ret void
+}