DeadInsts.emplace_back(OperandIsInstr);
}
+/// Return true if the IV increment feeding \p Fixup may be hoisted into the
+/// header block of \p L.
+///
+/// Hoisting is only attempted when every user of the value being replaced
+/// lives in the loop header. Placing the increment next to the memory access
+/// helps the backend form a post-indexed load/store when the latch block is
+/// different from the loop header block.
+///
+/// \param TTI queried for post-increment indexed load/store legality.
+/// \param Fixup the fixup whose user instruction and replaced operand are
+///        inspected.
+/// \param LU the use that owns \p Fixup; only address uses qualify.
+/// \param IVIncInsertPos the currently chosen IV increment insertion point.
+/// \param L the loop being transformed.
+static bool canHoistIVInc(const TargetTransformInfo &TTI, const LSRFixup &Fixup,
+                          const LSRUse &LU, Instruction *IVIncInsertPos,
+                          Loop *L) {
+  if (LU.Kind != LSRUse::Address)
+    return false;
+
+  // For now this is deliberately conservative and only handles the header
+  // block. Later we can hoist the IVInc to a block that post-dominates all
+  // users.
+  BasicBlock *LHeader = L->getHeader();
+  // The increment is already placed in the header: nothing to hoist.
+  if (IVIncInsertPos->getParent() == LHeader)
+    return false;
+
+  // Reject the fixup if any user of the replaced value is outside the header.
+  if (!Fixup.OperandValToReplace ||
+      any_of(Fixup.OperandValToReplace->users(), [LHeader](User *U) {
+        Instruction *UI = cast<Instruction>(U);
+        return UI->getParent() != LHeader;
+      }))
+    return false;
+
+  Instruction *I = Fixup.UserInst;
+  if (isa<LoadInst>(I))
+    return TTI.isIndexedLoadLegal(TTI.MIM_PostInc, I->getType());
+
+  // A store instruction itself has void type, so legality must be queried
+  // with the type of the value being stored rather than I->getType().
+  if (auto *SI = dyn_cast<StoreInst>(I))
+    return TTI.isIndexedStoreLegal(TTI.MIM_PostInc,
+                                   SI->getValueOperand()->getType());
+
+  return false;
+}
+
/// Rewrite all the fixup locations with new values, following the chosen
/// solution.
void LSRInstance::ImplementSolution(
// we can remove them after we are done working.
SmallVector<WeakTrackingVH, 16> DeadInsts;
- Rewriter.setIVIncInsertPos(L, IVIncInsertPos);
-
// Mark phi nodes that terminate chains so the expander tries to reuse them.
for (const IVChain &Chain : IVChainVec) {
if (PHINode *PN = dyn_cast<PHINode>(Chain.tailUserInst()))
// Expand the new value definitions and update the users.
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx)
for (const LSRFixup &Fixup : Uses[LUIdx].Fixups) {
+ Instruction *InsertPos =
+ canHoistIVInc(TTI, Fixup, Uses[LUIdx], IVIncInsertPos, L)
+ ? L->getHeader()->getTerminator()
+ : IVIncInsertPos;
+ Rewriter.setIVIncInsertPos(L, InsertPos);
Rewrite(Uses[LUIdx], Fixup, *Solution[LUIdx], DeadInsts);
Changed = true;
}
; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: .LBB0_2: // %for.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr w9, [x1]
+; CHECK-NEXT: ldr w9, [x1], #4
; CHECK-NEXT: cbnz w9, .LBB0_5
; CHECK-NEXT: // %bb.3: // %for.cond
; CHECK-NEXT: // in Loop: Header=BB0_2 Depth=1
-; CHECK-NEXT: add x1, x1, #4
; CHECK-NEXT: subs x8, x8, #1
; CHECK-NEXT: b.ne .LBB0_2
; CHECK-NEXT: .LBB0_4:
br label %for.body
for.cond: ; preds = %for.body
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, %wide.trip.count
br i1 %exitcond.not, label %return, label %for.body
for.body: ; preds = %for.body.preheader, %for.cond
- %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.cond ]
- %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+ %iv = phi i64 [ 0, %for.body.preheader ], [ %iv.next, %for.cond ]
+ %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
%val = load i32, ptr %arrayidx, align 4
%tobool3.not = icmp eq i32 %val, 0
br i1 %tobool3.not, label %for.cond, label %return
%retval.1 = phi i32 [ 0, %entry ], [ 0, %for.cond ], [ 1, %for.body ]
ret i32 %retval.1
}
+
+; Negative case: the load through %arrayidx.b lives in %if.then rather than in
+; the loop header (%for.body), so not every IV user is in the header. The
+; expected output keeps the single `add x8, x8, #1` in the latch (%for.cond)
+; and uses register-offset loads instead of a post-indexed load.
+
+define i64 @IVIncHoist_not_all_user_in_header(i32 %c, ptr %a, ptr %b) {
+; CHECK-LABEL: IVIncHoist_not_all_user_in_header:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: cmp w0, #1
+; CHECK-NEXT: b.lt .LBB1_5
+; CHECK-NEXT: // %bb.1: // %for.body.preheader
+; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov w9, w0
+; CHECK-NEXT: add x10, x1, #4
+; CHECK-NEXT: add x11, x2, #8
+; CHECK-NEXT: mov w0, #1
+; CHECK-NEXT: .LBB1_2: // %for.body
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ldr w12, [x10, x8, lsl #2]
+; CHECK-NEXT: cbnz w12, .LBB1_7
+; CHECK-NEXT: // %bb.3: // %if.then
+; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1
+; CHECK-NEXT: ldr w12, [x11, x8, lsl #2]
+; CHECK-NEXT: cbnz w12, .LBB1_6
+; CHECK-NEXT: // %bb.4: // %for.cond
+; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1
+; CHECK-NEXT: add x8, x8, #1
+; CHECK-NEXT: cmp x9, x8
+; CHECK-NEXT: b.ne .LBB1_2
+; CHECK-NEXT: .LBB1_5:
+; CHECK-NEXT: mov x0, xzr
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB1_6: // %if.then.return.loopexit_crit_edge
+; CHECK-NEXT: add x0, x8, #3
+; CHECK-NEXT: .LBB1_7: // %return
+; CHECK-NEXT: ret
+entry:
+ %cmp13 = icmp sgt i32 %c, 0
+ br i1 %cmp13, label %for.body.preheader, label %return
+
+for.body.preheader: ; preds = %entry
+ %wide.trip.count = zext i32 %c to i64
+ br label %for.body
+
+for.cond: ; preds = %if.then
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, %wide.trip.count
+ br i1 %exitcond.not, label %return, label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.cond
+ %iv = phi i64 [ 0, %for.body.preheader ], [ %iv.next, %for.cond ]
+ %iv.a = phi i64 [ 1, %for.body.preheader ], [ %iv.next.a, %for.cond ]
+ %iv.b = phi i64 [ 2, %for.body.preheader ], [ %iv.next.b, %for.cond ]
+ %arrayidx.a = getelementptr inbounds i32, ptr %a, i64 %iv.a
+ %iv.next.a = add nuw nsw i64 %iv.a, 1
+ %val.a = load i32, ptr %arrayidx.a, align 4
+ %tobool3.not = icmp eq i32 %val.a, 0
+ br i1 %tobool3.not, label %if.then, label %return
+
+if.then: ; preds = %for.body
+ %arrayidx.b = getelementptr inbounds i32, ptr %b, i64 %iv.b
+ %iv.next.b = add nuw nsw i64 %iv.b, 1
+ %val.b = load i32, ptr %arrayidx.b, align 4
+ %tobool4.not = icmp eq i32 %val.b, 0
+ br i1 %tobool4.not, label %for.cond, label %return
+
+return: ; preds = %if.then, %for.cond, %for.body, %entry
+ %retval.1 = phi i64 [ 0, %entry ], [ 0, %for.cond ], [ 1, %for.body ], [ %iv.next.b, %if.then ]
+ ret i64 %retval.1
+}