There's no need to require the start value to come directly from the loop predecessor. This was sometimes covering up a latent miscompile in this off-by-default option, but the miscompile needs fixed anyways and the issue has been raised on the original review.
Differential Revision: https://reviews.llvm.org/D142240
return std::nullopt;
}
- BasicBlock *LoopPreheader = L->getLoopPreheader();
BasicBlock *LoopLatch = L->getLoopLatch();
// TODO: Can we do something for greater than and less than?
<< "\n");
return {false, nullptr};
}
- // TODO: Right now we limit the phi node to help the folding be of a start
- // value of getelementptr. We can extend to any kinds of IV as long as it is
- // an affine AddRec. Add a switch to cover more types of instructions here
- // and down in the actual transformation.
- return {isa<GetElementPtrInst>(PN.getIncomingValueForBlock(LoopPreheader)),
- TermValueS};
+ return {true, TermValueS};
};
PHINode *ToFold = nullptr;
SCEVExpanderCleaner ExpCleaner(Expander);
// Create new terminating value at loop header
- GetElementPtrInst *StartValueGEP = cast<GetElementPtrInst>(StartValue);
- Type *PtrTy = StartValueGEP->getPointerOperand()->getType();
-
const SCEV *TermValueS = CanFoldTerminatingCondition->second.second;
assert(
Expander.isSafeToExpand(TermValueS) &&
"Terminating value was checked safe in canFoldTerminatingCondition");
- Value *TermValue = Expander.expandCodeFor(TermValueS, PtrTy,
+ Value *TermValue = Expander.expandCodeFor(TermValueS, ToHelpFold->getType(),
LoopPreheader->getTerminator());
LLVM_DEBUG(dbgs() << "Start value of new term-cond phi-node:\n"
br i1 %cmp2, label %for.body4, label %for.cond.cleanup3
}
-; The terminating condition folding transformation cannot find the ptr IV
-; because it checks if the value comes from the LoopPreheader. %mark is from
-; the function argument, so it is not qualified for the transformation.
-define void @no_iv_to_help(ptr %mark, i32 signext %length) {
-; CHECK: Cannot find other AddRec IV to help folding
-entry:
- %tobool.not3 = icmp eq i32 %length, 0
- br i1 %tobool.not3, label %for.cond.cleanup, label %for.body
-
-for.cond.cleanup: ; preds = %for.body, %entry
- ret void
-
-for.body: ; preds = %entry, %for.body
- %i.05 = phi i32 [ %dec, %for.body ], [ %length, %entry ]
- %dst.04 = phi ptr [ %incdec.ptr, %for.body ], [ %mark, %entry ]
- %0 = load ptr, ptr %dst.04, align 8
- call ptr @foo(ptr %0)
- %incdec.ptr = getelementptr inbounds ptr, ptr %dst.04, i64 1
- %dec = add nsw i32 %i.05, -1
- %tobool.not = icmp eq i32 %dec, 0
- br i1 %tobool.not, label %for.cond.cleanup, label %for.body
-}
-
declare void @foo(ptr)
define void @NonAddRecIV(ptr %a) {
}
declare void @foo(ptr)
+
+define void @iv_start_non_preheader(ptr %mark, i32 signext %length) {
+; CHECK-LABEL: @iv_start_non_preheader(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TOBOOL_NOT3:%.*]] = icmp eq i32 [[LENGTH:%.*]], 0
+; CHECK-NEXT: br i1 [[TOBOOL_NOT3]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
+; CHECK: for.body.preheader:
+; CHECK-NEXT: [[TMP0:%.*]] = shl i32 [[LENGTH]], 2
+; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[MARK:%.*]], i32 [[TMP0]]
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.cond.cleanup.loopexit:
+; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
+; CHECK: for.cond.cleanup:
+; CHECK-NEXT: ret void
+; CHECK: for.body:
+; CHECK-NEXT: [[DST_04:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[MARK]], [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DST_04]], align 8
+; CHECK-NEXT: [[TMP2:%.*]] = call ptr @foo(ptr [[TMP1]])
+; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds ptr, ptr [[DST_04]], i64 1
+; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[INCDEC_PTR]], [[UGLYGEP]]
+; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]]
+;
+entry:
+ %tobool.not3 = icmp eq i32 %length, 0
+ br i1 %tobool.not3, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ ret void
+
+for.body: ; preds = %entry, %for.body
+ %i.05 = phi i32 [ %dec, %for.body ], [ %length, %entry ]
+ %dst.04 = phi ptr [ %incdec.ptr, %for.body ], [ %mark, %entry ]
+ %0 = load ptr, ptr %dst.04, align 8
+ call ptr @foo(ptr %0)
+ %incdec.ptr = getelementptr inbounds ptr, ptr %dst.04, i64 1
+ %dec = add nsw i32 %i.05, -1
+ %tobool.not = icmp eq i32 %dec, 0
+ br i1 %tobool.not, label %for.cond.cleanup, label %for.body
+}