return nullptr;
}
-static std::optional<std::tuple<PHINode *, PHINode *, const SCEV *>>
+static std::optional<std::tuple<PHINode *, PHINode *, const SCEV *, bool>>
canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
const LoopInfo &LI) {
if (!L->isInnermost()) {
PHINode *ToHelpFold = nullptr;
const SCEV *TermValueS = nullptr;
+ bool MustDropPoison = false;
for (PHINode &PN : L->getHeader()->phis()) {
if (ToFold == &PN)
continue;
<< "\n");
continue;
}
+
+ // The candidate IV may have been otherwise dead and poison from the
+ // very first iteration. If we can't disprove that, we can't use the IV.
+ if (!mustExecuteUBIfPoisonOnPathTo(&PN, LoopLatch->getTerminator(), &DT)) {
+ LLVM_DEBUG(dbgs() << "Can not prove poison safety for IV "
+ << PN << "\n");
+ continue;
+ }
+
+ // The candidate IV may become poison on the last iteration. If this
+ // value is not branched on, this is a well defined program. We're
+ // about to add a new use to this IV, and we have to ensure we don't
+ // insert UB which didn't previously exist.
+ bool MustDropPoisonLocal = false;
+ Instruction *PostIncV =
+ cast<Instruction>(PN.getIncomingValueForBlock(LoopLatch));
+ if (!mustExecuteUBIfPoisonOnPathTo(PostIncV, LoopLatch->getTerminator(),
+ &DT)) {
+ LLVM_DEBUG(dbgs() << "Can not prove poison safety to insert use"
+ << PN << "\n");
+
+ // If this is a complex recurrance with multiple instructions computing
+ // the backedge value, we might need to strip poison flags from all of
+ // them.
+ if (PostIncV->getOperand(0) != &PN)
+ continue;
+
+ // In order to perform the transform, we need to drop the poison generating
+ // flags on this instruction (if any).
+ MustDropPoisonLocal = PostIncV->hasPoisonGeneratingFlags();
+ }
+
// We pick the last legal alternate IV. We could expore choosing an optimal
// alternate IV if we had a decent heuristic to do so.
ToHelpFold = &PN;
TermValueS = TermValueSLocal;
+ MustDropPoison = MustDropPoisonLocal;
}
LLVM_DEBUG(if (ToFold && !ToHelpFold) dbgs()
if (!ToFold || !ToHelpFold)
return std::nullopt;
- return std::make_tuple(ToFold, ToHelpFold, TermValueS);
+ return std::make_tuple(ToFold, ToHelpFold, TermValueS, MustDropPoison);
}
static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
if (AllowTerminatingConditionFoldingAfterLSR) {
if (auto Opt = canFoldTermCondOfLoop(L, SE, DT, LI)) {
- auto [ToFold, ToHelpFold, TermValueS] = *Opt;
+ auto [ToFold, ToHelpFold, TermValueS, MustDrop] = *Opt;
Changed = true;
NumTermFold++;
(void)StartValue;
Value *LoopValue = ToHelpFold->getIncomingValueForBlock(LoopLatch);
+ // See comment in canFoldTermCondOfLoop on why this is sufficient.
+ if (MustDrop)
+ cast<Instruction>(LoopValue)->dropPoisonGeneratingFlags();
+
// SCEVExpander for both use in preheader and latch
const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
SCEVExpander Expander(SE, DL, "lsr_fold_term_cond");
; CHECK-NEXT: [[IT_04:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[START_PTRPTR]], [[ENTRY:%.*]] ]
; CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[IT_04]], align 8
; CHECK-NEXT: tail call void @foo(ptr [[TMP4]])
-; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds ptr, ptr [[IT_04]], i64 1
+; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr ptr, ptr [[IT_04]], i64 1
; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[INCDEC_PTR]], [[SCEVGEP]]
; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.end:
; CHECK-NEXT: [[DST_04:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[MARK]], [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DST_04]], align 8
; CHECK-NEXT: [[TMP5:%.*]] = call ptr @foo(ptr [[TMP4]])
-; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds ptr, ptr [[DST_04]], i64 1
+; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr ptr, ptr [[DST_04]], i64 1
; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[INCDEC_PTR]], [[SCEVGEP]]
; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]]
;
; advance the pointer IV by *4* each time, and thus on the iteration we write
; byte 16, %uglygep2 (the pointer increment) is past the end of the underlying
; storage and thus violates the inbounds requirements. As a result, %uglygep2
-; is poison on the final iteration. If we insert a branch on that value, we
-; have inserted undefined behavior where it did not previously exist.
-; FIXME: miscompile
+; is poison on the final iteration. If we insert a branch on that value
+; (without stripping the poison flag), we have inserted undefined behavior
+; where it did not previously exist.
define void @inbounds_poison_use(ptr %a) {
; CHECK-LABEL: @inbounds_poison_use(
; CHECK-NEXT: entry:
; CHECK: for.body:
; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[A]], [[ENTRY:%.*]] ]
; CHECK-NEXT: store i8 1, ptr [[LSR_IV1]], align 4
-; CHECK-NEXT: [[UGLYGEP2]] = getelementptr inbounds i8, ptr [[LSR_IV1]], i64 4
+; CHECK-NEXT: [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i64 4
; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[SCEVGEP]]
; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.end: