From 7ad786a29e7bcd75bf3e7af2d96a0bf419faff64 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Fri, 20 Jan 2023 12:15:44 -0800 Subject: [PATCH] [LSR] Generalize one aspect of terminator folding (recently introduced in D132443) There's no need to require the start value to come directly from the loop predecessor. This was sometimes covering up a latent miscompile in this off-by-default option, but the miscompile needs fixed anyways and the issue has been raised on the original review. Differential Revision: https://reviews.llvm.org/D142240 --- llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp | 13 ++------ .../lsr-term-fold-negative-testcase.ll | 23 ------------- .../Transforms/LoopStrengthReduce/lsr-term-fold.ll | 39 ++++++++++++++++++++++ 3 files changed, 41 insertions(+), 34 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 7325694..f04420b 100644 --- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -6625,7 +6625,6 @@ canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT, return std::nullopt; } - BasicBlock *LoopPreheader = L->getLoopPreheader(); BasicBlock *LoopLatch = L->getLoopLatch(); // TODO: Can we do something for greater than and less than? @@ -6712,12 +6711,7 @@ canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT, << "\n"); return {false, nullptr}; } - // TODO: Right now we limit the phi node to help the folding be of a start - // value of getelementptr. We can extend to any kinds of IV as long as it is - // an affine AddRec. Add a switch to cover more types of instructions here - // and down in the actual transformation. - return {isa(PN.getIncomingValueForBlock(LoopPreheader)), - TermValueS}; + return {true, TermValueS}; }; PHINode *ToFold = nullptr; @@ -6850,15 +6844,12 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE, SCEVExpanderCleaner ExpCleaner(Expander); // Create new terminating value at loop header - GetElementPtrInst *StartValueGEP = cast(StartValue); - Type *PtrTy = StartValueGEP->getPointerOperand()->getType(); - const SCEV *TermValueS = CanFoldTerminatingCondition->second.second; assert( Expander.isSafeToExpand(TermValueS) && "Terminating value was checked safe in canFoldTerminatingCondition"); - Value *TermValue = Expander.expandCodeFor(TermValueS, PtrTy, + Value *TermValue = Expander.expandCodeFor(TermValueS, ToHelpFold->getType(), LoopPreheader->getTerminator()); LLVM_DEBUG(dbgs() << "Start value of new term-cond phi-node:\n" diff --git a/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold-negative-testcase.ll b/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold-negative-testcase.ll index 21bb8e5..2856da9 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold-negative-testcase.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold-negative-testcase.ll @@ -51,29 +51,6 @@ for.body4: ; preds = %for.body, %for.body br i1 %cmp2, label %for.body4, label %for.cond.cleanup3 } -; The terminating condition folding transformation cannot find the ptr IV -; because it checks if the value comes from the LoopPreheader. %mark is from -; the function argument, so it is not qualified for the transformation. -define void @no_iv_to_help(ptr %mark, i32 signext %length) { -; CHECK: Cannot find other AddRec IV to help folding -entry: - %tobool.not3 = icmp eq i32 %length, 0 - br i1 %tobool.not3, label %for.cond.cleanup, label %for.body - -for.cond.cleanup: ; preds = %for.body, %entry - ret void - -for.body: ; preds = %entry, %for.body - %i.05 = phi i32 [ %dec, %for.body ], [ %length, %entry ] - %dst.04 = phi ptr [ %incdec.ptr, %for.body ], [ %mark, %entry ] - %0 = load ptr, ptr %dst.04, align 8 - call ptr @foo(ptr %0) - %incdec.ptr = getelementptr inbounds ptr, ptr %dst.04, i64 1 - %dec = add nsw i32 %i.05, -1 - %tobool.not = icmp eq i32 %dec, 0 - br i1 %tobool.not, label %for.cond.cleanup, label %for.body -} - declare void @foo(ptr) define void @NonAddRecIV(ptr %a) { diff --git a/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll b/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll index 30fbed9..23b06ba 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll @@ -133,3 +133,42 @@ for.end: ; preds = %for.body } declare void @foo(ptr) + +define void @iv_start_non_preheader(ptr %mark, i32 signext %length) { +; CHECK-LABEL: @iv_start_non_preheader( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TOBOOL_NOT3:%.*]] = icmp eq i32 [[LENGTH:%.*]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT3]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = shl i32 [[LENGTH]], 2 +; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[MARK:%.*]], i32 [[TMP0]] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.cond.cleanup.loopexit: +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void +; CHECK: for.body: +; CHECK-NEXT: [[DST_04:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[MARK]], [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DST_04]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = call ptr @foo(ptr [[TMP1]]) +; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds ptr, ptr [[DST_04]], i64 1 +; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[INCDEC_PTR]], [[UGLYGEP]] +; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]] +; +entry: + %tobool.not3 = icmp eq i32 %length, 0 + br i1 %tobool.not3, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + ret void + +for.body: ; preds = %entry, %for.body + %i.05 = phi i32 [ %dec, %for.body ], [ %length, %entry ] + %dst.04 = phi ptr [ %incdec.ptr, %for.body ], [ %mark, %entry ] + %0 = load ptr, ptr %dst.04, align 8 + call ptr @foo(ptr %0) + %incdec.ptr = getelementptr inbounds ptr, ptr %dst.04, i64 1 + %dec = add nsw i32 %i.05, -1 + %tobool.not = icmp eq i32 %dec, 0 + br i1 %tobool.not, label %for.cond.cleanup, label %for.body +} -- 2.7.4