From: Joshua Cao Date: Fri, 6 Jan 2023 04:48:13 +0000 (-0800) Subject: [LoopReroll] Allow for multiple loop control only induction vars X-Git-Tag: upstream/17.0.6~21085 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=f1705509400f4fbfb168f8f81f8eaa71dc68fa2a;p=platform%2Fupstream%2Fllvm.git [LoopReroll] Allow for multiple loop control only induction vars Before this, LoopReroll would fail an assertion, falsely assuming that there can only possibly be a single loop control only induction variable. For example: ``` %a = phi i16 [ %dec2, %for.body ], [ 0, %entry ] %b = phi i16 [ %dec1, %for.body ], [ 0, %entry ] %a.next = add nsw i16 %1, -1 %b.next = add nsw i16 %0, -1 %add = add nsw i16 %a, %b ; ... rerollable code %cmp.not = icmp eq i16 -10, %add br i1 %cmp.not, label %exit, label %loop ``` Both %a and %b are valid loop control only induction vars Additionally, some NFC changes to remove unnecessary isa check Updated complex_reroll checks Differential Revision: https://reviews.llvm.org/D141109 --- diff --git a/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp b/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp index 62618880..a0b3189 100644 --- a/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp @@ -191,13 +191,14 @@ namespace { using SmallInstructionVector = SmallVector; using SmallInstructionSet = SmallPtrSet; + using TinyInstructionVector = SmallVector; // Map between induction variable and its increment DenseMap IVToIncMap; - // For loop with multiple induction variable, remember the one used only to + // For loop with multiple induction variables, remember the ones used only to // control the loop. - Instruction *LoopControlIV; + TinyInstructionVector LoopControlIVs; // A chain of isomorphic instructions, identified by a single-use PHI // representing a reduction. 
Only the last value may be used outside the @@ -386,10 +387,10 @@ namespace { TargetLibraryInfo *TLI, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA, DenseMap &IncrMap, - Instruction *LoopCtrlIV) + TinyInstructionVector LoopCtrlIVs) : Parent(Parent), L(L), SE(SE), AA(AA), TLI(TLI), DT(DT), LI(LI), PreserveLCSSA(PreserveLCSSA), IV(IV), IVToIncMap(IncrMap), - LoopControlIV(LoopCtrlIV) {} + LoopControlIVs(LoopCtrlIVs) {} /// Stage 1: Find all the DAG roots for the induction variable. bool findRoots(); @@ -468,7 +469,7 @@ namespace { // Map between induction variable and its increment DenseMap &IVToIncMap; - Instruction *LoopControlIV; + TinyInstructionVector LoopControlIVs; }; // Check if it is a compare-like instruction whose user is a branch @@ -577,33 +578,28 @@ bool LoopReroll::isLoopControlIV(Loop *L, Instruction *IV) { // be possible to reroll the loop. void LoopReroll::collectPossibleIVs(Loop *L, SmallInstructionVector &PossibleIVs) { - BasicBlock *Header = L->getHeader(); - for (BasicBlock::iterator I = Header->begin(), - IE = Header->getFirstInsertionPt(); I != IE; ++I) { - if (!isa(I)) - continue; - if (!I->getType()->isIntegerTy() && !I->getType()->isPointerTy()) + for (Instruction &IV : L->getHeader()->phis()) { + if (!IV.getType()->isIntegerTy() && !IV.getType()->isPointerTy()) continue; if (const SCEVAddRecExpr *PHISCEV = - dyn_cast(SE->getSCEV(&*I))) { + dyn_cast(SE->getSCEV(&IV))) { if (PHISCEV->getLoop() != L) continue; if (!PHISCEV->isAffine()) continue; - auto IncSCEV = dyn_cast(PHISCEV->getStepRecurrence(*SE)); + const auto *IncSCEV = dyn_cast(PHISCEV->getStepRecurrence(*SE)); if (IncSCEV) { - IVToIncMap[&*I] = IncSCEV->getValue()->getSExtValue(); - LLVM_DEBUG(dbgs() << "LRR: Possible IV: " << *I << " = " << *PHISCEV + IVToIncMap[&IV] = IncSCEV->getValue()->getSExtValue(); + LLVM_DEBUG(dbgs() << "LRR: Possible IV: " << IV << " = " << *PHISCEV << "\n"); - if (isLoopControlIV(L, &*I)) { - assert(!LoopControlIV && "Found two loop control only 
IV"); - LoopControlIV = &(*I); - LLVM_DEBUG(dbgs() << "LRR: Possible loop control only IV: " << *I + if (isLoopControlIV(L, &IV)) { + LoopControlIVs.push_back(&IV); + LLVM_DEBUG(dbgs() << "LRR: Loop control only IV: " << IV << " = " << *PHISCEV << "\n"); } else - PossibleIVs.push_back(&*I); + PossibleIVs.push_back(&IV); } } } @@ -1184,7 +1180,7 @@ bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) { // Make sure we mark loop-control-only PHIs as used in all iterations. See // comment above LoopReroll::isLoopControlIV for more information. BasicBlock *Header = L->getHeader(); - if (LoopControlIV && LoopControlIV != IV) { + for (Instruction *LoopControlIV : LoopControlIVs) { for (auto *U : LoopControlIV->users()) { Instruction *IVUser = dyn_cast(U); // IVUser could be loop increment or compare @@ -1633,7 +1629,7 @@ bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header, const SCEV *BackedgeTakenCount, ReductionTracker &Reductions) { DAGRootTracker DAGRoots(this, L, IV, SE, AA, TLI, DT, LI, PreserveLCSSA, - IVToIncMap, LoopControlIV); + IVToIncMap, LoopControlIVs); if (!DAGRoots.findRoots()) return false; @@ -1676,7 +1672,7 @@ bool LoopReroll::runOnLoop(Loop *L) { // reroll (there may be several possible options). 
SmallInstructionVector PossibleIVs; IVToIncMap.clear(); - LoopControlIV = nullptr; + LoopControlIVs.clear(); collectPossibleIVs(L, PossibleIVs); if (PossibleIVs.empty()) { diff --git a/llvm/test/Transforms/LoopReroll/complex_reroll.ll b/llvm/test/Transforms/LoopReroll/complex_reroll.ll index 8e21e8d..7c656f5 100644 --- a/llvm/test/Transforms/LoopReroll/complex_reroll.ll +++ b/llvm/test/Transforms/LoopReroll/complex_reroll.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -passes=loop-reroll %s | FileCheck %s declare i32 @goo(i32, i32) @@ -5,21 +6,30 @@ declare i32 @goo(i32, i32) @aaa = global [16 x i8] c"\01\02\03\04\05\06\07\08\09\0A\0B\0C\0D\0E\0F\10", align 1 define i32 @test1(i32 %len) { +; CHECK-LABEL: @test1( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[WHILE_BODY:%.*]] +; CHECK: while.body: +; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[WHILE_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[SUM44_020:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[WHILE_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[INDVAR]] to i32 +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr [16 x i8], [16 x i8]* @aaa, i64 0, i64 [[INDVAR]] +; CHECK-NEXT: [[TMP1:%.*]] = load i8, i8* [[SCEVGEP]], align 1 +; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP1]] to i64 +; CHECK-NEXT: [[ADD]] = add i64 [[CONV]], [[SUM44_020]] +; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[TMP0]], 15 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[WHILE_END:%.*]], label [[WHILE_BODY]] +; CHECK: while.end: +; CHECK-NEXT: [[ADD9_LCSSA:%.*]] = phi i64 [ [[ADD]], [[WHILE_BODY]] ] +; CHECK-NEXT: [[CONV11:%.*]] = trunc i64 [[ADD9_LCSSA]] to i32 +; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @goo(i32 0, i32 [[CONV11]]) +; CHECK-NEXT: unreachable +; entry: br label %while.body while.body: -;CHECK-LABEL: while.body: -;CHECK-NEXT: %indvar = phi i64 [ %indvar.next, %while.body ], [ 0, %entry ] 
-;CHECK-NEXT: %sum44.020 = phi i64 [ 0, %entry ], [ %add, %while.body ] -;CHECK-NEXT: %0 = trunc i64 %indvar to i32 -;CHECK-NEXT: %scevgep = getelementptr [16 x i8], [16 x i8]* @aaa, i64 0, i64 %indvar -;CHECK-NEXT: [[T2:%[0-9]+]] = load i8, i8* %scevgep, align 1 -;CHECK-NEXT: %conv = zext i8 [[T2]] to i64 -;CHECK-NEXT: %add = add i64 %conv, %sum44.020 -;CHECK-NEXT: %indvar.next = add i64 %indvar, 1 -;CHECK-NEXT: %exitcond = icmp eq i32 %0, 15 -;CHECK-NEXT: br i1 %exitcond, label %while.end, label %while.body %dec22 = phi i32 [ 4, %entry ], [ %dec, %while.body ] %buf.021 = phi i8* [ getelementptr inbounds ([16 x i8], [16 x i8]* @aaa, i64 0, i64 0), %entry ], [ %add.ptr, %while.body ] @@ -51,6 +61,33 @@ while.end: ; preds = %while.body } define i32 @test2(i32 %N, i32* nocapture readonly %a, i32 %S) { +; CHECK-LABEL: @test2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP_9:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP_9]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.body.lr.ph: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[TMP0]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i32 [[TMP1]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i32 [[TMP2]], 1 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.cond.for.cond.cleanup_crit_edge: +; CHECK-NEXT: [[ADD2_LCSSA:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: [[S_ADDR_0_LCSSA:%.*]] = phi i32 [ [[ADD2_LCSSA]], [[FOR_COND_FOR_COND_CLEANUP_CRIT_EDGE:%.*]] ], [ [[S:%.*]], [[ENTRY:%.*]] ] +; CHECK-NEXT: ret i32 [[S_ADDR_0_LCSSA]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_LR_PH]] ] +; CHECK-NEXT: [[S_ADDR_011:%.*]] = phi i32 [ [[S]], [[FOR_BODY_LR_PH]] ], [ [[ADD]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[INDVAR]] to i32 +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, 
i32* [[A:%.*]], i64 [[INDVAR]] +; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[SCEVGEP]], align 4 +; CHECK-NEXT: [[ADD]] = add nsw i32 [[TMP5]], [[S_ADDR_011]] +; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[TMP4]], [[TMP3]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_FOR_COND_CLEANUP_CRIT_EDGE]], label [[FOR_BODY]] +; entry: %cmp.9 = icmp sgt i32 %N, 0 br i1 %cmp.9, label %for.body.lr.ph, label %for.cond.cleanup @@ -66,16 +103,6 @@ for.cond.cleanup: ret i32 %S.addr.0.lcssa for.body: -;CHECK-LABEL: for.body: -;CHECK-NEXT: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %for.body.lr.ph ] -;CHECK-NEXT: %S.addr.011 = phi i32 [ %S, %for.body.lr.ph ], [ %add, %for.body ] -;CHECK-NEXT: %4 = trunc i64 %indvar to i32 -;CHECK-NEXT: %scevgep = getelementptr i32, i32* %a, i64 %indvar -;CHECK-NEXT: %5 = load i32, i32* %scevgep, align 4 -;CHECK-NEXT: %add = add nsw i32 %5, %S.addr.011 -;CHECK-NEXT: %indvar.next = add i64 %indvar, 1 -;CHECK-NEXT: %exitcond = icmp eq i32 %4, %3 -;CHECK-NEXT: br i1 %exitcond, label %for.cond.for.cond.cleanup_crit_edge, label %for.body %i.012 = phi i32 [ 0, %for.body.lr.ph ], [ %add3, %for.body ] %S.addr.011 = phi i32 [ %S, %for.body.lr.ph ], [ %add2, %for.body ] @@ -92,6 +119,34 @@ for.body: } define i32 @test3(i32* nocapture readonly %buf, i32 %len) #0 { +; CHECK-LABEL: @test3( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[LEN:%.*]], 1 +; CHECK-NEXT: br i1 [[CMP10]], label [[WHILE_BODY_PREHEADER:%.*]], label [[WHILE_END:%.*]] +; CHECK: while.body.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[LEN]], -2 +; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[TMP0]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i32 [[TMP1]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i32 [[TMP2]], 1 +; CHECK-NEXT: br label [[WHILE_BODY:%.*]] +; CHECK: while.body: +; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[WHILE_BODY]] ], [ 0, [[WHILE_BODY_PREHEADER]] ] +; CHECK-NEXT: 
[[S_012:%.*]] = phi i32 [ [[ADD:%.*]], [[WHILE_BODY]] ], [ undef, [[WHILE_BODY_PREHEADER]] ] +; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[INDVAR]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = mul nsw i64 [[INDVAR]], -1 +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[BUF:%.*]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[SCEVGEP]], align 4 +; CHECK-NEXT: [[ADD]] = add nsw i32 [[TMP6]], [[S_012]] +; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[TMP4]], [[TMP3]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]] +; CHECK: while.end.loopexit: +; CHECK-NEXT: [[ADD2_LCSSA:%.*]] = phi i32 [ [[ADD]], [[WHILE_BODY]] ] +; CHECK-NEXT: br label [[WHILE_END]] +; CHECK: while.end: +; CHECK-NEXT: [[S_0_LCSSA:%.*]] = phi i32 [ undef, [[ENTRY:%.*]] ], [ [[ADD2_LCSSA]], [[WHILE_END_LOOPEXIT]] ] +; CHECK-NEXT: ret i32 [[S_0_LCSSA]] +; entry: %cmp10 = icmp sgt i32 %len, 1 br i1 %cmp10, label %while.body.preheader, label %while.end @@ -100,17 +155,6 @@ while.body.preheader: ; preds = %entry br label %while.body while.body: ; preds = %while.body.preheader, %while.body -;CHECK-LABEL: while.body: -;CHECK-NEXT: %indvar = phi i64 [ %indvar.next, %while.body ], [ 0, %while.body.preheader ] -;CHECK-NEXT: %S.012 = phi i32 [ %add, %while.body ], [ undef, %while.body.preheader ] -;CHECK-NEXT: %4 = trunc i64 %indvar to i32 -;CHECK-NEXT: %5 = mul nsw i64 %indvar, -1 -;CHECK-NEXT: %scevgep = getelementptr i32, i32* %buf, i64 %5 -;CHECK-NEXT: %6 = load i32, i32* %scevgep, align 4 -;CHECK-NEXT: %add = add nsw i32 %6, %S.012 -;CHECK-NEXT: %indvar.next = add i64 %indvar, 1 -;CHECK-NEXT: %exitcond = icmp eq i32 %4, %3 -;CHECK-NEXT: br i1 %exitcond, label %while.end.loopexit, label %while.body %i.013 = phi i32 [ %sub, %while.body ], [ %len, %while.body.preheader ] %S.012 = phi i32 [ %add2, %while.body ], [ undef, %while.body.preheader ] @@ -133,3 +177,60 @@ while.end: ; preds = 
%while.end.loopexit, ret i32 %S.0.lcssa } +define i32 @test4(i32 %len) { +; CHECK-LABEL: @test4( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[WHILE_BODY:%.*]] +; CHECK: while.body: +; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[WHILE_BODY]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[SUM44_020:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[WHILE_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[INDVAR]] to i32 +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr [16 x i8], [16 x i8]* @aaa, i64 0, i64 [[INDVAR]] +; CHECK-NEXT: [[TMP1:%.*]] = load i8, i8* [[SCEVGEP]], align 1 +; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP1]] to i64 +; CHECK-NEXT: [[ADD]] = add i64 [[CONV]], [[SUM44_020]] +; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[TMP0]], 23 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[WHILE_END:%.*]], label [[WHILE_BODY]] +; CHECK: while.end: +; CHECK-NEXT: [[ADD9_LCSSA:%.*]] = phi i64 [ [[ADD]], [[WHILE_BODY]] ] +; CHECK-NEXT: [[CONV11:%.*]] = trunc i64 [[ADD9_LCSSA]] to i32 +; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @goo(i32 0, i32 [[CONV11]]) +; CHECK-NEXT: unreachable +; +entry: + br label %while.body + +while.body: + %a = phi i32 [ 4, %entry ], [ %a.next, %while.body ] + %b = phi i32 [ 6, %entry ], [ %b.next, %while.body ] + %buf.021 = phi i8* [ getelementptr inbounds ([16 x i8], [16 x i8]* @aaa, i64 0, i64 0), %entry ], [ %add.ptr, %while.body ] + %sum44.020 = phi i64 [ 0, %entry ], [ %add9, %while.body ] + %0 = load i8, i8* %buf.021, align 1 + %conv = zext i8 %0 to i64 + %add = add i64 %conv, %sum44.020 + %arrayidx1 = getelementptr inbounds i8, i8* %buf.021, i64 1 + %1 = load i8, i8* %arrayidx1, align 1 + %conv2 = zext i8 %1 to i64 + %add3 = add i64 %add, %conv2 + %arrayidx4 = getelementptr inbounds i8, i8* %buf.021, i64 2 + %2 = load i8, i8* %arrayidx4, align 1 + %conv5 = zext i8 %2 to i64 + %add6 = add i64 %add3, %conv5 + %arrayidx7 = getelementptr inbounds i8, i8* %buf.021, i64 3 + %3 
= load i8, i8* %arrayidx7, align 1 + %conv8 = zext i8 %3 to i64 + %add9 = add i64 %add6, %conv8 + %add.ptr = getelementptr inbounds i8, i8* %buf.021, i64 4 + %a.next = add nsw i32 %a, -1 + %b.next = add nsw i32 %b, -1 + %cond = add nsw i32 %a, %b + %tobool = icmp eq i32 %cond, 0 + br i1 %tobool, label %while.end, label %while.body + +while.end: ; preds = %while.body + %conv11 = trunc i64 %add9 to i32 + %call = tail call i32 @goo(i32 0, i32 %conv11) + unreachable +} +