#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/UnrollLoop.h"
PreserveLCSSA);
}
-/// Create a clone of the blocks in a loop and connect them together.
-/// If CreateRemainderLoop is false, loop structure will not be cloned,
-/// otherwise a new loop will be created including all cloned blocks, and the
-/// iterator of it switches to count NewIter down to 0.
+/// Create a clone of the blocks in a loop and connect them together. A new
+/// loop will be created including all cloned blocks, and the iterator of the
+/// new loop switched to count NewIter down to 0.
/// The cloned blocks should be inserted between InsertTop and InsertBot.
-/// If loop structure is cloned InsertTop should be new preheader, InsertBot
-/// new loop exit.
-/// Return the new cloned loop that is created when CreateRemainderLoop is true.
+/// InsertTop should be new preheader, InsertBot new loop exit.
+/// Returns the new cloned loop that is created.
static Loop *
-CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop,
- const bool UseEpilogRemainder, const bool UnrollRemainder,
+CloneLoopBlocks(Loop *L, Value *NewIter, const bool UseEpilogRemainder,
+ const bool UnrollRemainder,
BasicBlock *InsertTop,
BasicBlock *InsertBot, BasicBlock *Preheader,
std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
Loop *ParentLoop = L->getParentLoop();
NewLoopsMap NewLoops;
NewLoops[ParentLoop] = ParentLoop;
- if (!CreateRemainderLoop)
- NewLoops[L] = ParentLoop;
// For each block in the original loop, create a new copy,
// and update the value map with the newly created values.
BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F);
NewBlocks.push_back(NewBB);
- // If we're unrolling the outermost loop, there's no remainder loop,
- // and this block isn't in a nested loop, then the new block is not
- // in any loop. Otherwise, add it to loopinfo.
- if (CreateRemainderLoop || LI->getLoopFor(*BB) != L || ParentLoop)
- addClonedBlockToLoopInfo(*BB, NewBB, LI, NewLoops);
+ addClonedBlockToLoopInfo(*BB, NewBB, LI, NewLoops);
VMap[*BB] = NewBB;
if (Header == *BB) {
}
if (Latch == *BB) {
- // For the last block, if CreateRemainderLoop is false, create a direct
- // jump to InsertBot. If not, create a loop back to cloned head.
+ // For the last block, create a loop back to cloned head.
VMap.erase((*BB)->getTerminator());
BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]);
BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator());
IRBuilder<> Builder(LatchBR);
- if (!CreateRemainderLoop) {
- Builder.CreateBr(InsertBot);
- } else {
- PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2,
- suffix + ".iter",
- FirstLoopBB->getFirstNonPHI());
- Value *IdxSub =
- Builder.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
- NewIdx->getName() + ".sub");
- Value *IdxCmp =
- Builder.CreateIsNotNull(IdxSub, NewIdx->getName() + ".cmp");
- Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot);
- NewIdx->addIncoming(NewIter, InsertTop);
- NewIdx->addIncoming(IdxSub, NewBB);
- }
+ PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2,
+ suffix + ".iter",
+ FirstLoopBB->getFirstNonPHI());
+ Value *IdxSub =
+ Builder.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
+ NewIdx->getName() + ".sub");
+ Value *IdxCmp =
+ Builder.CreateIsNotNull(IdxSub, NewIdx->getName() + ".cmp");
+ Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot);
+ NewIdx->addIncoming(NewIter, InsertTop);
+ NewIdx->addIncoming(IdxSub, NewBB);
LatchBR->eraseFromParent();
}
}
// cloned loop.
for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
- if (!CreateRemainderLoop) {
- if (UseEpilogRemainder) {
- unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
- NewPHI->setIncomingBlock(idx, InsertTop);
- NewPHI->removeIncomingValue(Latch, false);
- } else {
- VMap[&*I] = NewPHI->getIncomingValueForBlock(Preheader);
- cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI);
- }
- } else {
- unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
- NewPHI->setIncomingBlock(idx, InsertTop);
- BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
- idx = NewPHI->getBasicBlockIndex(Latch);
- Value *InVal = NewPHI->getIncomingValue(idx);
- NewPHI->setIncomingBlock(idx, NewLatch);
- if (Value *V = VMap.lookup(InVal))
- NewPHI->setIncomingValue(idx, V);
- }
+ unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
+ NewPHI->setIncomingBlock(idx, InsertTop);
+ BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
+ idx = NewPHI->getBasicBlockIndex(Latch);
+ Value *InVal = NewPHI->getIncomingValue(idx);
+ NewPHI->setIncomingBlock(idx, NewLatch);
+ if (Value *V = VMap.lookup(InVal))
+ NewPHI->setIncomingValue(idx, V);
}
- if (!CreateRemainderLoop)
- return nullptr;
Loop *NewLoop = NewLoops[L];
assert(NewLoop && "L should have been cloned");
std::vector<BasicBlock *> NewBlocks;
ValueToValueMapTy VMap;
- // For unroll factor 2 remainder loop will have 1 iterations.
- // Do not create 1 iteration loop.
- bool CreateRemainderLoop = (Count != 2);
-
// Clone all the basic blocks in the loop. A cloned loop is always created to
// execute the extra iterations (for Count == 2 its backedge is broken later).
// This function adds the appropriate CFG connections.
BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit;
BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
Loop *remainderLoop = CloneLoopBlocks(
- L, ModVal, CreateRemainderLoop, UseEpilogRemainder, UnrollRemainder,
- InsertTop, InsertBot,
+ L, ModVal, UseEpilogRemainder, UnrollRemainder, InsertTop, InsertBot,
NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI);
// Assign the maximum possible trip count as the back edge weight for the
assert(DT->verify(DominatorTree::VerificationLevel::Full));
#endif
+ // For unroll factor 2 remainder loop will have 1 iteration.
+ if (Count == 2 && DT && LI && SE) {
+ // TODO: This code could probably be pulled out into a helper function
+ // (e.g. breakLoopBackedgeAndSimplify) and reused in loop-deletion.
+ BasicBlock *RemainderLatch = remainderLoop->getLoopLatch();
+ assert(RemainderLatch);
+ SmallVector<BasicBlock*> RemainderBlocks(remainderLoop->getBlocks().begin(),
+ remainderLoop->getBlocks().end());
+ breakLoopBackedge(remainderLoop, *DT, *SE, *LI, nullptr);
+ remainderLoop = nullptr;
+
+ // Simplify loop values after breaking the backedge
+ const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
+ SmallVector<WeakTrackingVH, 16> DeadInsts;
+ for (BasicBlock *BB : RemainderBlocks) {
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
+ Instruction *Inst = &*I++;
+ if (Value *V = SimplifyInstruction(Inst, {DL, nullptr, DT, AC}))
+ if (LI->replacementPreservesLCSSAForm(Inst, V))
+ Inst->replaceAllUsesWith(V);
+ if (isInstructionTriviallyDead(Inst))
+ DeadInsts.emplace_back(Inst);
+ }
+ // We can't do recursive deletion until we're done iterating, as we might
+ // have a phi which (potentially indirectly) uses instructions later in
+ // the block we're iterating through.
+ RecursivelyDeleteTriviallyDeadInstructions(DeadInsts);
+ }
+
+ // Merge the exit block into the remainder latch.
+ auto *ExitBB = RemainderLatch->getSingleSuccessor();
+ assert(ExitBB && "required after breaking cond br backedge");
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
+ MergeBlockIntoPredecessor(ExitBB, &DTU, LI);
+ }
+
// Canonicalize to LoopSimplifyForm both original and remainder loops. We
// cannot rely on the LoopUnrollPass to do this because it only does
// canonicalization for parent/subloops and not the sibling loops.
; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[N_VEC]], -16
; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 4
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
-; CHECK-NEXT: [[XTRAITER1:%.*]] = and i64 [[TMP2]], 1
+; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP2]], 1
; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP1]], 1
; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK_UNR_LCSSA:%.*]], label [[VECTOR_PH_NEW:%.*]]
; CHECK: vector.ph.new:
-; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[TMP2]], [[XTRAITER1]]
+; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[TMP2]], [[XTRAITER]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[INDEX_NEXT_1:%.*]], [[VECTOR_BODY]] ]
; CHECK: middle.block.unr-lcssa:
; CHECK-NEXT: [[INDEX_UNR:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_UNR_PH]], [[MIDDLE_BLOCK_UNR_LCSSA_LOOPEXIT]] ]
; CHECK-NEXT: [[VEC_IND12_UNR:%.*]] = phi <16 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, [[VECTOR_PH]] ], [ [[VEC_IND12_UNR_PH]], [[MIDDLE_BLOCK_UNR_LCSSA_LOOPEXIT]] ]
-; CHECK-NEXT: [[LCMP_MOD2:%.*]] = icmp ne i64 [[XTRAITER1]], 0
-; CHECK-NEXT: br i1 [[LCMP_MOD2]], label [[VECTOR_BODY_EPIL_PREHEADER:%.*]], label [[MIDDLE_BLOCK:%.*]]
+; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[VECTOR_BODY_EPIL_PREHEADER:%.*]], label [[MIDDLE_BLOCK:%.*]]
; CHECK: vector.body.epil.preheader:
; CHECK-NEXT: br label [[VECTOR_BODY_EPIL:%.*]]
; CHECK: vector.body.epil:
-; CHECK-NEXT: [[INDEX_EPIL:%.*]] = phi i64 [ [[INDEX_UNR]], [[VECTOR_BODY_EPIL_PREHEADER]] ]
-; CHECK-NEXT: [[VEC_IND12_EPIL:%.*]] = phi <16 x i32> [ [[VEC_IND12_UNR]], [[VECTOR_BODY_EPIL_PREHEADER]] ]
-; CHECK-NEXT: [[TMP16:%.*]] = shl <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, [[VEC_IND12_EPIL]]
+; CHECK-NEXT: [[TMP16:%.*]] = shl <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, [[VEC_IND12_UNR]]
; CHECK-NEXT: [[TMP17:%.*]] = and <16 x i32> [[TMP16]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq <16 x i32> [[TMP17]], zeroinitializer
; CHECK-NEXT: [[TMP19:%.*]] = select <16 x i1> [[TMP18]], <16 x i8> <i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48>, <16 x i8> <i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49>
-; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDEX_EPIL]]
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDEX_UNR]]
; CHECK-NEXT: [[TMP21:%.*]] = bitcast i8* [[TMP20]] to <16 x i8>*
; CHECK-NEXT: store <16 x i8> [[TMP19]], <16 x i8>* [[TMP21]], align 1
-; CHECK-NEXT: [[INDEX_NEXT_EPIL:%.*]] = add i64 [[INDEX_EPIL]], 16
-; CHECK-NEXT: [[VEC_IND_NEXT13_EPIL:%.*]] = add <16 x i32> [[VEC_IND12_EPIL]], <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT_EPIL]], [[N_VEC]]
-; CHECK-NEXT: br label [[MIDDLE_BLOCK_EPILOG_LCSSA:%.*]]
-; CHECK: middle.block.epilog-lcssa:
; CHECK-NEXT: br label [[MIDDLE_BLOCK]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY_PREHEADER]]
; CHECK: for.body.preheader:
; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
-; CHECK-NEXT: [[TMP23:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[INDVARS_IV_PH]]
-; CHECK-NEXT: [[TMP24:%.*]] = add i64 [[WIDE_TRIP_COUNT]], -1
-; CHECK-NEXT: [[TMP25:%.*]] = sub i64 [[TMP24]], [[INDVARS_IV_PH]]
-; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP23]], 7
-; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
-; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[FOR_BODY_PROL_PREHEADER:%.*]], label [[FOR_BODY_PROL_LOOPEXIT:%.*]]
+; CHECK-NEXT: [[TMP22:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[INDVARS_IV_PH]]
+; CHECK-NEXT: [[TMP23:%.*]] = add i64 [[WIDE_TRIP_COUNT]], -1
+; CHECK-NEXT: [[TMP24:%.*]] = sub i64 [[TMP23]], [[INDVARS_IV_PH]]
+; CHECK-NEXT: [[XTRAITER1:%.*]] = and i64 [[TMP22]], 7
+; CHECK-NEXT: [[LCMP_MOD2:%.*]] = icmp ne i64 [[XTRAITER1]], 0
+; CHECK-NEXT: br i1 [[LCMP_MOD2]], label [[FOR_BODY_PROL_PREHEADER:%.*]], label [[FOR_BODY_PROL_LOOPEXIT:%.*]]
; CHECK: for.body.prol.preheader:
; CHECK-NEXT: br label [[FOR_BODY_PROL:%.*]]
; CHECK: for.body.prol:
; CHECK-NEXT: [[INDVARS_IV_PROL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL:%.*]], [[FOR_BODY_PROL]] ], [ [[INDVARS_IV_PH]], [[FOR_BODY_PROL_PREHEADER]] ]
-; CHECK-NEXT: [[PROL_ITER:%.*]] = phi i64 [ [[XTRAITER]], [[FOR_BODY_PROL_PREHEADER]] ], [ [[PROL_ITER_SUB:%.*]], [[FOR_BODY_PROL]] ]
-; CHECK-NEXT: [[TMP26:%.*]] = trunc i64 [[INDVARS_IV_PROL]] to i32
-; CHECK-NEXT: [[SHL_PROL:%.*]] = shl i32 1, [[TMP26]]
+; CHECK-NEXT: [[PROL_ITER:%.*]] = phi i64 [ [[XTRAITER1]], [[FOR_BODY_PROL_PREHEADER]] ], [ [[PROL_ITER_SUB:%.*]], [[FOR_BODY_PROL]] ]
+; CHECK-NEXT: [[TMP25:%.*]] = trunc i64 [[INDVARS_IV_PROL]] to i32
+; CHECK-NEXT: [[SHL_PROL:%.*]] = shl i32 1, [[TMP25]]
; CHECK-NEXT: [[AND_PROL:%.*]] = and i32 [[SHL_PROL]], [[X]]
; CHECK-NEXT: [[TOBOOL_PROL:%.*]] = icmp eq i32 [[AND_PROL]], 0
; CHECK-NEXT: [[CONV_PROL:%.*]] = select i1 [[TOBOOL_PROL]], i8 48, i8 49
; CHECK-NEXT: [[EXITCOND_PROL:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_PROL]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT: [[PROL_ITER_SUB]] = sub i64 [[PROL_ITER]], 1
; CHECK-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_SUB]], 0
-; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label [[FOR_BODY_PROL]], label [[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA:%.*]], [[LOOP0:!llvm.loop !.*]]
+; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label [[FOR_BODY_PROL]], label [[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: for.body.prol.loopexit.unr-lcssa:
; CHECK-NEXT: [[INDVARS_IV_UNR_PH:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL]], [[FOR_BODY_PROL]] ]
; CHECK-NEXT: br label [[FOR_BODY_PROL_LOOPEXIT]]
; CHECK: for.body.prol.loopexit:
; CHECK-NEXT: [[INDVARS_IV_UNR:%.*]] = phi i64 [ [[INDVARS_IV_PH]], [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_UNR_PH]], [[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]] ]
-; CHECK-NEXT: [[TMP27:%.*]] = icmp ult i64 [[TMP25]], 7
-; CHECK-NEXT: br i1 [[TMP27]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY_PREHEADER_NEW:%.*]]
+; CHECK-NEXT: [[TMP26:%.*]] = icmp ult i64 [[TMP24]], 7
+; CHECK-NEXT: br i1 [[TMP26]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY_PREHEADER_NEW:%.*]]
; CHECK: for.body.preheader.new:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_UNR]], [[FOR_BODY_PREHEADER_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[TMP28:%.*]] = trunc i64 [[INDVARS_IV]] to i32
-; CHECK-NEXT: [[SHL:%.*]] = shl i32 1, [[TMP28]]
+; CHECK-NEXT: [[TMP27:%.*]] = trunc i64 [[INDVARS_IV]] to i32
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 1, [[TMP27]]
; CHECK-NEXT: [[AND:%.*]] = and i32 [[SHL]], [[X]]
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[AND]], 0
; CHECK-NEXT: [[CONV:%.*]] = select i1 [[TOBOOL]], i8 48, i8 49
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV]]
; CHECK-NEXT: store i8 [[CONV]], i8* [[ARRAYIDX]], align 1
; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT: [[TMP29:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
-; CHECK-NEXT: [[SHL_1:%.*]] = shl i32 1, [[TMP29]]
+; CHECK-NEXT: [[TMP28:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
+; CHECK-NEXT: [[SHL_1:%.*]] = shl i32 1, [[TMP28]]
; CHECK-NEXT: [[AND_1:%.*]] = and i32 [[SHL_1]], [[X]]
; CHECK-NEXT: [[TOBOOL_1:%.*]] = icmp eq i32 [[AND_1]], 0
; CHECK-NEXT: [[CONV_1:%.*]] = select i1 [[TOBOOL_1]], i8 48, i8 49
; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT]]
; CHECK-NEXT: store i8 [[CONV_1]], i8* [[ARRAYIDX_1]], align 1
; CHECK-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT]], 1
-; CHECK-NEXT: [[TMP30:%.*]] = trunc i64 [[INDVARS_IV_NEXT_1]] to i32
-; CHECK-NEXT: [[SHL_2:%.*]] = shl i32 1, [[TMP30]]
+; CHECK-NEXT: [[TMP29:%.*]] = trunc i64 [[INDVARS_IV_NEXT_1]] to i32
+; CHECK-NEXT: [[SHL_2:%.*]] = shl i32 1, [[TMP29]]
; CHECK-NEXT: [[AND_2:%.*]] = and i32 [[SHL_2]], [[X]]
; CHECK-NEXT: [[TOBOOL_2:%.*]] = icmp eq i32 [[AND_2]], 0
; CHECK-NEXT: [[CONV_2:%.*]] = select i1 [[TOBOOL_2]], i8 48, i8 49
; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT_1]]
; CHECK-NEXT: store i8 [[CONV_2]], i8* [[ARRAYIDX_2]], align 1
; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_1]], 1
-; CHECK-NEXT: [[TMP31:%.*]] = trunc i64 [[INDVARS_IV_NEXT_2]] to i32
-; CHECK-NEXT: [[SHL_3:%.*]] = shl i32 1, [[TMP31]]
+; CHECK-NEXT: [[TMP30:%.*]] = trunc i64 [[INDVARS_IV_NEXT_2]] to i32
+; CHECK-NEXT: [[SHL_3:%.*]] = shl i32 1, [[TMP30]]
; CHECK-NEXT: [[AND_3:%.*]] = and i32 [[SHL_3]], [[X]]
; CHECK-NEXT: [[TOBOOL_3:%.*]] = icmp eq i32 [[AND_3]], 0
; CHECK-NEXT: [[CONV_3:%.*]] = select i1 [[TOBOOL_3]], i8 48, i8 49
; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT_2]]
; CHECK-NEXT: store i8 [[CONV_3]], i8* [[ARRAYIDX_3]], align 1
; CHECK-NEXT: [[INDVARS_IV_NEXT_3:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_2]], 1
-; CHECK-NEXT: [[TMP32:%.*]] = trunc i64 [[INDVARS_IV_NEXT_3]] to i32
-; CHECK-NEXT: [[SHL_4:%.*]] = shl i32 1, [[TMP32]]
+; CHECK-NEXT: [[TMP31:%.*]] = trunc i64 [[INDVARS_IV_NEXT_3]] to i32
+; CHECK-NEXT: [[SHL_4:%.*]] = shl i32 1, [[TMP31]]
; CHECK-NEXT: [[AND_4:%.*]] = and i32 [[SHL_4]], [[X]]
; CHECK-NEXT: [[TOBOOL_4:%.*]] = icmp eq i32 [[AND_4]], 0
; CHECK-NEXT: [[CONV_4:%.*]] = select i1 [[TOBOOL_4]], i8 48, i8 49
; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT_3]]
; CHECK-NEXT: store i8 [[CONV_4]], i8* [[ARRAYIDX_4]], align 1
; CHECK-NEXT: [[INDVARS_IV_NEXT_4:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_3]], 1
-; CHECK-NEXT: [[TMP33:%.*]] = trunc i64 [[INDVARS_IV_NEXT_4]] to i32
-; CHECK-NEXT: [[SHL_5:%.*]] = shl i32 1, [[TMP33]]
+; CHECK-NEXT: [[TMP32:%.*]] = trunc i64 [[INDVARS_IV_NEXT_4]] to i32
+; CHECK-NEXT: [[SHL_5:%.*]] = shl i32 1, [[TMP32]]
; CHECK-NEXT: [[AND_5:%.*]] = and i32 [[SHL_5]], [[X]]
; CHECK-NEXT: [[TOBOOL_5:%.*]] = icmp eq i32 [[AND_5]], 0
; CHECK-NEXT: [[CONV_5:%.*]] = select i1 [[TOBOOL_5]], i8 48, i8 49
; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT_4]]
; CHECK-NEXT: store i8 [[CONV_5]], i8* [[ARRAYIDX_5]], align 1
; CHECK-NEXT: [[INDVARS_IV_NEXT_5:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_4]], 1
-; CHECK-NEXT: [[TMP34:%.*]] = trunc i64 [[INDVARS_IV_NEXT_5]] to i32
-; CHECK-NEXT: [[SHL_6:%.*]] = shl i32 1, [[TMP34]]
+; CHECK-NEXT: [[TMP33:%.*]] = trunc i64 [[INDVARS_IV_NEXT_5]] to i32
+; CHECK-NEXT: [[SHL_6:%.*]] = shl i32 1, [[TMP33]]
; CHECK-NEXT: [[AND_6:%.*]] = and i32 [[SHL_6]], [[X]]
; CHECK-NEXT: [[TOBOOL_6:%.*]] = icmp eq i32 [[AND_6]], 0
; CHECK-NEXT: [[CONV_6:%.*]] = select i1 [[TOBOOL_6]], i8 48, i8 49
; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT_5]]
; CHECK-NEXT: store i8 [[CONV_6]], i8* [[ARRAYIDX_6]], align 1
; CHECK-NEXT: [[INDVARS_IV_NEXT_6:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_5]], 1
-; CHECK-NEXT: [[TMP35:%.*]] = trunc i64 [[INDVARS_IV_NEXT_6]] to i32
-; CHECK-NEXT: [[SHL_7:%.*]] = shl i32 1, [[TMP35]]
+; CHECK-NEXT: [[TMP34:%.*]] = trunc i64 [[INDVARS_IV_NEXT_6]] to i32
+; CHECK-NEXT: [[SHL_7:%.*]] = shl i32 1, [[TMP34]]
; CHECK-NEXT: [[AND_7:%.*]] = and i32 [[SHL_7]], [[X]]
; CHECK-NEXT: [[TOBOOL_7:%.*]] = icmp eq i32 [[AND_7]], 0
; CHECK-NEXT: [[CONV_7:%.*]] = select i1 [[TOBOOL_7]], i8 48, i8 49
; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[N_VEC]], -16
; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 4
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
-; CHECK-NEXT: [[XTRAITER1:%.*]] = and i64 [[TMP2]], 1
+; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP2]], 1
; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP1]], 1
; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK_UNR_LCSSA:%.*]], label [[VECTOR_PH_NEW:%.*]]
; CHECK: vector.ph.new:
-; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[TMP2]], [[XTRAITER1]]
+; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[TMP2]], [[XTRAITER]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[INDEX_NEXT_1:%.*]], [[VECTOR_BODY]] ]
; CHECK: middle.block.unr-lcssa:
; CHECK-NEXT: [[INDEX_UNR:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_UNR_PH]], [[MIDDLE_BLOCK_UNR_LCSSA_LOOPEXIT]] ]
; CHECK-NEXT: [[VEC_IND12_UNR:%.*]] = phi <16 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, [[VECTOR_PH]] ], [ [[VEC_IND12_UNR_PH]], [[MIDDLE_BLOCK_UNR_LCSSA_LOOPEXIT]] ]
-; CHECK-NEXT: [[LCMP_MOD2:%.*]] = icmp ne i64 [[XTRAITER1]], 0
-; CHECK-NEXT: br i1 [[LCMP_MOD2]], label [[VECTOR_BODY_EPIL_PREHEADER:%.*]], label [[MIDDLE_BLOCK:%.*]]
+; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[VECTOR_BODY_EPIL_PREHEADER:%.*]], label [[MIDDLE_BLOCK:%.*]]
; CHECK: vector.body.epil.preheader:
; CHECK-NEXT: br label [[VECTOR_BODY_EPIL:%.*]]
; CHECK: vector.body.epil:
-; CHECK-NEXT: [[INDEX_EPIL:%.*]] = phi i64 [ [[INDEX_UNR]], [[VECTOR_BODY_EPIL_PREHEADER]] ]
-; CHECK-NEXT: [[VEC_IND12_EPIL:%.*]] = phi <16 x i32> [ [[VEC_IND12_UNR]], [[VECTOR_BODY_EPIL_PREHEADER]] ]
-; CHECK-NEXT: [[TMP16:%.*]] = shl <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, [[VEC_IND12_EPIL]]
+; CHECK-NEXT: [[TMP16:%.*]] = shl <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, [[VEC_IND12_UNR]]
; CHECK-NEXT: [[TMP17:%.*]] = and <16 x i32> [[TMP16]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq <16 x i32> [[TMP17]], zeroinitializer
; CHECK-NEXT: [[TMP19:%.*]] = select <16 x i1> [[TMP18]], <16 x i8> <i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48>, <16 x i8> <i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49>
-; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDEX_EPIL]]
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDEX_UNR]]
; CHECK-NEXT: [[TMP21:%.*]] = bitcast i8* [[TMP20]] to <16 x i8>*
; CHECK-NEXT: store <16 x i8> [[TMP19]], <16 x i8>* [[TMP21]], align 1
-; CHECK-NEXT: [[INDEX_NEXT_EPIL:%.*]] = add i64 [[INDEX_EPIL]], 16
-; CHECK-NEXT: [[VEC_IND_NEXT13_EPIL:%.*]] = add <16 x i32> [[VEC_IND12_EPIL]], <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT_EPIL]], [[N_VEC]]
-; CHECK-NEXT: br label [[MIDDLE_BLOCK_EPILOG_LCSSA:%.*]]
-; CHECK: middle.block.epilog-lcssa:
; CHECK-NEXT: br label [[MIDDLE_BLOCK]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY_PREHEADER]]
; CHECK: for.body.preheader:
; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
-; CHECK-NEXT: [[TMP23:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[INDVARS_IV_PH]]
-; CHECK-NEXT: [[TMP24:%.*]] = add i64 [[WIDE_TRIP_COUNT]], -1
-; CHECK-NEXT: [[TMP25:%.*]] = sub i64 [[TMP24]], [[INDVARS_IV_PH]]
-; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP23]], 7
-; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
-; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[FOR_BODY_PROL_PREHEADER:%.*]], label [[FOR_BODY_PROL_LOOPEXIT:%.*]]
+; CHECK-NEXT: [[TMP22:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[INDVARS_IV_PH]]
+; CHECK-NEXT: [[TMP23:%.*]] = add i64 [[WIDE_TRIP_COUNT]], -1
+; CHECK-NEXT: [[TMP24:%.*]] = sub i64 [[TMP23]], [[INDVARS_IV_PH]]
+; CHECK-NEXT: [[XTRAITER1:%.*]] = and i64 [[TMP22]], 7
+; CHECK-NEXT: [[LCMP_MOD2:%.*]] = icmp ne i64 [[XTRAITER1]], 0
+; CHECK-NEXT: br i1 [[LCMP_MOD2]], label [[FOR_BODY_PROL_PREHEADER:%.*]], label [[FOR_BODY_PROL_LOOPEXIT:%.*]]
; CHECK: for.body.prol.preheader:
; CHECK-NEXT: br label [[FOR_BODY_PROL:%.*]]
; CHECK: for.body.prol:
; CHECK-NEXT: [[INDVARS_IV_PROL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL:%.*]], [[FOR_BODY_PROL]] ], [ [[INDVARS_IV_PH]], [[FOR_BODY_PROL_PREHEADER]] ]
-; CHECK-NEXT: [[PROL_ITER:%.*]] = phi i64 [ [[XTRAITER]], [[FOR_BODY_PROL_PREHEADER]] ], [ [[PROL_ITER_SUB:%.*]], [[FOR_BODY_PROL]] ]
-; CHECK-NEXT: [[TMP26:%.*]] = trunc i64 [[INDVARS_IV_PROL]] to i32
-; CHECK-NEXT: [[SHL_PROL:%.*]] = shl i32 1, [[TMP26]]
+; CHECK-NEXT: [[PROL_ITER:%.*]] = phi i64 [ [[XTRAITER1]], [[FOR_BODY_PROL_PREHEADER]] ], [ [[PROL_ITER_SUB:%.*]], [[FOR_BODY_PROL]] ]
+; CHECK-NEXT: [[TMP25:%.*]] = trunc i64 [[INDVARS_IV_PROL]] to i32
+; CHECK-NEXT: [[SHL_PROL:%.*]] = shl i32 1, [[TMP25]]
; CHECK-NEXT: [[AND_PROL:%.*]] = and i32 [[SHL_PROL]], [[X]]
; CHECK-NEXT: [[TOBOOL_PROL:%.*]] = icmp eq i32 [[AND_PROL]], 0
; CHECK-NEXT: [[CONV_PROL:%.*]] = select i1 [[TOBOOL_PROL]], i8 48, i8 49
; CHECK-NEXT: [[EXITCOND_PROL:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_PROL]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT: [[PROL_ITER_SUB]] = sub i64 [[PROL_ITER]], 1
; CHECK-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_SUB]], 0
-; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label [[FOR_BODY_PROL]], label [[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop !0
+; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label [[FOR_BODY_PROL]], label [[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: for.body.prol.loopexit.unr-lcssa:
; CHECK-NEXT: [[INDVARS_IV_UNR_PH:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL]], [[FOR_BODY_PROL]] ]
; CHECK-NEXT: br label [[FOR_BODY_PROL_LOOPEXIT]]
; CHECK: for.body.prol.loopexit:
; CHECK-NEXT: [[INDVARS_IV_UNR:%.*]] = phi i64 [ [[INDVARS_IV_PH]], [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_UNR_PH]], [[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]] ]
-; CHECK-NEXT: [[TMP27:%.*]] = icmp ult i64 [[TMP25]], 7
-; CHECK-NEXT: br i1 [[TMP27]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY_PREHEADER_NEW:%.*]]
+; CHECK-NEXT: [[TMP26:%.*]] = icmp ult i64 [[TMP24]], 7
+; CHECK-NEXT: br i1 [[TMP26]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY_PREHEADER_NEW:%.*]]
; CHECK: for.body.preheader.new:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_UNR]], [[FOR_BODY_PREHEADER_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[TMP28:%.*]] = trunc i64 [[INDVARS_IV]] to i32
-; CHECK-NEXT: [[SHL:%.*]] = shl i32 1, [[TMP28]]
+; CHECK-NEXT: [[TMP27:%.*]] = trunc i64 [[INDVARS_IV]] to i32
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 1, [[TMP27]]
; CHECK-NEXT: [[AND:%.*]] = and i32 [[SHL]], [[X]]
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[AND]], 0
; CHECK-NEXT: [[CONV:%.*]] = select i1 [[TOBOOL]], i8 48, i8 49
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV]]
; CHECK-NEXT: store i8 [[CONV]], i8* [[ARRAYIDX]], align 1
; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT: [[TMP29:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
-; CHECK-NEXT: [[SHL_1:%.*]] = shl i32 1, [[TMP29]]
+; CHECK-NEXT: [[TMP28:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
+; CHECK-NEXT: [[SHL_1:%.*]] = shl i32 1, [[TMP28]]
; CHECK-NEXT: [[AND_1:%.*]] = and i32 [[SHL_1]], [[X]]
; CHECK-NEXT: [[TOBOOL_1:%.*]] = icmp eq i32 [[AND_1]], 0
; CHECK-NEXT: [[CONV_1:%.*]] = select i1 [[TOBOOL_1]], i8 48, i8 49
; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT]]
; CHECK-NEXT: store i8 [[CONV_1]], i8* [[ARRAYIDX_1]], align 1
; CHECK-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT]], 1
-; CHECK-NEXT: [[TMP30:%.*]] = trunc i64 [[INDVARS_IV_NEXT_1]] to i32
-; CHECK-NEXT: [[SHL_2:%.*]] = shl i32 1, [[TMP30]]
+; CHECK-NEXT: [[TMP29:%.*]] = trunc i64 [[INDVARS_IV_NEXT_1]] to i32
+; CHECK-NEXT: [[SHL_2:%.*]] = shl i32 1, [[TMP29]]
; CHECK-NEXT: [[AND_2:%.*]] = and i32 [[SHL_2]], [[X]]
; CHECK-NEXT: [[TOBOOL_2:%.*]] = icmp eq i32 [[AND_2]], 0
; CHECK-NEXT: [[CONV_2:%.*]] = select i1 [[TOBOOL_2]], i8 48, i8 49
; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT_1]]
; CHECK-NEXT: store i8 [[CONV_2]], i8* [[ARRAYIDX_2]], align 1
; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_1]], 1
-; CHECK-NEXT: [[TMP31:%.*]] = trunc i64 [[INDVARS_IV_NEXT_2]] to i32
-; CHECK-NEXT: [[SHL_3:%.*]] = shl i32 1, [[TMP31]]
+; CHECK-NEXT: [[TMP30:%.*]] = trunc i64 [[INDVARS_IV_NEXT_2]] to i32
+; CHECK-NEXT: [[SHL_3:%.*]] = shl i32 1, [[TMP30]]
; CHECK-NEXT: [[AND_3:%.*]] = and i32 [[SHL_3]], [[X]]
; CHECK-NEXT: [[TOBOOL_3:%.*]] = icmp eq i32 [[AND_3]], 0
; CHECK-NEXT: [[CONV_3:%.*]] = select i1 [[TOBOOL_3]], i8 48, i8 49
; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT_2]]
; CHECK-NEXT: store i8 [[CONV_3]], i8* [[ARRAYIDX_3]], align 1
; CHECK-NEXT: [[INDVARS_IV_NEXT_3:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_2]], 1
-; CHECK-NEXT: [[TMP32:%.*]] = trunc i64 [[INDVARS_IV_NEXT_3]] to i32
-; CHECK-NEXT: [[SHL_4:%.*]] = shl i32 1, [[TMP32]]
+; CHECK-NEXT: [[TMP31:%.*]] = trunc i64 [[INDVARS_IV_NEXT_3]] to i32
+; CHECK-NEXT: [[SHL_4:%.*]] = shl i32 1, [[TMP31]]
; CHECK-NEXT: [[AND_4:%.*]] = and i32 [[SHL_4]], [[X]]
; CHECK-NEXT: [[TOBOOL_4:%.*]] = icmp eq i32 [[AND_4]], 0
; CHECK-NEXT: [[CONV_4:%.*]] = select i1 [[TOBOOL_4]], i8 48, i8 49
; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT_3]]
; CHECK-NEXT: store i8 [[CONV_4]], i8* [[ARRAYIDX_4]], align 1
; CHECK-NEXT: [[INDVARS_IV_NEXT_4:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_3]], 1
-; CHECK-NEXT: [[TMP33:%.*]] = trunc i64 [[INDVARS_IV_NEXT_4]] to i32
-; CHECK-NEXT: [[SHL_5:%.*]] = shl i32 1, [[TMP33]]
+; CHECK-NEXT: [[TMP32:%.*]] = trunc i64 [[INDVARS_IV_NEXT_4]] to i32
+; CHECK-NEXT: [[SHL_5:%.*]] = shl i32 1, [[TMP32]]
; CHECK-NEXT: [[AND_5:%.*]] = and i32 [[SHL_5]], [[X]]
; CHECK-NEXT: [[TOBOOL_5:%.*]] = icmp eq i32 [[AND_5]], 0
; CHECK-NEXT: [[CONV_5:%.*]] = select i1 [[TOBOOL_5]], i8 48, i8 49
; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT_4]]
; CHECK-NEXT: store i8 [[CONV_5]], i8* [[ARRAYIDX_5]], align 1
; CHECK-NEXT: [[INDVARS_IV_NEXT_5:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_4]], 1
-; CHECK-NEXT: [[TMP34:%.*]] = trunc i64 [[INDVARS_IV_NEXT_5]] to i32
-; CHECK-NEXT: [[SHL_6:%.*]] = shl i32 1, [[TMP34]]
+; CHECK-NEXT: [[TMP33:%.*]] = trunc i64 [[INDVARS_IV_NEXT_5]] to i32
+; CHECK-NEXT: [[SHL_6:%.*]] = shl i32 1, [[TMP33]]
; CHECK-NEXT: [[AND_6:%.*]] = and i32 [[SHL_6]], [[X]]
; CHECK-NEXT: [[TOBOOL_6:%.*]] = icmp eq i32 [[AND_6]], 0
; CHECK-NEXT: [[CONV_6:%.*]] = select i1 [[TOBOOL_6]], i8 48, i8 49
; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT_5]]
; CHECK-NEXT: store i8 [[CONV_6]], i8* [[ARRAYIDX_6]], align 1
; CHECK-NEXT: [[INDVARS_IV_NEXT_6:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_5]], 1
-; CHECK-NEXT: [[TMP35:%.*]] = trunc i64 [[INDVARS_IV_NEXT_6]] to i32
-; CHECK-NEXT: [[SHL_7:%.*]] = shl i32 1, [[TMP35]]
+; CHECK-NEXT: [[TMP34:%.*]] = trunc i64 [[INDVARS_IV_NEXT_6]] to i32
+; CHECK-NEXT: [[SHL_7:%.*]] = shl i32 1, [[TMP34]]
; CHECK-NEXT: [[AND_7:%.*]] = and i32 [[SHL_7]], [[X]]
; CHECK-NEXT: [[TOBOOL_7:%.*]] = icmp eq i32 [[AND_7]], 0
; CHECK-NEXT: [[CONV_7:%.*]] = select i1 [[TOBOOL_7]], i8 48, i8 49
; EPILOG-BLOCK: exit1:
; EPILOG-BLOCK-NEXT: ret void
; EPILOG-BLOCK: exit2.loopexit.unr-lcssa.loopexit:
-; EPILOG-BLOCK-NEXT: %iv.unr.ph = phi i64 [ %iv_next.1, %loop_latch.1 ]
; EPILOG-BLOCK-NEXT: br label %exit2.loopexit.unr-lcssa
; EPILOG-BLOCK: exit2.loopexit.unr-lcssa:
-; EPILOG-BLOCK-NEXT: %iv.unr = phi i64 [ 0, %entry ], [ %iv.unr.ph, %exit2.loopexit.unr-lcssa.loopexit ]
; EPILOG-BLOCK-NEXT: %lcmp.mod = icmp ne i64 %xtraiter, 0
; EPILOG-BLOCK-NEXT: br i1 %lcmp.mod, label %loop_header.epil.preheader, label %exit2.loopexit
; EPILOG-BLOCK: loop_header.epil.preheader:
; EPILOG-BLOCK-NEXT: br label %loop_header.epil
; EPILOG-BLOCK: loop_header.epil:
-; EPILOG-BLOCK-NEXT: %iv.epil = phi i64 [ %iv.unr, %loop_header.epil.preheader ]
; EPILOG-BLOCK-NEXT: br i1 %cond, label %loop_latch.epil, label %loop_exiting_bb1.epil
; EPILOG-BLOCK: loop_exiting_bb1.epil:
; EPILOG-BLOCK-NEXT: br i1 false, label %loop_exiting_bb2.epil, label %exit1
; EPILOG-BLOCK: loop_exiting_bb2.epil:
; EPILOG-BLOCK-NEXT: br i1 false, label %loop_latch.epil, label %exit3
; EPILOG-BLOCK: loop_latch.epil:
-; EPILOG-BLOCK-NEXT: %iv_next.epil = add i64 %iv.epil, 1
-; EPILOG-BLOCK-NEXT: %cmp.epil = icmp ne i64 %iv_next.epil, %trip
-; EPILOG-BLOCK-NEXT: br label %exit2.loopexit.epilog-lcssa
-; EPILOG-BLOCK: exit2.loopexit.epilog-lcssa:
; EPILOG-BLOCK-NEXT: br label %exit2.loopexit
; EPILOG-BLOCK: exit2.loopexit:
; EPILOG-BLOCK-NEXT: ret void
; PROLOG-BLOCK: loop_exiting_bb2.prol:
; PROLOG-BLOCK-NEXT: br i1 false, label %loop_latch.prol, label %exit3
; PROLOG-BLOCK: loop_latch.prol:
-; PROLOG-BLOCK-NEXT: %iv_next.prol = add i64 0, 1
-; PROLOG-BLOCK-NEXT: %cmp.prol = icmp ne i64 %iv_next.prol, %trip
; PROLOG-BLOCK-NEXT: br label %loop_header.prol.loopexit
; PROLOG-BLOCK: loop_header.prol.loopexit:
-; PROLOG-BLOCK-NEXT: %iv.unr = phi i64 [ 0, %entry ], [ %iv_next.prol, %loop_latch.prol ]
+; PROLOG-BLOCK-NEXT: %iv.unr = phi i64 [ 0, %entry ], [ 1, %loop_latch.prol ]
; PROLOG-BLOCK-NEXT: %1 = icmp ult i64 %0, 1
; PROLOG-BLOCK-NEXT: br i1 %1, label %exit2.loopexit, label %entry.new
; PROLOG-BLOCK: entry.new:
; EPILOG-BLOCK: header.epil.preheader:
; EPILOG-BLOCK-NEXT: br label %header.epil
; EPILOG-BLOCK: header.epil:
-; EPILOG-BLOCK-NEXT: %indvars.iv.epil = phi i64 [ %indvars.iv.unr, %header.epil.preheader ]
-; EPILOG-BLOCK-NEXT: %sum.02.epil = phi i32 [ %sum.02.unr, %header.epil.preheader ]
; EPILOG-BLOCK-NEXT: br i1 false, label %for.exit2, label %for.exiting_block.epil
; EPILOG-BLOCK: for.exiting_block.epil:
; EPILOG-BLOCK-NEXT: %cmp.epil = icmp eq i64 %n, 42
; EPILOG-BLOCK-NEXT: br i1 %cmp.epil, label %for.exit2, label %for.body.epil
; EPILOG-BLOCK: for.body.epil:
-; EPILOG-BLOCK-NEXT: %arrayidx.epil = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.epil
+; EPILOG-BLOCK-NEXT: %arrayidx.epil = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.unr
; EPILOG-BLOCK-NEXT: %3 = load i32, i32* %arrayidx.epil, align 4
-; EPILOG-BLOCK-NEXT: %add.epil = add nsw i32 %3, %sum.02.epil
-; EPILOG-BLOCK-NEXT: %indvars.iv.next.epil = add i64 %indvars.iv.epil, 1
-; EPILOG-BLOCK-NEXT: %exitcond.epil = icmp eq i64 %indvars.iv.next.epil, %n
-; EPILOG-BLOCK-NEXT: br label %for.end.epilog-lcssa
-; EPILOG-BLOCK: for.end.epilog-lcssa:
+; EPILOG-BLOCK-NEXT: %add.epil = add nsw i32 %3, %sum.02.unr
; EPILOG-BLOCK-NEXT: br label %for.end
; EPILOG-BLOCK: for.end:
-; EPILOG-BLOCK-NEXT: %sum.0.lcssa = phi i32 [ %sum.0.lcssa.ph, %for.end.unr-lcssa ], [ %add.epil, %for.end.epilog-lcssa ]
+; EPILOG-BLOCK-NEXT: %sum.0.lcssa = phi i32 [ %sum.0.lcssa.ph, %for.end.unr-lcssa ], [ %add.epil, %for.body.epil ]
; EPILOG-BLOCK-NEXT: ret i32 %sum.0.lcssa
; EPILOG-BLOCK: for.exit2.loopexit:
; EPILOG-BLOCK-NEXT: %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %for.body ], [ 42, %for.exiting_block.1 ]
; EPILOG-BLOCK-NEXT: br label %for.exit2
; EPILOG-BLOCK: for.exit2:
-; EPILOG-BLOCK-NEXT: %retval = phi i32 [ %sum.02.epil, %header.epil ], [ 42, %for.exiting_block.epil ], [ %retval.ph, %for.exit2.loopexit ]
+; EPILOG-BLOCK-NEXT: %retval = phi i32 [ %sum.02.unr, %header.epil ], [ 42, %for.exiting_block.epil ], [ %retval.ph, %for.exit2.loopexit ]
; EPILOG-BLOCK-NEXT: ret i32 %retval
; EPILOG-BLOCK: for.exiting_block.1:
; EPILOG-BLOCK-NEXT: %cmp.1 = icmp eq i64 %n, 42
; PROLOG-BLOCK-NEXT: %cmp.prol = icmp eq i64 %n, 42
; PROLOG-BLOCK-NEXT: br i1 %cmp.prol, label %for.exit2, label %for.body.prol
; PROLOG-BLOCK: for.body.prol:
-; PROLOG-BLOCK-NEXT: %arrayidx.prol = getelementptr inbounds i32, i32* %a, i64 0
-; PROLOG-BLOCK-NEXT: %1 = load i32, i32* %arrayidx.prol, align 4
-; PROLOG-BLOCK-NEXT: %add.prol = add nsw i32 %1, 0
-; PROLOG-BLOCK-NEXT: %indvars.iv.next.prol = add i64 0, 1
-; PROLOG-BLOCK-NEXT: %exitcond.prol = icmp eq i64 %indvars.iv.next.prol, %n
+; PROLOG-BLOCK-NEXT: %1 = load i32, i32* %a, align 4
; PROLOG-BLOCK-NEXT: br label %header.prol.loopexit
; PROLOG-BLOCK: header.prol.loopexit:
-; PROLOG-BLOCK-NEXT: %sum.0.lcssa.unr = phi i32 [ undef, %entry ], [ %add.prol, %for.body.prol ]
-; PROLOG-BLOCK-NEXT: %indvars.iv.unr = phi i64 [ 0, %entry ], [ %indvars.iv.next.prol, %for.body.prol ]
-; PROLOG-BLOCK-NEXT: %sum.02.unr = phi i32 [ 0, %entry ], [ %add.prol, %for.body.prol ]
+; PROLOG-BLOCK-NEXT: %sum.0.lcssa.unr = phi i32 [ undef, %entry ], [ %1, %for.body.prol ]
+; PROLOG-BLOCK-NEXT: %indvars.iv.unr = phi i64 [ 0, %entry ], [ 1, %for.body.prol ]
+; PROLOG-BLOCK-NEXT: %sum.02.unr = phi i32 [ 0, %entry ], [ %1, %for.body.prol ]
; PROLOG-BLOCK-NEXT: %2 = icmp ult i64 %0, 1
; PROLOG-BLOCK-NEXT: br i1 %2, label %for.end, label %entry.new
; PROLOG-BLOCK: entry.new:
; EPILOG-BLOCK: exit1:
; EPILOG-BLOCK-NEXT: ret void
; EPILOG-BLOCK: exit2.loopexit.unr-lcssa.loopexit:
-; EPILOG-BLOCK-NEXT: %iv.unr.ph = phi i64 [ %iv_next.1, %loop_latch.1 ]
; EPILOG-BLOCK-NEXT: %sum.unr.ph = phi i64 [ %sum.next.1, %loop_latch.1 ]
; EPILOG-BLOCK-NEXT: br label %exit2.loopexit.unr-lcssa
; EPILOG-BLOCK: exit2.loopexit.unr-lcssa:
-; EPILOG-BLOCK-NEXT: %iv.unr = phi i64 [ 0, %entry ], [ %iv.unr.ph, %exit2.loopexit.unr-lcssa.loopexit ]
; EPILOG-BLOCK-NEXT: %sum.unr = phi i64 [ 0, %entry ], [ %sum.unr.ph, %exit2.loopexit.unr-lcssa.loopexit ]
; EPILOG-BLOCK-NEXT: %lcmp.mod = icmp ne i64 %xtraiter, 0
; EPILOG-BLOCK-NEXT: br i1 %lcmp.mod, label %loop_header.epil.preheader, label %exit2.loopexit
; EPILOG-BLOCK: loop_header.epil.preheader:
; EPILOG-BLOCK-NEXT: br label %loop_header.epil
; EPILOG-BLOCK: loop_header.epil:
-; EPILOG-BLOCK-NEXT: %iv.epil = phi i64 [ %iv.unr, %loop_header.epil.preheader ]
-; EPILOG-BLOCK-NEXT: %sum.epil = phi i64 [ %sum.unr, %loop_header.epil.preheader ]
; EPILOG-BLOCK-NEXT: br i1 undef, label %loop_latch.epil, label %loop_exiting_bb1.epil
; EPILOG-BLOCK: loop_exiting_bb1.epil:
-; EPILOG-BLOCK-NEXT: switch i64 %sum.epil, label %loop_latch.epil [
+; EPILOG-BLOCK-NEXT: switch i64 %sum.unr, label %loop_latch.epil [
; EPILOG-BLOCK-NEXT: i64 24, label %exit1
; EPILOG-BLOCK-NEXT: i64 42, label %exit3
; EPILOG-BLOCK-NEXT: ]
; EPILOG-BLOCK: loop_latch.epil:
-; EPILOG-BLOCK-NEXT: %iv_next.epil = add nuw nsw i64 %iv.epil, 1
-; EPILOG-BLOCK-NEXT: %sum.next.epil = add i64 %sum.epil, %add
-; EPILOG-BLOCK-NEXT: %cmp.epil = icmp ne i64 %iv_next.epil, %trip
-; EPILOG-BLOCK-NEXT: br label %exit2.loopexit.epilog-lcssa
-; EPILOG-BLOCK: exit2.loopexit.epilog-lcssa:
; EPILOG-BLOCK-NEXT: br label %exit2.loopexit
; EPILOG-BLOCK: exit2.loopexit:
; EPILOG-BLOCK-NEXT: ret void
; PROLOG-BLOCK-NEXT: i64 42, label %exit3
; PROLOG-BLOCK-NEXT: ]
; PROLOG-BLOCK: loop_latch.prol:
-; PROLOG-BLOCK-NEXT: %iv_next.prol = add nuw nsw i64 0, 1
-; PROLOG-BLOCK-NEXT: %sum.next.prol = add i64 0, %add
-; PROLOG-BLOCK-NEXT: %cmp.prol = icmp ne i64 %iv_next.prol, %trip
; PROLOG-BLOCK-NEXT: br label %loop_header.prol.loopexit
; PROLOG-BLOCK: loop_header.prol.loopexit:
-; PROLOG-BLOCK-NEXT: %iv.unr = phi i64 [ 0, %entry ], [ %iv_next.prol, %loop_latch.prol ]
-; PROLOG-BLOCK-NEXT: %sum.unr = phi i64 [ 0, %entry ], [ %sum.next.prol, %loop_latch.prol ]
+; PROLOG-BLOCK-NEXT: %iv.unr = phi i64 [ 0, %entry ], [ 1, %loop_latch.prol ]
+; PROLOG-BLOCK-NEXT: %sum.unr = phi i64 [ 0, %entry ], [ %add, %loop_latch.prol ]
; PROLOG-BLOCK-NEXT: %1 = icmp ult i64 %0, 1
; PROLOG-BLOCK-NEXT: br i1 %1, label %exit2.loopexit, label %entry.new
; PROLOG-BLOCK: entry.new:
; EPILOG-BLOCK: header.epil.preheader:
; EPILOG-BLOCK-NEXT: br label %header.epil
; EPILOG-BLOCK: header.epil:
-; EPILOG-BLOCK-NEXT: %indvars.iv.epil = phi i64 [ %indvars.iv.unr, %header.epil.preheader ]
-; EPILOG-BLOCK-NEXT: %sum.02.epil = phi i32 [ %sum.02.unr, %header.epil.preheader ]
; EPILOG-BLOCK-NEXT: br i1 %cond, label %latchExit.epilog-lcssa, label %for.exiting_block.epil
; EPILOG-BLOCK: for.exiting_block.epil:
; EPILOG-BLOCK-NEXT: %cmp.epil = icmp eq i64 %n, 42
; EPILOG-BLOCK-NEXT: br i1 %cmp.epil, label %for.exit2, label %latch.epil
; EPILOG-BLOCK: latch.epil:
-; EPILOG-BLOCK-NEXT: %arrayidx.epil = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.epil
+; EPILOG-BLOCK-NEXT: %arrayidx.epil = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.unr
; EPILOG-BLOCK-NEXT: %3 = load i32, i32* %arrayidx.epil, align 4
-; EPILOG-BLOCK-NEXT: %add.epil = add nsw i32 %3, %sum.02.epil
-; EPILOG-BLOCK-NEXT: %indvars.iv.next.epil = add i64 %indvars.iv.epil, 1
-; EPILOG-BLOCK-NEXT: %exitcond.epil = icmp eq i64 %indvars.iv.next.epil, %n
+; EPILOG-BLOCK-NEXT: %add.epil = add nsw i32 %3, %sum.02.unr
; EPILOG-BLOCK-NEXT: br label %latchExit.epilog-lcssa
; EPILOG-BLOCK: latchExit.epilog-lcssa.loopexit:
; EPILOG-BLOCK-NEXT: %result.ph1.ph = phi i32 [ 0, %header ], [ 0, %latch ]
; PROLOG-BLOCK-NEXT: %cmp.prol = icmp eq i64 %n, 42
; PROLOG-BLOCK-NEXT: br i1 %cmp.prol, label %for.exit2, label %latch.prol
; PROLOG-BLOCK: latch.prol:
-; PROLOG-BLOCK-NEXT: %arrayidx.prol = getelementptr inbounds i32, i32* %a, i64 0
-; PROLOG-BLOCK-NEXT: %1 = load i32, i32* %arrayidx.prol, align 4
-; PROLOG-BLOCK-NEXT: %add.prol = add nsw i32 %1, 0
-; PROLOG-BLOCK-NEXT: %indvars.iv.next.prol = add i64 0, 1
-; PROLOG-BLOCK-NEXT: %exitcond.prol = icmp eq i64 %indvars.iv.next.prol, %n
+; PROLOG-BLOCK-NEXT: %1 = load i32, i32* %a, align 4
; PROLOG-BLOCK-NEXT: br label %header.prol.loopexit
; PROLOG-BLOCK: header.prol.loopexit:
-; PROLOG-BLOCK-NEXT: %result.unr = phi i32 [ undef, %entry ], [ %add.prol, %latch.prol ]
-; PROLOG-BLOCK-NEXT: %indvars.iv.unr = phi i64 [ 0, %entry ], [ %indvars.iv.next.prol, %latch.prol ]
-; PROLOG-BLOCK-NEXT: %sum.02.unr = phi i32 [ 0, %entry ], [ %add.prol, %latch.prol ]
+; PROLOG-BLOCK-NEXT: %result.unr = phi i32 [ undef, %entry ], [ %1, %latch.prol ]
+; PROLOG-BLOCK-NEXT: %indvars.iv.unr = phi i64 [ 0, %entry ], [ 1, %latch.prol ]
+; PROLOG-BLOCK-NEXT: %sum.02.unr = phi i32 [ 0, %entry ], [ %1, %latch.prol ]
; PROLOG-BLOCK-NEXT: %2 = icmp ult i64 %0, 1
; PROLOG-BLOCK-NEXT: br i1 %2, label %latchExit, label %entry.new
; PROLOG-BLOCK: entry.new:
; EPILOG-BLOCK: header.epil.preheader:
; EPILOG-BLOCK-NEXT: br label %header.epil
; EPILOG-BLOCK: header.epil:
-; EPILOG-BLOCK-NEXT: %indvars.iv.epil = phi i64 [ %indvars.iv.unr, %header.epil.preheader ]
-; EPILOG-BLOCK-NEXT: %sum.02.epil = phi i32 [ %sum.02.unr, %header.epil.preheader ]
; EPILOG-BLOCK-NEXT: br i1 %cond, label %for.exit2, label %for.exiting_block.epil
; EPILOG-BLOCK: for.exiting_block.epil:
; EPILOG-BLOCK-NEXT: %cmp.epil = icmp eq i64 %n, 42
; EPILOG-BLOCK-NEXT: br i1 %cmp.epil, label %latchExit.epilog-lcssa, label %latch.epil
; EPILOG-BLOCK: latch.epil:
-; EPILOG-BLOCK-NEXT: %arrayidx.epil = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.epil
+; EPILOG-BLOCK-NEXT: %arrayidx.epil = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.unr
; EPILOG-BLOCK-NEXT: %3 = load i32, i32* %arrayidx.epil, align 4
-; EPILOG-BLOCK-NEXT: %add.epil = add nsw i32 %3, %sum.02.epil
-; EPILOG-BLOCK-NEXT: %indvars.iv.next.epil = add i64 %indvars.iv.epil, 1
-; EPILOG-BLOCK-NEXT: %exitcond.epil = icmp eq i64 %indvars.iv.next.epil, %n
+; EPILOG-BLOCK-NEXT: %add.epil = add nsw i32 %3, %sum.02.unr
; EPILOG-BLOCK-NEXT: br label %latchExit.epilog-lcssa
; EPILOG-BLOCK: latchExit.epilog-lcssa.loopexit:
; EPILOG-BLOCK-NEXT: %result.ph1.ph = phi i32 [ 2, %for.exiting_block ], [ 2, %for.exiting_block.1 ]
; PROLOG-BLOCK-NEXT: %cmp.prol = icmp eq i64 %n, 42
; PROLOG-BLOCK-NEXT: br i1 %cmp.prol, label %latchExit.unr-lcssa, label %latch.prol
; PROLOG-BLOCK: latch.prol:
-; PROLOG-BLOCK-NEXT: %arrayidx.prol = getelementptr inbounds i32, i32* %a, i64 0
-; PROLOG-BLOCK-NEXT: %1 = load i32, i32* %arrayidx.prol, align 4
-; PROLOG-BLOCK-NEXT: %add.prol = add nsw i32 %1, 0
-; PROLOG-BLOCK-NEXT: %indvars.iv.next.prol = add i64 0, 1
-; PROLOG-BLOCK-NEXT: %exitcond.prol = icmp eq i64 %indvars.iv.next.prol, %n
+; PROLOG-BLOCK-NEXT: %1 = load i32, i32* %a, align 4
; PROLOG-BLOCK-NEXT: br label %header.prol.loopexit
; PROLOG-BLOCK: header.prol.loopexit:
-; PROLOG-BLOCK-NEXT: %result.unr = phi i32 [ undef, %entry ], [ %add.prol, %latch.prol ]
-; PROLOG-BLOCK-NEXT: %indvars.iv.unr = phi i64 [ 0, %entry ], [ %indvars.iv.next.prol, %latch.prol ]
-; PROLOG-BLOCK-NEXT: %sum.02.unr = phi i32 [ 0, %entry ], [ %add.prol, %latch.prol ]
+; PROLOG-BLOCK-NEXT: %result.unr = phi i32 [ undef, %entry ], [ %1, %latch.prol ]
+; PROLOG-BLOCK-NEXT: %indvars.iv.unr = phi i64 [ 0, %entry ], [ 1, %latch.prol ]
+; PROLOG-BLOCK-NEXT: %sum.02.unr = phi i32 [ 0, %entry ], [ %1, %latch.prol ]
; PROLOG-BLOCK-NEXT: %2 = icmp ult i64 %0, 1
; PROLOG-BLOCK-NEXT: br i1 %2, label %latchExit, label %entry.new
; PROLOG-BLOCK: entry.new:
; EPILOG-BLOCK: header.epil.preheader:
; EPILOG-BLOCK-NEXT: br label %header.epil
; EPILOG-BLOCK: header.epil:
-; EPILOG-BLOCK-NEXT: %indvars.iv.epil = phi i64 [ %indvars.iv.unr, %header.epil.preheader ]
-; EPILOG-BLOCK-NEXT: %sum.02.epil = phi i32 [ %sum.02.unr, %header.epil.preheader ]
; EPILOG-BLOCK-NEXT: br i1 %cond, label %for.exit2, label %for.exiting_block.epil
; EPILOG-BLOCK: for.exiting_block.epil:
; EPILOG-BLOCK-NEXT: %cmp.epil = icmp eq i64 %n, 42
; EPILOG-BLOCK-NEXT: br i1 %cmp.epil, label %latchExit.epilog-lcssa, label %latch.epil
; EPILOG-BLOCK: latch.epil:
-; EPILOG-BLOCK-NEXT: %arrayidx.epil = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.epil
+; EPILOG-BLOCK-NEXT: %arrayidx.epil = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.unr
; EPILOG-BLOCK-NEXT: %3 = load i32, i32* %arrayidx.epil, align 4
-; EPILOG-BLOCK-NEXT: %add.epil = add nsw i32 %3, %sum.02.epil
-; EPILOG-BLOCK-NEXT: %indvars.iv.next.epil = add i64 %indvars.iv.epil, 1
-; EPILOG-BLOCK-NEXT: %exitcond.epil = icmp eq i64 %indvars.iv.next.epil, %n
+; EPILOG-BLOCK-NEXT: %add.epil = add nsw i32 %3, %sum.02.unr
; EPILOG-BLOCK-NEXT: br label %latchExit.epilog-lcssa
; EPILOG-BLOCK: latchExit.epilog-lcssa.loopexit:
; EPILOG-BLOCK-NEXT: %result.ph1.ph = phi i32 [ %sum.02, %for.exiting_block ], [ %add, %for.exiting_block.1 ]
; EPILOG-BLOCK-NEXT: br label %latchExit.epilog-lcssa
; EPILOG-BLOCK: latchExit.epilog-lcssa:
-; EPILOG-BLOCK-NEXT: %result.ph1 = phi i32 [ %add.epil, %latch.epil ], [ %sum.02.epil, %for.exiting_block.epil ], [ %result.ph1.ph, %latchExit.epilog-lcssa.loopexit ]
+; EPILOG-BLOCK-NEXT: %result.ph1 = phi i32 [ %add.epil, %latch.epil ], [ %sum.02.unr, %for.exiting_block.epil ], [ %result.ph1.ph, %latchExit.epilog-lcssa.loopexit ]
; EPILOG-BLOCK-NEXT: br label %latchExit
; EPILOG-BLOCK: latchExit:
; EPILOG-BLOCK-NEXT: %result = phi i32 [ %result.ph, %latchExit.unr-lcssa ], [ %result.ph1, %latchExit.epilog-lcssa ]
; PROLOG-BLOCK-NEXT: %cmp.prol = icmp eq i64 %n, 42
; PROLOG-BLOCK-NEXT: br i1 %cmp.prol, label %latchExit.unr-lcssa, label %latch.prol
; PROLOG-BLOCK: latch.prol:
-; PROLOG-BLOCK-NEXT: %arrayidx.prol = getelementptr inbounds i32, i32* %a, i64 0
-; PROLOG-BLOCK-NEXT: %1 = load i32, i32* %arrayidx.prol, align 4
-; PROLOG-BLOCK-NEXT: %add.prol = add nsw i32 %1, 0
-; PROLOG-BLOCK-NEXT: %indvars.iv.next.prol = add i64 0, 1
-; PROLOG-BLOCK-NEXT: %exitcond.prol = icmp eq i64 %indvars.iv.next.prol, %n
+; PROLOG-BLOCK-NEXT: %1 = load i32, i32* %a, align 4
; PROLOG-BLOCK-NEXT: br label %header.prol.loopexit
; PROLOG-BLOCK: header.prol.loopexit:
-; PROLOG-BLOCK-NEXT: %result.unr = phi i32 [ undef, %entry ], [ %add.prol, %latch.prol ]
-; PROLOG-BLOCK-NEXT: %indvars.iv.unr = phi i64 [ 0, %entry ], [ %indvars.iv.next.prol, %latch.prol ]
-; PROLOG-BLOCK-NEXT: %sum.02.unr = phi i32 [ 0, %entry ], [ %add.prol, %latch.prol ]
+; PROLOG-BLOCK-NEXT: %result.unr = phi i32 [ undef, %entry ], [ %1, %latch.prol ]
+; PROLOG-BLOCK-NEXT: %indvars.iv.unr = phi i64 [ 0, %entry ], [ 1, %latch.prol ]
+; PROLOG-BLOCK-NEXT: %sum.02.unr = phi i32 [ 0, %entry ], [ %1, %latch.prol ]
; PROLOG-BLOCK-NEXT: %2 = icmp ult i64 %0, 1
; PROLOG-BLOCK-NEXT: br i1 %2, label %latchExit, label %entry.new
; PROLOG-BLOCK: entry.new:
; EPILOG-BLOCK: header.epil.preheader:
; EPILOG-BLOCK-NEXT: br label %header.epil
; EPILOG-BLOCK: header.epil:
-; EPILOG-BLOCK-NEXT: %indvars.iv.epil = phi i64 [ %indvars.iv.unr, %header.epil.preheader ]
-; EPILOG-BLOCK-NEXT: %sum.02.epil = phi i32 [ %sum.02.unr, %header.epil.preheader ]
; EPILOG-BLOCK-NEXT: br i1 %cond, label %for.exit2, label %for.exiting_block.epil
; EPILOG-BLOCK: for.exiting_block.epil:
-; EPILOG-BLOCK-NEXT: %arrayidx.epil = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.epil
+; EPILOG-BLOCK-NEXT: %arrayidx.epil = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.unr
; EPILOG-BLOCK-NEXT: %3 = load i32, i32* %arrayidx.epil, align 4
-; EPILOG-BLOCK-NEXT: %add.epil = add nsw i32 %3, %sum.02.epil
+; EPILOG-BLOCK-NEXT: %add.epil = add nsw i32 %3, %sum.02.unr
; EPILOG-BLOCK-NEXT: %cmp.epil = icmp eq i64 %n, 42
; EPILOG-BLOCK-NEXT: br i1 %cmp.epil, label %latchExit.epilog-lcssa, label %latch.epil
; EPILOG-BLOCK: latch.epil:
-; EPILOG-BLOCK-NEXT: %indvars.iv.next.epil = add i64 %indvars.iv.epil, 1
-; EPILOG-BLOCK-NEXT: %exitcond.epil = icmp eq i64 %indvars.iv.next.epil, %n
; EPILOG-BLOCK-NEXT: br label %latchExit.epilog-lcssa
; EPILOG-BLOCK: latchExit.epilog-lcssa.loopexit:
; EPILOG-BLOCK-NEXT: %result.ph1.ph = phi i32 [ %sum.02, %for.exiting_block ], [ %add, %for.exiting_block.1 ]
; EPILOG-BLOCK-NEXT: br label %latchExit.epilog-lcssa
; EPILOG-BLOCK: latchExit.epilog-lcssa:
-; EPILOG-BLOCK-NEXT: %result.ph1 = phi i32 [ %add.epil, %latch.epil ], [ %sum.02.epil, %for.exiting_block.epil ], [ %result.ph1.ph, %latchExit.epilog-lcssa.loopexit ]
+; EPILOG-BLOCK-NEXT: %result.ph1 = phi i32 [ %add.epil, %latch.epil ], [ %sum.02.unr, %for.exiting_block.epil ], [ %result.ph1.ph, %latchExit.epilog-lcssa.loopexit ]
; EPILOG-BLOCK-NEXT: br label %latchExit
; EPILOG-BLOCK: latchExit:
; EPILOG-BLOCK-NEXT: %result = phi i32 [ %result.ph, %latchExit.unr-lcssa ], [ %result.ph1, %latchExit.epilog-lcssa ]
; PROLOG-BLOCK: header.prol:
; PROLOG-BLOCK-NEXT: br i1 %cond, label %for.exit2, label %for.exiting_block.prol
; PROLOG-BLOCK: for.exiting_block.prol:
-; PROLOG-BLOCK-NEXT: %arrayidx.prol = getelementptr inbounds i32, i32* %a, i64 0
-; PROLOG-BLOCK-NEXT: %1 = load i32, i32* %arrayidx.prol, align 4
-; PROLOG-BLOCK-NEXT: %add.prol = add nsw i32 %1, 0
+; PROLOG-BLOCK-NEXT: %1 = load i32, i32* %a, align 4
; PROLOG-BLOCK-NEXT: %cmp.prol = icmp eq i64 %n, 42
; PROLOG-BLOCK-NEXT: br i1 %cmp.prol, label %latchExit.unr-lcssa, label %latch.prol
; PROLOG-BLOCK: latch.prol:
-; PROLOG-BLOCK-NEXT: %indvars.iv.next.prol = add i64 0, 1
-; PROLOG-BLOCK-NEXT: %exitcond.prol = icmp eq i64 %indvars.iv.next.prol, %n
; PROLOG-BLOCK-NEXT: br label %header.prol.loopexit
; PROLOG-BLOCK: header.prol.loopexit:
-; PROLOG-BLOCK-NEXT: %result.unr = phi i32 [ undef, %entry ], [ %add.prol, %latch.prol ]
-; PROLOG-BLOCK-NEXT: %indvars.iv.unr = phi i64 [ 0, %entry ], [ %indvars.iv.next.prol, %latch.prol ]
-; PROLOG-BLOCK-NEXT: %sum.02.unr = phi i32 [ 0, %entry ], [ %add.prol, %latch.prol ]
+; PROLOG-BLOCK-NEXT: %result.unr = phi i32 [ undef, %entry ], [ %1, %latch.prol ]
+; PROLOG-BLOCK-NEXT: %indvars.iv.unr = phi i64 [ 0, %entry ], [ 1, %latch.prol ]
+; PROLOG-BLOCK-NEXT: %sum.02.unr = phi i32 [ 0, %entry ], [ %1, %latch.prol ]
; PROLOG-BLOCK-NEXT: %2 = icmp ult i64 %0, 1
; PROLOG-BLOCK-NEXT: br i1 %2, label %latchExit, label %entry.new
; PROLOG-BLOCK: entry.new:
; EPILOG-BLOCK: loop_header.epil.preheader:
; EPILOG-BLOCK-NEXT: br label %loop_header.epil
; EPILOG-BLOCK: loop_header.epil:
-; EPILOG-BLOCK-NEXT: %iv.epil = phi i64 [ %iv.unr, %loop_header.epil.preheader ]
-; EPILOG-BLOCK-NEXT: %sum.epil = phi i64 [ %sum.unr, %loop_header.epil.preheader ]
; EPILOG-BLOCK-NEXT: br i1 %cond, label %loop_latch.epil, label %loop_exiting.epil
; EPILOG-BLOCK: loop_exiting.epil:
-; EPILOG-BLOCK-NEXT: %ivy.epil = add i64 %iv.epil, %add
-; EPILOG-BLOCK-NEXT: switch i64 %sum.epil, label %loop_latch.epil [
+; EPILOG-BLOCK-NEXT: %ivy.epil = add i64 %iv.unr, %add
+; EPILOG-BLOCK-NEXT: switch i64 %sum.unr, label %loop_latch.epil [
; EPILOG-BLOCK-NEXT: i64 24, label %exit1
; EPILOG-BLOCK-NEXT: i64 42, label %exit1
; EPILOG-BLOCK-NEXT: ]
; EPILOG-BLOCK: loop_latch.epil:
-; EPILOG-BLOCK-NEXT: %iv_next.epil = add nuw nsw i64 %iv.epil, 1
-; EPILOG-BLOCK-NEXT: %sum.next.epil = add i64 %sum.epil, %add
-; EPILOG-BLOCK-NEXT: %cmp.epil = icmp ne i64 %iv_next.epil, %trip
-; EPILOG-BLOCK-NEXT: br label %latchexit.epilog-lcssa
-; EPILOG-BLOCK: latchexit.epilog-lcssa:
+; EPILOG-BLOCK-NEXT: %sum.next.epil = add i64 %sum.unr, %add
; EPILOG-BLOCK-NEXT: br label %latchexit
; EPILOG-BLOCK: latchexit:
-; EPILOG-BLOCK-NEXT: %sum.next.lcssa = phi i64 [ %sum.next.lcssa.ph, %latchexit.unr-lcssa ], [ %sum.next.epil, %latchexit.epilog-lcssa ]
+; EPILOG-BLOCK-NEXT: %sum.next.lcssa = phi i64 [ %sum.next.lcssa.ph, %latchexit.unr-lcssa ], [ %sum.next.epil, %loop_latch.epil ]
; EPILOG-BLOCK-NEXT: ret i64 %sum.next.lcssa
; EPILOG-BLOCK: loop_exiting.1:
; EPILOG-BLOCK-NEXT: %ivy.1 = add i64 %iv_next, %add
; PROLOG-BLOCK: loop_header.prol:
; PROLOG-BLOCK-NEXT: br i1 %cond, label %loop_latch.prol, label %loop_exiting.prol
; PROLOG-BLOCK: loop_exiting.prol:
-; PROLOG-BLOCK-NEXT: %ivy.prol = add i64 0, %add
; PROLOG-BLOCK-NEXT: switch i64 0, label %loop_latch.prol [
; PROLOG-BLOCK-NEXT: i64 24, label %exit1
; PROLOG-BLOCK-NEXT: i64 42, label %exit1
; PROLOG-BLOCK-NEXT: ]
; PROLOG-BLOCK: loop_latch.prol:
-; PROLOG-BLOCK-NEXT: %iv_next.prol = add nuw nsw i64 0, 1
-; PROLOG-BLOCK-NEXT: %sum.next.prol = add i64 0, %add
-; PROLOG-BLOCK-NEXT: %cmp.prol = icmp ne i64 %iv_next.prol, %trip
; PROLOG-BLOCK-NEXT: br label %loop_header.prol.loopexit
; PROLOG-BLOCK: loop_header.prol.loopexit:
-; PROLOG-BLOCK-NEXT: %iv.unr = phi i64 [ 0, %entry ], [ %iv_next.prol, %loop_latch.prol ]
-; PROLOG-BLOCK-NEXT: %sum.unr = phi i64 [ 0, %entry ], [ %sum.next.prol, %loop_latch.prol ]
-; PROLOG-BLOCK-NEXT: %sum.next.lcssa.unr = phi i64 [ undef, %entry ], [ %sum.next.prol, %loop_latch.prol ]
+; PROLOG-BLOCK-NEXT: %iv.unr = phi i64 [ 0, %entry ], [ 1, %loop_latch.prol ]
+; PROLOG-BLOCK-NEXT: %sum.unr = phi i64 [ 0, %entry ], [ %add, %loop_latch.prol ]
+; PROLOG-BLOCK-NEXT: %sum.next.lcssa.unr = phi i64 [ undef, %entry ], [ %add, %loop_latch.prol ]
; PROLOG-BLOCK-NEXT: %1 = icmp ult i64 %0, 1
; PROLOG-BLOCK-NEXT: br i1 %1, label %latchexit, label %entry.new
; PROLOG-BLOCK: entry.new:
; PROLOG-BLOCK-NEXT: %result.ph = phi i64 [ %ivy, %loop_exiting ], [ %ivy, %loop_exiting ], [ %ivy.1, %loop_exiting.1 ], [ %ivy.1, %loop_exiting.1 ]
; PROLOG-BLOCK-NEXT: br label %exit1
; PROLOG-BLOCK: exit1:
-; PROLOG-BLOCK-NEXT: %result = phi i64 [ %ivy.prol, %loop_exiting.prol ], [ %ivy.prol, %loop_exiting.prol ], [ %result.ph, %exit1.loopexit ]
+; PROLOG-BLOCK-NEXT: %result = phi i64 [ %add, %loop_exiting.prol ], [ %add, %loop_exiting.prol ], [ %result.ph, %exit1.loopexit ]
; PROLOG-BLOCK-NEXT: ret i64 %result
; PROLOG-BLOCK: latchexit.unr-lcssa:
; PROLOG-BLOCK-NEXT: %sum.next.lcssa.ph = phi i64 [ %sum.next.1, %loop_latch.1 ]
; EPILOG-BLOCK: header.epil.preheader:
; EPILOG-BLOCK-NEXT: br label %header.epil
; EPILOG-BLOCK: header.epil:
-; EPILOG-BLOCK-NEXT: %indvars.iv.epil = phi i64 [ %indvars.iv.unr, %header.epil.preheader ]
-; EPILOG-BLOCK-NEXT: %sum.02.epil = phi i32 [ %sum.02.unr, %header.epil.preheader ]
; EPILOG-BLOCK-NEXT: br i1 false, label %for.exit2, label %for.exiting_block.epil
; EPILOG-BLOCK: for.exiting_block.epil:
; EPILOG-BLOCK-NEXT: %cmp.epil = icmp eq i64 %n, 42
; EPILOG-BLOCK-NEXT: br i1 %cmp.epil, label %for.exit2, label %latch.epil
; EPILOG-BLOCK: latch.epil:
-; EPILOG-BLOCK-NEXT: %arrayidx.epil = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.epil
+; EPILOG-BLOCK-NEXT: %arrayidx.epil = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.unr
; EPILOG-BLOCK-NEXT: %load.epil = load i32, i32* %arrayidx.epil, align 4
-; EPILOG-BLOCK-NEXT: %add.epil = add nsw i32 %load.epil, %sum.02.epil
-; EPILOG-BLOCK-NEXT: %indvars.iv.next.epil = add i64 %indvars.iv.epil, 1
-; EPILOG-BLOCK-NEXT: %exitcond.epil = icmp eq i64 %indvars.iv.next.epil, %n
-; EPILOG-BLOCK-NEXT: br label %latch_exit.epilog-lcssa
-; EPILOG-BLOCK: latch_exit.epilog-lcssa:
+; EPILOG-BLOCK-NEXT: %add.epil = add nsw i32 %load.epil, %sum.02.unr
; EPILOG-BLOCK-NEXT: br label %latch_exit
; EPILOG-BLOCK: latch_exit:
-; EPILOG-BLOCK-NEXT: %sum.0.lcssa = phi i32 [ %sum.0.lcssa.ph, %latch_exit.unr-lcssa ], [ %add.epil, %latch_exit.epilog-lcssa ]
+; EPILOG-BLOCK-NEXT: %sum.0.lcssa = phi i32 [ %sum.0.lcssa.ph, %latch_exit.unr-lcssa ], [ %add.epil, %latch.epil ]
; EPILOG-BLOCK-NEXT: ret i32 %sum.0.lcssa
; EPILOG-BLOCK: for.exit2.loopexit:
; EPILOG-BLOCK-NEXT: %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %latch ], [ 42, %for.exiting_block.1 ]
; EPILOG-BLOCK-NEXT: br label %for.exit2
; EPILOG-BLOCK: for.exit2:
-; EPILOG-BLOCK-NEXT: %retval = phi i32 [ %sum.02.epil, %header.epil ], [ 42, %for.exiting_block.epil ], [ %retval.ph, %for.exit2.loopexit ]
+; EPILOG-BLOCK-NEXT: %retval = phi i32 [ %sum.02.unr, %header.epil ], [ 42, %for.exiting_block.epil ], [ %retval.ph, %for.exit2.loopexit ]
; EPILOG-BLOCK-NEXT: %addx = add i32 %retval, %x
; EPILOG-BLOCK-NEXT: br i1 %cond, label %exit_true, label %exit_false
; EPILOG-BLOCK: exit_true:
; PROLOG-BLOCK-NEXT: %cmp.prol = icmp eq i64 %n, 42
; PROLOG-BLOCK-NEXT: br i1 %cmp.prol, label %for.exit2, label %latch.prol
; PROLOG-BLOCK: latch.prol:
-; PROLOG-BLOCK-NEXT: %arrayidx.prol = getelementptr inbounds i32, i32* %a, i64 0
-; PROLOG-BLOCK-NEXT: %load.prol = load i32, i32* %arrayidx.prol, align 4
-; PROLOG-BLOCK-NEXT: %add.prol = add nsw i32 %load.prol, 0
-; PROLOG-BLOCK-NEXT: %indvars.iv.next.prol = add i64 0, 1
-; PROLOG-BLOCK-NEXT: %exitcond.prol = icmp eq i64 %indvars.iv.next.prol, %n
+; PROLOG-BLOCK-NEXT: %load.prol = load i32, i32* %a, align 4
; PROLOG-BLOCK-NEXT: br label %header.prol.loopexit
; PROLOG-BLOCK: header.prol.loopexit:
-; PROLOG-BLOCK-NEXT: %sum.0.lcssa.unr = phi i32 [ undef, %entry ], [ %add.prol, %latch.prol ]
-; PROLOG-BLOCK-NEXT: %indvars.iv.unr = phi i64 [ 0, %entry ], [ %indvars.iv.next.prol, %latch.prol ]
-; PROLOG-BLOCK-NEXT: %sum.02.unr = phi i32 [ 0, %entry ], [ %add.prol, %latch.prol ]
+; PROLOG-BLOCK-NEXT: %sum.0.lcssa.unr = phi i32 [ undef, %entry ], [ %load.prol, %latch.prol ]
+; PROLOG-BLOCK-NEXT: %indvars.iv.unr = phi i64 [ 0, %entry ], [ 1, %latch.prol ]
+; PROLOG-BLOCK-NEXT: %sum.02.unr = phi i32 [ 0, %entry ], [ %load.prol, %latch.prol ]
; PROLOG-BLOCK-NEXT: %1 = icmp ult i64 %0, 1
; PROLOG-BLOCK-NEXT: br i1 %1, label %latch_exit, label %entry.new
; PROLOG-BLOCK: entry.new:
; EPILOG-BLOCK-NEXT: %niter.nsub = sub i64 %niter, 1
; EPILOG-BLOCK-NEXT: br i1 false, label %loopexit1.loopexit, label %latch.1
; EPILOG-BLOCK: latchexit.unr-lcssa.loopexit:
-; EPILOG-BLOCK-NEXT: %i6.unr.ph = phi i64 [ %add.1, %latch.1 ]
; EPILOG-BLOCK-NEXT: br label %latchexit.unr-lcssa
; EPILOG-BLOCK: latchexit.unr-lcssa:
-; EPILOG-BLOCK-NEXT: %i6.unr = phi i64 [ 1, %preheader ], [ %i6.unr.ph, %latchexit.unr-lcssa.loopexit ]
; EPILOG-BLOCK-NEXT: %lcmp.mod = icmp ne i64 %xtraiter, 0
; EPILOG-BLOCK-NEXT: br i1 %lcmp.mod, label %header.epil.preheader, label %latchexit
; EPILOG-BLOCK: header.epil.preheader:
; EPILOG-BLOCK-NEXT: br label %header.epil
; EPILOG-BLOCK: header.epil:
-; EPILOG-BLOCK-NEXT: %i6.epil = phi i64 [ %i6.unr, %header.epil.preheader ]
; EPILOG-BLOCK-NEXT: br i1 false, label %loopexit1, label %latch.epil
; EPILOG-BLOCK: latch.epil:
-; EPILOG-BLOCK-NEXT: %add.epil = add nuw nsw i64 %i6.epil, 1
-; EPILOG-BLOCK-NEXT: %i9.epil = icmp slt i64 %add.epil, %sext
-; EPILOG-BLOCK-NEXT: br label %latchexit.epilog-lcssa
-; EPILOG-BLOCK: latchexit.epilog-lcssa:
; EPILOG-BLOCK-NEXT: br label %latchexit
; EPILOG-BLOCK: latchexit:
; EPILOG-BLOCK-NEXT: unreachable
; PROLOG-BLOCK: header.prol:
; PROLOG-BLOCK-NEXT: br i1 false, label %loopexit1, label %latch.prol
; PROLOG-BLOCK: latch.prol:
-; PROLOG-BLOCK-NEXT: %add.prol = add nuw nsw i64 1, 1
-; PROLOG-BLOCK-NEXT: %i9.prol = icmp slt i64 %add.prol, %sext
; PROLOG-BLOCK-NEXT: br label %header.prol.loopexit
; PROLOG-BLOCK: header.prol.loopexit:
-; PROLOG-BLOCK-NEXT: %i6.unr = phi i64 [ 1, %preheader ], [ %add.prol, %latch.prol ]
+; PROLOG-BLOCK-NEXT: %i6.unr = phi i64 [ 1, %preheader ], [ 2, %latch.prol ]
; PROLOG-BLOCK-NEXT: %2 = icmp ult i64 %1, 1
; PROLOG-BLOCK-NEXT: br i1 %2, label %latchexit, label %preheader.new
; PROLOG-BLOCK: preheader.new:
; PROLOG-BLOCK: outerloop.loopexit.loopexit:
; PROLOG-BLOCK-NEXT: br label %outerloop.loopexit
; PROLOG-BLOCK: outerloop.loopexit:
-; PROLOG-BLOCK-NEXT: br i1 false, label %innerH.prol.preheader.1, label %innerH.prol.loopexit.unr-lcssa.1
+; PROLOG-BLOCK-NEXT: br i1 false, label %innerH.prol.preheader.1, label %innerH.prol.loopexit.1
; PROLOG-BLOCK: outerloop:
; PROLOG-BLOCK-NEXT: %i = phi i64 [ 3, %bb ], [ 0, %outerloop.loopexit.1 ]
; PROLOG-BLOCK-NEXT: %0 = sub i64 100, %i
; PROLOG-BLOCK-NEXT: %1 = sub i64 99, %i
; PROLOG-BLOCK-NEXT: %xtraiter = and i64 %0, 1
; PROLOG-BLOCK-NEXT: %lcmp.mod = icmp ne i64 %xtraiter, 0
-; PROLOG-BLOCK-NEXT: br i1 %lcmp.mod, label %innerH.prol.preheader, label %innerH.prol.loopexit.unr-lcssa
+; PROLOG-BLOCK-NEXT: br i1 %lcmp.mod, label %innerH.prol.preheader, label %innerH.prol.loopexit
; PROLOG-BLOCK: innerH.prol.preheader:
; PROLOG-BLOCK-NEXT: br label %innerH.prol
; PROLOG-BLOCK: innerH.prol:
; PROLOG-BLOCK-NEXT: %i4.prol = add nuw nsw i64 %i, 1
; PROLOG-BLOCK-NEXT: br i1 false, label %outerloop.loopexit, label %latch.prol
; PROLOG-BLOCK: latch.prol:
-; PROLOG-BLOCK-NEXT: br label %innerH.prol.loopexit.unr-lcssa
-; PROLOG-BLOCK: innerH.prol.loopexit.unr-lcssa:
-; PROLOG-BLOCK-NEXT: %i3.unr.ph = phi i64 [ %i4.prol, %latch.prol ], [ %i, %outerloop ]
; PROLOG-BLOCK-NEXT: br label %innerH.prol.loopexit
; PROLOG-BLOCK: innerH.prol.loopexit:
+; PROLOG-BLOCK-NEXT: %i3.unr = phi i64 [ %i, %outerloop ], [ %i4.prol, %latch.prol ]
; PROLOG-BLOCK-NEXT: %2 = icmp ult i64 %1, 1
; PROLOG-BLOCK-NEXT: br i1 %2, label %exit.loopexit, label %outerloop.new
; PROLOG-BLOCK: outerloop.new:
; PROLOG-BLOCK-NEXT: br label %innerH
; PROLOG-BLOCK: innerH:
-; PROLOG-BLOCK-NEXT: %i3 = phi i64 [ %i3.unr.ph, %outerloop.new ], [ %i4.1, %latch.1 ]
+; PROLOG-BLOCK-NEXT: %i3 = phi i64 [ %i3.unr, %outerloop.new ], [ %i4.1, %latch.1 ]
; PROLOG-BLOCK-NEXT: %i4 = add nuw nsw i64 %i3, 1
; PROLOG-BLOCK-NEXT: br i1 false, label %outerloop.loopexit.loopexit, label %latch
; PROLOG-BLOCK: latch:
; PROLOG-BLOCK: innerH.prol.1:
; PROLOG-BLOCK-NEXT: br i1 false, label %outerloop.loopexit.1, label %latch.prol.1
; PROLOG-BLOCK: latch.prol.1:
-; PROLOG-BLOCK-NEXT: br label %innerH.prol.loopexit.unr-lcssa.1
-; PROLOG-BLOCK: innerH.prol.loopexit.unr-lcssa.1:
-; PROLOG-BLOCK-NEXT: %i3.unr.ph.1 = phi i64 [ 1, %latch.prol.1 ], [ 0, %outerloop.loopexit ]
; PROLOG-BLOCK-NEXT: br label %innerH.prol.loopexit.1
; PROLOG-BLOCK: innerH.prol.loopexit.1:
+; PROLOG-BLOCK-NEXT: %i3.unr.1 = phi i64 [ 0, %outerloop.loopexit ], [ 1, %latch.prol.1 ]
; PROLOG-BLOCK-NEXT: br i1 false, label %exit.loopexit, label %outerloop.new.1
; PROLOG-BLOCK: outerloop.new.1:
; PROLOG-BLOCK-NEXT: br label %innerH.1
; PROLOG-BLOCK: innerH.1:
-; PROLOG-BLOCK-NEXT: %i3.1 = phi i64 [ %i3.unr.ph.1, %outerloop.new.1 ], [ %i4.1.1, %latch.1.1 ]
+; PROLOG-BLOCK-NEXT: %i3.1 = phi i64 [ %i3.unr.1, %outerloop.new.1 ], [ %i4.1.1, %latch.1.1 ]
; PROLOG-BLOCK-NEXT: %i4.11 = add nuw nsw i64 %i3.1, 1
; PROLOG-BLOCK-NEXT: br i1 false, label %outerloop.loopexit.loopexit.1, label %latch.12
; PROLOG-BLOCK: latch.12:
; PROLOG-BLOCK: preheader:
; PROLOG-BLOCK-NEXT: %xtraiter = and i32 %0, 1
; PROLOG-BLOCK-NEXT: %lcmp.mod = icmp ne i32 %xtraiter, 0
-; PROLOG-BLOCK-NEXT: br i1 %lcmp.mod, label %header.prol.preheader, label %header.prol.loopexit.unr-lcssa
+; PROLOG-BLOCK-NEXT: br i1 %lcmp.mod, label %header.prol.preheader, label %header.prol.loopexit
; PROLOG-BLOCK: header.prol.preheader:
; PROLOG-BLOCK-NEXT: br label %header.prol
; PROLOG-BLOCK: header.prol:
; PROLOG-BLOCK-NEXT: br i1 true, label %latch.prol, label %innerexit.loopexit1
; PROLOG-BLOCK: latch.prol:
-; PROLOG-BLOCK-NEXT: br label %header.prol.loopexit.unr-lcssa
-; PROLOG-BLOCK: header.prol.loopexit.unr-lcssa:
-; PROLOG-BLOCK-NEXT: %phi.unr.ph = phi i64 [ 1, %latch.prol ], [ 0, %preheader ]
; PROLOG-BLOCK-NEXT: br label %header.prol.loopexit
; PROLOG-BLOCK: header.prol.loopexit:
+; PROLOG-BLOCK-NEXT: %phi.unr = phi i64 [ 0, %preheader ], [ 1, %latch.prol ]
; PROLOG-BLOCK-NEXT: %2 = icmp ult i32 %1, 1
; PROLOG-BLOCK-NEXT: br i1 %2, label %outerLatch.loopexit, label %preheader.new
; PROLOG-BLOCK: preheader.new:
; PROLOG-BLOCK-NEXT: br label %header
; PROLOG-BLOCK: header:
-; PROLOG-BLOCK-NEXT: %phi = phi i64 [ %phi.unr.ph, %preheader.new ], [ %iv.next.1, %latch.1 ]
+; PROLOG-BLOCK-NEXT: %phi = phi i64 [ %phi.unr, %preheader.new ], [ %iv.next.1, %latch.1 ]
; PROLOG-BLOCK-NEXT: br i1 true, label %latch, label %innerexit.loopexit.loopexit
; PROLOG-BLOCK: innerexit.loopexit.loopexit:
; PROLOG-BLOCK-NEXT: %trip.lcssa.ph.ph = phi i32 [ %trip, %latch ], [ %trip, %header ]
; PROLOG-BLOCK: preheader.1:
; PROLOG-BLOCK-NEXT: %xtraiter.1 = and i32 %0, 1
; PROLOG-BLOCK-NEXT: %lcmp.mod.1 = icmp ne i32 %xtraiter.1, 0
-; PROLOG-BLOCK-NEXT: br i1 %lcmp.mod.1, label %header.prol.preheader.1, label %header.prol.loopexit.unr-lcssa.1
+; PROLOG-BLOCK-NEXT: br i1 %lcmp.mod.1, label %header.prol.preheader.1, label %header.prol.loopexit.1
; PROLOG-BLOCK: header.prol.preheader.1:
; PROLOG-BLOCK-NEXT: br label %header.prol.1
; PROLOG-BLOCK: header.prol.1:
; PROLOG-BLOCK-NEXT: br i1 true, label %latch.prol.1, label %innerexit.loopexit1
; PROLOG-BLOCK: latch.prol.1:
-; PROLOG-BLOCK-NEXT: br label %header.prol.loopexit.unr-lcssa.1
-; PROLOG-BLOCK: header.prol.loopexit.unr-lcssa.1:
-; PROLOG-BLOCK-NEXT: %phi.unr.ph.1 = phi i64 [ 1, %latch.prol.1 ], [ 0, %preheader.1 ]
; PROLOG-BLOCK-NEXT: br label %header.prol.loopexit.1
; PROLOG-BLOCK: header.prol.loopexit.1:
+; PROLOG-BLOCK-NEXT: %phi.unr.1 = phi i64 [ 0, %preheader.1 ], [ 1, %latch.prol.1 ]
; PROLOG-BLOCK-NEXT: %3 = icmp ult i32 %1, 1
; PROLOG-BLOCK-NEXT: br i1 %3, label %outerLatch.loopexit.1, label %preheader.new.1
; PROLOG-BLOCK: preheader.new.1:
; PROLOG-BLOCK-NEXT: br label %header.1
; PROLOG-BLOCK: header.1:
-; PROLOG-BLOCK-NEXT: %phi.1 = phi i64 [ %phi.unr.ph.1, %preheader.new.1 ], [ %iv.next.1.1, %latch.1.1 ]
+; PROLOG-BLOCK-NEXT: %phi.1 = phi i64 [ %phi.unr.1, %preheader.new.1 ], [ %iv.next.1.1, %latch.1.1 ]
; PROLOG-BLOCK-NEXT: br i1 true, label %latch.14, label %innerexit.loopexit.loopexit5
; PROLOG-BLOCK: latch.14:
; PROLOG-BLOCK-NEXT: %iv.next.13 = add nuw nsw i64 %phi.1, 1