Instruction *InsertPt = NewExit->getTerminator();
IRBuilder<> B(InsertPt);
- Value *BrLoopExit = B.CreateIsNotNull(ModVal);
+ Value *BrLoopExit = B.CreateIsNotNull(ModVal, "lcmp.mod");
assert(Exit && "Loop must have a single exit block only");
// Split the exit to maintain loop canonicalization guarantees
SmallVector<BasicBlock*, 4> Preds(predecessors(Exit));
///
/// ***Epilog case***
/// extraiters = tripcount % loopfactor
-/// if (extraiters == tripcount) jump LoopExit:
+/// if (tripcount < loopfactor) jump LoopExit:
/// unroll_iters = tripcount - extraiters
/// Loop: LoopBody; (executes unroll_iter times);
/// unroll_iter -= 1
ConstantInt::get(BECount->getType(), Count),
"xtraiter");
}
- Value *CmpOperand =
- UseEpilogRemainder ? TripCount :
- ConstantInt::get(TripCount->getType(), 0);
- Value *BranchVal = B.CreateICmpNE(ModVal, CmpOperand, "lcmp.mod");
- BasicBlock *FirstLoop = UseEpilogRemainder ? NewPreHeader : PrologPreHeader;
- BasicBlock *SecondLoop = UseEpilogRemainder ? NewExit : PrologExit;
+ Value *BranchVal =
+ UseEpilogRemainder ? B.CreateICmpULT(BECount,
+ ConstantInt::get(BECount->getType(),
+ Count - 1)) :
+ B.CreateIsNotNull(ModVal, "lcmp.mod");
+ BasicBlock *RemainderLoop = UseEpilogRemainder ? NewExit : PrologPreHeader;
+ BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit;
// Branch to either remainder (extra iterations) loop or unrolling loop.
- B.CreateCondBr(BranchVal, FirstLoop, SecondLoop);
+ B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop);
PreHeaderBR->eraseFromParent();
Function *F = Header->getParent();
// Get an ordered list of blocks in the loop to help with the ordering of the
; Tests for unrolling loops with run-time trip counts
; EPILOG: %xtraiter = and i32 %n
-; EPILOG: %lcmp.mod = icmp ne i32 %xtraiter, %n
-; EPILOG: br i1 %lcmp.mod, label %for.body.preheader.new, label %for.end.loopexit.unr-lcssa
+; EPILOG: for.body:
+; EPILOG: %lcmp.mod = icmp ne i32 %xtraiter, 0
+; EPILOG: br i1 %lcmp.mod, label %for.body.epil.preheader, label %for.end.loopexit
+; EPILOG: for.body.epil:
; PROLOG: %xtraiter = and i32 %n
; PROLOG: %lcmp.mod = icmp ne i32 %xtraiter, 0
; PROLOG: br i1 %lcmp.mod, label %for.body.prol.preheader, label %for.body.prol.loopexit
-
-; EPILOG: for.body:
-; EPILOG: for.body.epil:
-
; PROLOG: for.body.prol:
; PROLOG: for.body:
; Tests for unrolling loops with run-time trip counts
; EPILOG: %xtraiter = and i32 %n
-; EPILOG: %lcmp.mod = icmp ne i32 %xtraiter, %n
-; EPILOG: br i1 %lcmp.mod, label %for.body.preheader.new, label %for.end.loopexit.unr-lcssa
+; EPILOG: %lcmp.mod = icmp ne i32 %xtraiter, 0
+; EPILOG: br i1 %lcmp.mod, label %for.body.epil.preheader, label %for.end.loopexit
; PROLOG: %xtraiter = and i32 %n
; PROLOG: %lcmp.mod = icmp ne i32 %xtraiter, 0
; EPILOG: for.body.preheader:
-; EPILOG: br i1 %lcmp.mod, label %for.body.preheader.new, label %for.end.loopexit.unr-lcssa, !dbg [[PH_LOC:![0-9]+]]
+; EPILOG: br i1 %1, label %for.end.loopexit.unr-lcssa, label %for.body.preheader.new, !dbg [[PH_LOC:![0-9]+]]
; EPILOG: for.body:
; EPILOG: br i1 %niter.ncmp.1, label %for.end.loopexit.unr-lcssa.loopexit, label %for.body, !dbg [[BODY_LOC:![0-9]+]]
; EPILOG-NOT: br i1 %niter.ncmp.2, label %for.end.loopexit{{.*}}, label %for.body
-; RUN: opt < %s -S -unroll-runtime -unroll-count=2 -loop-unroll | FileCheck %s
+; RUN: opt < %s -S -unroll-runtime -unroll-count=2 -loop-unroll | FileCheck %s -check-prefix=EPILOG
+; RUN: opt < %s -S -unroll-runtime -unroll-count=2 -loop-unroll -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
; This test case documents how runtime loop unrolling handles the case
; is divisible by 2. The prologue then branches to the unrolled loop
; and executes the 2^32 iterations there, in groups of 2.
+; EPILOG: entry:
-; CHECK: entry:
-; CHECK-NEXT: %0 = add i32 %N, 1
-; CHECK-NEXT: %xtraiter = and i32 %0, 1
-; CHECK-NEXT: %lcmp.mod = icmp ne i32 %xtraiter, %0
-; CHECK-NEXT: br i1 %lcmp.mod, label %entry.new, label %while.end.unr-lcssa
+; EPILOG-NEXT: %0 = add i32 %N, 1
+; EPILOG-NEXT: %xtraiter = and i32 %0, 1
+; EPILOG-NEXT: %1 = icmp ult i32 %N, 1
+; EPILOG-NEXT: br i1 %1, label %while.end.unr-lcssa, label %entry.new
+; EPILOG: while.body:
-; CHECK: while.body.epil:
-; CHECK: br label %while.end.epilog-lcssa
+; EPILOG: %lcmp.mod = icmp ne i32 %xtraiter, 0
+; EPILOG-NEXT: br i1 %lcmp.mod, label %while.body.epil.preheader, label %while.end
+; EPILOG: while.body.epil:
-; CHECK: while.end.epilog-lcssa:
+; PROLOG: entry:
+; PROLOG-NEXT: %0 = add i32 %N, 1
+; PROLOG-NEXT: %xtraiter = and i32 %0, 1
+; PROLOG-NEXT: %lcmp.mod = icmp ne i32 %xtraiter, 0
+; PROLOG-NEXT: br i1 %lcmp.mod, label %while.body.prol.preheader, label %while.body.prol.loopexit
+; PROLOG: while.body.prol:
+
+; PROLOG: %1 = icmp ult i32 %N, 1
+; PROLOG-NEXT: br i1 %1, label %while.end, label %entry.new
+; PROLOG: while.body:
; Function Attrs: nounwind readnone ssp uwtable
define i32 @foo(i32 %N) {