From: Evgeny Stupachenko Date: Wed, 27 Apr 2016 03:04:54 +0000 (+0000) Subject: The patch fixes PR27392. X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=23ce61b66334b78ce7a0dd48c24fbf407ca12d8b;p=platform%2Fupstream%2Fllvm.git The patch fixes PR27392. Summary: It is incorrect to compare TripCount (which is BECount + 1) with extraiters (or Count) to check whether we should enter the unrolled loop, because TripCount can potentially overflow (when BECount is the maximum unsigned integer). Comparing BECount with (Count - 1), by contrast, is overflow-safe and therefore correct. Reviewer: hfinkel Differential Revision: http://reviews.llvm.org/D19256 From: Evgeny Stupachenko llvm-svn: 267662 --- diff --git a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp index da66da0..861a50c 100644 --- a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -246,7 +246,7 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit, Instruction *InsertPt = NewExit->getTerminator(); IRBuilder<> B(InsertPt); - Value *BrLoopExit = B.CreateIsNotNull(ModVal); + Value *BrLoopExit = B.CreateIsNotNull(ModVal, "lcmp.mod"); assert(Exit && "Loop must have a single exit block only"); // Split the exit to maintain loop canonicalization guarantees SmallVector Preds(predecessors(Exit)); @@ -416,7 +416,7 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, /// /// ***Epilog case*** /// extraiters = tripcount % loopfactor -/// if (extraiters == tripcount) jump LoopExit: +/// if (tripcount < loopfactor) jump LoopExit: /// unroll_iters = tripcount - extraiters /// Loop: LoopBody; (executes unroll_iter times); /// unroll_iter -= 1 @@ -575,14 +575,15 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, ConstantInt::get(BECount->getType(), Count), "xtraiter"); } - Value *CmpOperand = - UseEpilogRemainder ? 
TripCount : - ConstantInt::get(TripCount->getType(), 0); - Value *BranchVal = B.CreateICmpNE(ModVal, CmpOperand, "lcmp.mod"); - BasicBlock *FirstLoop = UseEpilogRemainder ? NewPreHeader : PrologPreHeader; - BasicBlock *SecondLoop = UseEpilogRemainder ? NewExit : PrologExit; + Value *BranchVal = + UseEpilogRemainder ? B.CreateICmpULT(BECount, + ConstantInt::get(BECount->getType(), + Count - 1)) : + B.CreateIsNotNull(ModVal, "lcmp.mod"); + BasicBlock *RemainderLoop = UseEpilogRemainder ? NewExit : PrologPreHeader; + BasicBlock *UnrollingLoop = UseEpilogRemainder ? NewPreHeader : PrologExit; // Branch to either remainder (extra iterations) loop or unrolling loop. - B.CreateCondBr(BranchVal, FirstLoop, SecondLoop); + B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop); PreHeaderBR->eraseFromParent(); Function *F = Header->getParent(); // Get an ordered list of blocks in the loop to help with the ordering of the diff --git a/llvm/test/Transforms/LoopUnroll/AArch64/runtime-loop.ll b/llvm/test/Transforms/LoopUnroll/AArch64/runtime-loop.ll index 98c9819..ebb9444 100644 --- a/llvm/test/Transforms/LoopUnroll/AArch64/runtime-loop.ll +++ b/llvm/test/Transforms/LoopUnroll/AArch64/runtime-loop.ll @@ -4,16 +4,14 @@ ; Tests for unrolling loops with run-time trip counts ; EPILOG: %xtraiter = and i32 %n -; EPILOG: %lcmp.mod = icmp ne i32 %xtraiter, %n -; EPILOG: br i1 %lcmp.mod, label %for.body.preheader.new, label %for.end.loopexit.unr-lcssa +; EPILOG: for.body: +; EPILOG: %lcmp.mod = icmp ne i32 %xtraiter, 0 +; EPILOG: br i1 %lcmp.mod, label %for.body.epil.preheader, label %for.end.loopexit +; EPILOG: for.body.epil: ; PROLOG: %xtraiter = and i32 %n ; PROLOG: %lcmp.mod = icmp ne i32 %xtraiter, 0 ; PROLOG: br i1 %lcmp.mod, label %for.body.prol.preheader, label %for.body.prol.loopexit - -; EPILOG: for.body: -; EPILOG: for.body.epil: - ; PROLOG: for.body.prol: ; PROLOG: for.body: diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop.ll 
b/llvm/test/Transforms/LoopUnroll/runtime-loop.ll index efa2427..2fc4dbd 100644 --- a/llvm/test/Transforms/LoopUnroll/runtime-loop.ll +++ b/llvm/test/Transforms/LoopUnroll/runtime-loop.ll @@ -6,8 +6,8 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 ; Tests for unrolling loops with run-time trip counts ; EPILOG: %xtraiter = and i32 %n -; EPILOG: %lcmp.mod = icmp ne i32 %xtraiter, %n -; EPILOG: br i1 %lcmp.mod, label %for.body.preheader.new, label %for.end.loopexit.unr-lcssa +; EPILOG: %lcmp.mod = icmp ne i32 %xtraiter, 0 +; EPILOG: br i1 %lcmp.mod, label %for.body.epil.preheader, label %for.end.loopexit ; PROLOG: %xtraiter = and i32 %n ; PROLOG: %lcmp.mod = icmp ne i32 %xtraiter, 0 diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop1.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop1.ll index 2e962b5..a2e2f88 100644 --- a/llvm/test/Transforms/LoopUnroll/runtime-loop1.ll +++ b/llvm/test/Transforms/LoopUnroll/runtime-loop1.ll @@ -5,7 +5,7 @@ ; EPILOG: for.body.preheader: -; EPILOG: br i1 %lcmp.mod, label %for.body.preheader.new, label %for.end.loopexit.unr-lcssa, !dbg [[PH_LOC:![0-9]+]] +; EPILOG: br i1 %1, label %for.end.loopexit.unr-lcssa, label %for.body.preheader.new, !dbg [[PH_LOC:![0-9]+]] ; EPILOG: for.body: ; EPILOG: br i1 %niter.ncmp.1, label %for.end.loopexit.unr-lcssa.loopexit, label %for.body, !dbg [[BODY_LOC:![0-9]+]] ; EPILOG-NOT: br i1 %niter.ncmp.2, label %for.end.loopexit{{.*}}, label %for.body diff --git a/llvm/test/Transforms/LoopUnroll/tripcount-overflow.ll b/llvm/test/Transforms/LoopUnroll/tripcount-overflow.ll index 5c25093..7156629 100644 --- a/llvm/test/Transforms/LoopUnroll/tripcount-overflow.ll +++ b/llvm/test/Transforms/LoopUnroll/tripcount-overflow.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -S -unroll-runtime -unroll-count=2 -loop-unroll | FileCheck %s +; RUN: opt < %s -S -unroll-runtime -unroll-count=2 -loop-unroll | FileCheck %s -check-prefix=EPILOG +; RUN: opt < %s -S -unroll-runtime -unroll-count=2 
-loop-unroll -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" ; This test case documents how runtime loop unrolling handles the case @@ -9,17 +10,28 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" ; is divisible by 2. The prologue then branches to the unrolled loop ; and executes the 2^32 iterations there, in groups of 2. +; EPILOG: entry: -; CHECK: entry: -; CHECK-NEXT: %0 = add i32 %N, 1 -; CHECK-NEXT: %xtraiter = and i32 %0, 1 -; CHECK-NEXT: %lcmp.mod = icmp ne i32 %xtraiter, %0 -; CHECK-NEXT: br i1 %lcmp.mod, label %entry.new, label %while.end.unr-lcssa +; EPILOG-NEXT: %0 = add i32 %N, 1 +; EPILOG-NEXT: %xtraiter = and i32 %0, 1 +; EPILOG-NEXT: %1 = icmp ult i32 %N, 1 +; EPILOG-NEXT: br i1 %1, label %while.end.unr-lcssa, label %entry.new +; EPILOG: while.body: -; CHECK: while.body.epil: -; CHECK: br label %while.end.epilog-lcssa +; EPILOG: %lcmp.mod = icmp ne i32 %xtraiter, 0 +; EPILOG-NEXT: br i1 %lcmp.mod, label %while.body.epil.preheader, label %while.end +; EPILOG: while.body.epil: -; CHECK: while.end.epilog-lcssa: +; PROLOG: entry: +; PROLOG-NEXT: %0 = add i32 %N, 1 +; PROLOG-NEXT: %xtraiter = and i32 %0, 1 +; PROLOG-NEXT: %lcmp.mod = icmp ne i32 %xtraiter, 0 +; PROLOG-NEXT: br i1 %lcmp.mod, label %while.body.prol.preheader, label %while.body.prol.loopexit +; PROLOG: while.body.prol: + +; PROLOG: %1 = icmp ult i32 %N, 1 +; PROLOG-NEXT: br i1 %1, label %while.end, label %entry.new +; PROLOG: while.body: ; Function Attrs: nounwind readnone ssp uwtable define i32 @foo(i32 %N) {