From a10b311aeca09090228493700352687e878de245 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Tue, 4 Dec 2012 18:17:33 +0000 Subject: [PATCH] Add support for reduction variables when IF-conversion is enabled. llvm-svn: 169288 --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 33 ++++++++++----- .../test/Transforms/LoopVectorize/if-conversion.ll | 48 ++++++++++++++++++++++ 2 files changed, 71 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 1de5b30..3502e9e 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1133,8 +1133,8 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { // Reductions do not have to start at zero. They can start with // any loop invariant values. VecRdxPhi->addIncoming(VectorStart, VecPreheader); - unsigned SelfEdgeIdx = (RdxPhi)->getBasicBlockIndex(LoopScalarBody); - Value *Val = getVectorValue(RdxPhi->getIncomingValue(SelfEdgeIdx)); + Value *Val = + getVectorValue(RdxPhi->getIncomingValueForBlock(OrigLoop->getLoopLatch())); VecRdxPhi->addIncoming(Val, LoopVectorBody); // Before each round, move the insertion point right between @@ -1201,8 +1201,11 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { // Fix the scalar loop reduction variable with the incoming reduction sum // from the vector body and from the backedge value. - int IncomingEdgeBlockIdx = (RdxPhi)->getBasicBlockIndex(LoopScalarBody); - int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1); // The other block. + int IncomingEdgeBlockIdx = + (RdxPhi)->getBasicBlockIndex(OrigLoop->getLoopLatch()); + assert(IncomingEdgeBlockIdx >= 0 && "Invalid block index"); + // Pick the other block. + int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 
0 : 1); (RdxPhi)->setIncomingValue(SelfEdgeBlockIdx, Scalar0); (RdxPhi)->setIncomingValue(IncomingEdgeBlockIdx, RdxDesc.LoopExitInstr); }// end of for each redux variable. @@ -1961,11 +1964,13 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi, if (Phi->getNumIncomingValues() != 2) return false; - // Find the possible incoming reduction variable. - BasicBlock *BB = Phi->getParent(); - int SelfEdgeIdx = Phi->getBasicBlockIndex(BB); - int InEdgeBlockIdx = (SelfEdgeIdx ? 0 : 1); // The other entry. - Value *RdxStart = Phi->getIncomingValue(InEdgeBlockIdx); + // Reduction variables are only found in the loop header block. + if (Phi->getParent() != TheLoop->getHeader()) + return false; + + // Obtain the reduction start value from the value that comes from the loop + // preheader. + Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader()); // ExitInstruction is the single value which is used outside the loop. // We only allow for a single reduction value to be used outside the loop. @@ -2003,9 +2008,17 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi, FoundStartPHI = true; continue; } + + // We allow in-loop PHINodes which are not the original reduction PHI + // node. If this PHI is the only user of Iter (happens in IF w/ no ELSE + // structure) then don't skip this PHI. + if (isa<PHINode>(U) && U->getParent() != TheLoop->getHeader() && + TheLoop->contains(U->getParent()) && Iter->getNumUses() > 1) + continue; + // Check if we found the exit user. BasicBlock *Parent = U->getParent(); - if (Parent != BB) { + if (!TheLoop->contains(Parent)) { // We must have a single exit instruction. 
if (ExitInstruction != 0) return false; diff --git a/llvm/test/Transforms/LoopVectorize/if-conversion.ll b/llvm/test/Transforms/LoopVectorize/if-conversion.ll index 35c549c..b4701b9 100644 --- a/llvm/test/Transforms/LoopVectorize/if-conversion.ll +++ b/llvm/test/Transforms/LoopVectorize/if-conversion.ll @@ -58,3 +58,51 @@ if.end: for.end: ret i32 undef } + + + +; int func(int *A, int n) { +; unsigned sum = 0; +; for (int i = 0; i < n; ++i) +; if (A[i] > 30) +; sum += A[i] + 2; +; +; return sum; +; } + +;CHECK: @reduction_func +;CHECK: load <4 x i32> +;CHECK: icmp sgt <4 x i32> +;CHECK: add <4 x i32> +;CHECK: select <4 x i1> +;CHECK: ret i32 +define i32 @reduction_func(i32* nocapture %A, i32 %n) nounwind uwtable readonly ssp { +entry: + %cmp10 = icmp sgt i32 %n, 0 + br i1 %cmp10, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.inc + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %sum.011 = phi i32 [ %sum.1, %for.inc ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %cmp1 = icmp sgt i32 %0, 30 + br i1 %cmp1, label %if.then, label %for.inc + +if.then: ; preds = %for.body + %add = add i32 %sum.011, 2 + %add4 = add i32 %add, %0 + br label %for.inc + +for.inc: ; preds = %for.body, %if.then + %sum.1 = phi i32 [ %add4, %if.then ], [ %sum.011, %for.body ] + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.inc, %entry + %sum.0.lcssa = phi i32 [ 0, %entry ], [ %sum.1, %for.inc ] + ret i32 %sum.0.lcssa +} + -- 2.7.4