From: Matthew Simpson Date: Thu, 23 Mar 2017 16:07:21 +0000 (+0000) Subject: [LV] Delete unneeded scalar GEP creation code X-Git-Tag: llvmorg-5.0.0-rc1~9333 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=1fb4064531161167324a35aa860f67f3005f3f24;p=platform%2Fupstream%2Fllvm.git [LV] Delete unneeded scalar GEP creation code The code for generating scalar base pointers in vectorizeMemoryInstruction is not needed. We currently scalarize all GEPs and maintain the scalarized values in VectorLoopValueMap. The GEP cloning in this unneeded code is the same as that in scalarizeInstruction. The test cases that changed as a result of this patch changed because we were able to reuse the scalarized GEP that we previously generated instead of cloning a new one. Differential Revision: https://reviews.llvm.org/D30587 llvm-svn: 298615 --- diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 08f603f..fa2c71d 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2999,39 +2999,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) { // Handle consecutive loads/stores. GetElementPtrInst *Gep = getGEPInstruction(Ptr); if (ConsecutiveStride) { - if (Gep) { - unsigned NumOperands = Gep->getNumOperands(); -#ifndef NDEBUG - // The original GEP that identified as a consecutive memory access - // should have only one loop-variant operand. - unsigned NumOfLoopVariantOps = 0; - for (unsigned i = 0; i < NumOperands; ++i) - if (!PSE.getSE()->isLoopInvariant(PSE.getSCEV(Gep->getOperand(i)), - OrigLoop)) - NumOfLoopVariantOps++; - assert(NumOfLoopVariantOps == 1 && - "Consecutive GEP should have only one loop-variant operand"); -#endif - GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone()); - Gep2->setName("gep.indvar"); - - // A new GEP is created for a 0-lane value of the first unroll iteration. 
- // The GEPs for the rest of the unroll iterations are computed below as an - // offset from this GEP. - for (unsigned i = 0; i < NumOperands; ++i) - // We can apply getScalarValue() for all GEP indices. It returns an - // original value for loop-invariant operand and 0-lane for consecutive - // operand. - Gep2->setOperand(i, getScalarValue(Gep->getOperand(i), - 0, /* First unroll iteration */ - 0 /* 0-lane of the vector */ )); - setDebugLocFromInst(Builder, Gep); - Ptr = Builder.Insert(Gep2); - - } else { // No GEP - setDebugLocFromInst(Builder, Ptr); - Ptr = getScalarValue(Ptr, 0, 0); - } + Ptr = getScalarValue(Ptr, 0, 0); } else { // At this point we should vector version of GEP for Gather or Scatter assert(CreateGatherScatter && "The instruction should be scalarized"); diff --git a/llvm/test/Transforms/LoopVectorize/float-induction.ll b/llvm/test/Transforms/LoopVectorize/float-induction.ll index 4cd617c..8eec6e2 100644 --- a/llvm/test/Transforms/LoopVectorize/float-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/float-induction.ll @@ -298,8 +298,7 @@ for.end: ; preds = %for.end.loopexit, % ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 ; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP5]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; VEC2_INTERL1_PRED_STORE: [[PRED_STORE_IF]]: -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, float* %A, i64 [[INDEX]] -; VEC2_INTERL1_PRED_STORE-NEXT: store float [[TMP1]], float* [[TMP7]], align 4 +; VEC2_INTERL1_PRED_STORE-NEXT: store float [[TMP1]], float* [[TMP2]], align 4 ; VEC2_INTERL1_PRED_STORE-NEXT: br label %[[PRED_STORE_CONTINUE]] ; VEC2_INTERL1_PRED_STORE: [[PRED_STORE_CONTINUE]]: ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1 diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll index c436814..a1837b3 100644 --- 
a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll +++ b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll @@ -11,6 +11,7 @@ entry: ; VEC-LABEL: test ; VEC: %[[v0:.+]] = add i64 %index, 0 +; VEC: %[[v2:.+]] = getelementptr inbounds i32, i32* %f, i64 %[[v0]] ; VEC: %[[v8:.+]] = icmp sgt <2 x i32> %{{.*}}, ; VEC: %[[v10:.+]] = and <2 x i1> %[[v8]], ; VEC: %[[o1:.+]] = or <2 x i1> zeroinitializer, %[[v10]] @@ -21,7 +22,6 @@ entry: ; VEC: [[cond]]: ; VEC: %[[v13:.+]] = extractelement <2 x i32> %wide.load, i32 0 ; VEC: %[[v9a:.+]] = add nsw i32 %[[v13]], 20 -; VEC: %[[v2:.+]] = getelementptr inbounds i32, i32* %f, i64 %[[v0]] ; VEC: store i32 %[[v9a]], i32* %[[v2]], align 4 ; VEC: br label %[[else:.+]] ;