From: Hiroshi Yamauchi Date: Tue, 11 Aug 2020 21:10:30 +0000 (-0700) Subject: [PGO][PGSO][LV] Fix loop not vectorized issue under profile guided size opts. X-Git-Tag: llvmorg-13-init~14251 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=ab401a8c8a9cf2b9e81eb6864aa647d96751ae84;p=platform%2Fupstream%2Fllvm.git [PGO][PGSO][LV] Fix loop not vectorized issue under profile guided size opts. D81345 appears to accidentally disable vectorization when explicitly enabled. As PGSO isn't currently accessible from LoopAccessInfo, revert back to the vectorization with versioning-for-unit-stride for PGSO. Differential Revision: https://reviews.llvm.org/D85784 --- diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 90a8d47..e1a82f5 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2822,7 +2822,8 @@ void InnerLoopVectorizer::emitSCEVChecks(Loop *L, BasicBlock *Bypass) { return; assert(!(SCEVCheckBlock->getParent()->hasOptSize() || - OptForSizeBasedOnProfile) && + (OptForSizeBasedOnProfile && + Cost->Hints->getForce() != LoopVectorizeHints::FK_Enabled)) && "Cannot SCEV check stride or overflow when optimizing for size"); SCEVCheckBlock->setName("vector.scevcheck"); @@ -7914,12 +7915,17 @@ static ScalarEpilogueLowering getScalarEpilogueLowering( BlockFrequencyInfo *BFI, TargetTransformInfo *TTI, TargetLibraryInfo *TLI, AssumptionCache *AC, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, LoopVectorizationLegality &LVL) { - bool OptSize = - F->hasOptSize() || llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI, - PGSOQueryType::IRPass); // 1) OptSize takes precedence over all other options, i.e. if this is set, // don't look at hints or options, and don't request a scalar epilogue. 
- if (OptSize) + // (For PGSO, as shouldOptimizeForSize isn't currently accessible from + // LoopAccessInfo (due to code dependency and not being able to reliably get + // PSI/BFI from a loop analysis under NPM), we cannot suppress the collection + // of strides in LoopAccessInfo::analyzeLoop() and vectorize without + // versioning when the vectorization is forced, unlike hasOptSize. So revert + // back to the old way and vectorize with versioning when forced. See D81345.) + if (F->hasOptSize() || (llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI, + PGSOQueryType::IRPass) && + Hints.getForce() != LoopVectorizeHints::FK_Enabled)) return CM_ScalarEpilogueNotAllowedOptSize; bool PredicateOptDisabled = PreferPredicateOverEpilog.getNumOccurrences() && diff --git a/llvm/test/Transforms/LoopVectorize/optsize.ll b/llvm/test/Transforms/LoopVectorize/optsize.ll index b4233e6..9b24bb4 100644 --- a/llvm/test/Transforms/LoopVectorize/optsize.ll +++ b/llvm/test/Transforms/LoopVectorize/optsize.ll @@ -284,6 +284,34 @@ for.end: ret void } +; Vectorize with versioning for unit stride for PGSO and enabled vectorization. +; +define void @stride1_pgso(i16* noalias %B, i32 %BStride) !prof !14 { +; CHECK-LABEL: @stride1_pgso( +; CHECK: vector.body +; +; PGSO-LABEL: @stride1_pgso( +; PGSO: vector.body +; +; NPGSO-LABEL: @stride1_pgso( +; NPGSO: vector.body + +entry: + br label %for.body + +for.body: + %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ] + %mulB = mul nsw i32 %iv, %BStride + %gepOfB = getelementptr inbounds i16, i16* %B, i32 %mulB + store i16 42, i16* %gepOfB, align 4 + %iv.next = add nuw nsw i32 %iv, 1 + %exitcond = icmp eq i32 %iv.next, 1025 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !15 + +for.end: + ret void +} + ; PR46652: Check that the need for stride==1 check prevents vectorizing a loop ; having tiny trip count, when compiling w/o -Os/-Oz. ; CHECK-LABEL: @pr46652