[PGO][PGSO][LV] Fix loops not being vectorized under profile-guided size optimization.

author Hiroshi Yamauchi <yamauchi@google.com>

Tue, 11 Aug 2020 21:10:30 +0000 (14:10 -0700)

committer Hiroshi Yamauchi <yamauchi@google.com>

Wed, 19 Aug 2020 19:13:34 +0000 (12:13 -0700)
author Hiroshi Yamauchi <yamauchi@google.com>
Tue, 11 Aug 2020 21:10:30 +0000 (14:10 -0700)
committer Hiroshi Yamauchi <yamauchi@google.com>
Wed, 19 Aug 2020 19:13:34 +0000 (12:13 -0700)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

index 90a8d47..e1a82f5 100644 (file)
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2822,7 +2822,8 @@ void InnerLoopVectorizer::emitSCEVChecks(Loop *L, BasicBlock *Bypass) {
        return;
  
    assert(!(SCEVCheckBlock->getParent()->hasOptSize() ||
-           OptForSizeBasedOnProfile) &&
+           (OptForSizeBasedOnProfile &&
+            Cost->Hints->getForce() != LoopVectorizeHints::FK_Enabled)) &&
           "Cannot SCEV check stride or overflow when optimizing for size");
  
    SCEVCheckBlock->setName("vector.scevcheck");
@@ -7914,12 +7915,17 @@ static ScalarEpilogueLowering getScalarEpilogueLowering(
      BlockFrequencyInfo *BFI, TargetTransformInfo *TTI, TargetLibraryInfo *TLI,
      AssumptionCache *AC, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
      LoopVectorizationLegality &LVL) {
-  bool OptSize =
-      F->hasOptSize() || llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI,
-                                                     PGSOQueryType::IRPass);
    // 1) OptSize takes precedence over all other options, i.e. if this is set,
    // don't look at hints or options, and don't request a scalar epilogue.
-  if (OptSize)
+  // (For PGSO, shouldOptimizeForSize isn't currently accessible from
+  // LoopAccessInfo (due to code dependency and not being able to reliably get
+  // PSI/BFI from a loop analysis under NPM). So, unlike for hasOptSize, we
+  // cannot suppress the collection of strides in LoopAccessInfo::analyzeLoop()
+  // and vectorize without versioning when vectorization is forced; instead,
+  // vectorize with versioning when forced, as before. See D81345.)
+  if (F->hasOptSize() || (llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI,
+                                                      PGSOQueryType::IRPass) &&
+                          Hints.getForce() != LoopVectorizeHints::FK_Enabled))
      return CM_ScalarEpilogueNotAllowedOptSize;
  
    bool PredicateOptDisabled = PreferPredicateOverEpilog.getNumOccurrences() &&
diff --git a/llvm/test/Transforms/LoopVectorize/optsize.ll b/llvm/test/Transforms/LoopVectorize/optsize.ll

index b4233e6..9b24bb4 100644 (file)
--- a/llvm/test/Transforms/LoopVectorize/optsize.ll
+++ b/llvm/test/Transforms/LoopVectorize/optsize.ll
@@ -284,6 +284,34 @@ for.end:
    ret void
  }
  
+; Vectorize with versioning for unit stride for PGSO and enabled vectorization.
+;
+define void @stride1_pgso(i16* noalias %B, i32 %BStride) !prof !14 {
+; CHECK-LABEL: @stride1_pgso(
+; CHECK: vector.body
+;
+; PGSO-LABEL: @stride1_pgso(
+; PGSO: vector.body
+;
+; NPGSO-LABEL: @stride1_pgso(
+; NPGSO: vector.body
+
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
+  %mulB = mul nsw i32 %iv, %BStride
+  %gepOfB = getelementptr inbounds i16, i16* %B, i32 %mulB
+  store i16 42, i16* %gepOfB, align 4
+  %iv.next = add nuw nsw i32 %iv, 1
+  %exitcond = icmp eq i32 %iv.next, 1025
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !15
+
+for.end:
+  ret void
+}
+
  ; PR46652: Check that the need for stride==1 check prevents vectorizing a loop
  ; having tiny trip count, when compiling w/o -Os/-Oz.
  ; CHECK-LABEL: @pr46652
author	Hiroshi Yamauchi <yamauchi@google.com>
	Tue, 11 Aug 2020 21:10:30 +0000 (14:10 -0700)
committer	Hiroshi Yamauchi <yamauchi@google.com>
	Wed, 19 Aug 2020 19:13:34 +0000 (12:13 -0700)
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp		patch \| blob \| history
llvm/test/Transforms/LoopVectorize/optsize.ll		patch \| blob \| history