[Polly][MatMul] Abandon dependence analysis.

author Michael Kruse <llvm-project@meinersbur.de>

Wed, 29 Jun 2022 21:44:57 +0000 (16:44 -0500)

committer Michael Kruse <llvm-project@meinersbur.de>

Wed, 29 Jun 2022 22:20:05 +0000 (17:20 -0500)
author Michael Kruse <llvm-project@meinersbur.de>
Wed, 29 Jun 2022 21:44:57 +0000 (16:44 -0500)
committer Michael Kruse <llvm-project@meinersbur.de>
Wed, 29 Jun 2022 22:20:05 +0000 (17:20 -0500)
diff --git a/polly/include/polly/DependenceInfo.h b/polly/include/polly/DependenceInfo.h

index d1d6aa2..7526a29 100644 (file)
--- a/polly/include/polly/DependenceInfo.h
+++ b/polly/include/polly/DependenceInfo.h
@@ -208,6 +208,15 @@ struct DependenceAnalysis final : public AnalysisInfoMixin<DependenceAnalysis> {
  
      /// Recompute dependences from schedule and memory accesses.
      const Dependences &recomputeDependences(Dependences::AnalysisLevel Level);
+
+    /// Invalidate the dependence information and recompute it when needed
+    /// again.
+    /// May be required when the underlaying Scop was changed in a way that
+    /// would add new dependencies (e.g. between new statement instances
+    /// insierted into the SCoP) or intentionally breaks existing ones. It is
+    /// not required when updating the schedule that conforms the existing
+    /// dependencies.
+    void abandonDependences();
    };
    Result run(Scop &S, ScopAnalysisManager &SAM,
               ScopStandardAnalysisResults &SAR);
@@ -241,6 +250,13 @@ public:
    /// Recompute dependences from schedule and memory accesses.
    const Dependences &recomputeDependences(Dependences::AnalysisLevel Level);
  
+  /// Invalidate the dependence information and recompute it when needed again.
+  /// May be required when the underlaying Scop was changed in a way that would
+  /// add new dependencies (e.g. between new statement instances insierted into
+  /// the SCoP) or intentionally breaks existing ones. It is not required when
+  /// updating the schedule that conforms the existing dependencies.
+  void abandonDependences();
+
    /// Compute the dependence information for the SCoP @p S.
    bool runOnScop(Scop &S) override;
  
diff --git a/polly/lib/Analysis/DependenceInfo.cpp b/polly/lib/Analysis/DependenceInfo.cpp

index f29355f..d58dc99 100644 (file)
--- a/polly/lib/Analysis/DependenceInfo.cpp
+++ b/polly/lib/Analysis/DependenceInfo.cpp
@@ -848,6 +848,11 @@ const Dependences &DependenceAnalysis::Result::recomputeDependences(
    return *D[Level];
  }
  
+void DependenceAnalysis::Result::abandonDependences() {
+  for (std::unique_ptr<Dependences> &Deps : D)
+    Deps.release();
+}
+
  DependenceAnalysis::Result
  DependenceAnalysis::run(Scop &S, ScopAnalysisManager &SAM,
                          ScopStandardAnalysisResults &SAR) {
@@ -890,6 +895,11 @@ DependenceInfo::recomputeDependences(Dependences::AnalysisLevel Level) {
    return *D[Level];
  }
  
+void DependenceInfo::abandonDependences() {
+  for (std::unique_ptr<Dependences> &Deps : D)
+    Deps.release();
+}
+
  bool DependenceInfo::runOnScop(Scop &ScopVar) {
    S = &ScopVar;
    return false;
diff --git a/polly/lib/Transform/MatmulOptimizer.cpp b/polly/lib/Transform/MatmulOptimizer.cpp

index bad05df..4120cd8 100644 (file)
--- a/polly/lib/Transform/MatmulOptimizer.cpp
+++ b/polly/lib/Transform/MatmulOptimizer.cpp
@@ -491,9 +491,6 @@ createMacroKernel(isl::schedule_node Node,
    Node = permuteBandNodeDimensions(Node, DimOutNum - 2, DimOutNum - 1);
    Node = permuteBandNodeDimensions(Node, DimOutNum - 3, DimOutNum - 1);
  
-  // Mark the outermost loop as parallelizable.
-  Node = Node.as<isl::schedule_node_band>().member_set_coincident(0, true);
-
    return Node.child(0).child(0);
  }
  
diff --git a/polly/lib/Transform/ScheduleOptimizer.cpp b/polly/lib/Transform/ScheduleOptimizer.cpp

index c036651..99645d0 100644 (file)
--- a/polly/lib/Transform/ScheduleOptimizer.cpp
+++ b/polly/lib/Transform/ScheduleOptimizer.cpp
@@ -228,6 +228,7 @@ struct OptimizerAdditionalInfoTy {
    bool PatternOpts;
    bool Postopts;
    bool Prevect;
+  bool &DepsChanged;
  };
  
  class ScheduleTreeOptimizer final {
@@ -526,6 +527,7 @@ ScheduleTreeOptimizer::optimizeBand(__isl_take isl_schedule_node *NodeArg,
          tryOptimizeMatMulPattern(Node, OAI->TTI, OAI->D);
      if (!PatternOptimizedSchedule.is_null()) {
        MatMulOpts++;
+      OAI->DepsChanged = true;
        return PatternOptimizedSchedule.release();
      }
    }
@@ -676,21 +678,21 @@ static void walkScheduleTreeForStatistics(isl::schedule Schedule, int Version) {
        &Version);
  }
  
-static bool runIslScheduleOptimizer(
+static void runIslScheduleOptimizer(
      Scop &S,
      function_ref<const Dependences &(Dependences::AnalysisLevel)> GetDeps,
      TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE,
-    isl::schedule &LastSchedule) {
+    isl::schedule &LastSchedule, bool &DepsChanged) {
  
    // Skip SCoPs in case they're already optimised by PPCGCodeGeneration
    if (S.isToBeSkipped())
-    return false;
+    return;
  
    // Skip empty SCoPs but still allow code generation as it will delete the
    // loops present but not needed.
    if (S.getSize() == 0) {
      S.markAsOptimized();
-    return false;
+    return;
    }
  
    ScopsProcessed++;
@@ -706,7 +708,7 @@ static bool runIslScheduleOptimizer(
          &S, Schedule, GetDeps(Dependences::AL_Statement), ORE);
      if (ManuallyTransformed.is_null()) {
        LLVM_DEBUG(dbgs() << "Error during manual optimization\n");
-      return false;
+      return;
      }
  
      if (ManuallyTransformed.get() != Schedule.get()) {
@@ -724,18 +726,18 @@ static bool runIslScheduleOptimizer(
    // metadata earlier in ScopDetection.
    if (!HasUserTransformation && S.hasDisableHeuristicsHint()) {
      LLVM_DEBUG(dbgs() << "Heuristic optimizations disabled by metadata\n");
-    return false;
+    return;
    }
  
    // Get dependency analysis.
    const Dependences &D = GetDeps(Dependences::AL_Statement);
    if (D.getSharedIslCtx() != S.getSharedIslCtx()) {
      LLVM_DEBUG(dbgs() << "DependenceInfo for another SCoP/isl_ctx\n");
-    return false;
+    return;
    }
    if (!D.hasValidDependences()) {
      LLVM_DEBUG(dbgs() << "Dependency information not available\n");
-    return false;
+    return;
    }
  
    // Apply ISL's algorithm only if not overriden by the user. Note that
@@ -769,7 +771,7 @@ static bool runIslScheduleOptimizer(
      isl::union_set Domain = S.getDomains();
  
      if (Domain.is_null())
-      return false;
+      return;
  
      isl::union_map Validity = D.getDependences(ValidityKinds);
      isl::union_map Proximity = D.getDependences(ProximityKinds);
@@ -847,7 +849,7 @@ static bool runIslScheduleOptimizer(
    // In cases the scheduler is not able to optimize the code, we just do not
    // touch the schedule.
    if (Schedule.is_null())
-    return false;
+    return;
  
    if (GreedyFusion) {
      isl::union_map Validity = D.getDependences(
@@ -858,10 +860,12 @@ static bool runIslScheduleOptimizer(
  
    // Apply post-rescheduling optimizations (if enabled) and/or prevectorization.
    const OptimizerAdditionalInfoTy OAI = {
-      TTI, const_cast<Dependences *>(&D),
+      TTI,
+      const_cast<Dependences *>(&D),
        /*PatternOpts=*/!HasUserTransformation && PMBasedOpts,
        /*Postopts=*/!HasUserTransformation && EnablePostopts,
-      /*Prevect=*/PollyVectorizerChoice != VECTORIZER_NONE};
+      /*Prevect=*/PollyVectorizerChoice != VECTORIZER_NONE,
+      DepsChanged};
    if (OAI.PatternOpts || OAI.Postopts || OAI.Prevect) {
      Schedule = ScheduleTreeOptimizer::optimizeSchedule(Schedule, &OAI);
      Schedule = hoistExtensionNodes(Schedule);
@@ -872,7 +876,7 @@ static bool runIslScheduleOptimizer(
    // Skip profitability check if user transformation(s) have been applied.
    if (!HasUserTransformation &&
        !ScheduleTreeOptimizer::isProfitableSchedule(S, Schedule))
-    return false;
+    return;
  
    auto ScopStats = S.getStatistics();
    ScopsOptimized++;
@@ -885,8 +889,6 @@ static bool runIslScheduleOptimizer(
  
    if (OptimizedScops)
      errs() << S;
-
-  return false;
  }
  
  bool IslScheduleOptimizerWrapperPass::runOnScop(Scop &S) {
@@ -904,7 +906,13 @@ bool IslScheduleOptimizerWrapperPass::runOnScop(Scop &S) {
        getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
    TargetTransformInfo *TTI =
        &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
-  return runIslScheduleOptimizer(S, getDependences, TTI, &ORE, LastSchedule);
+
+  bool DepsChanged = false;
+  runIslScheduleOptimizer(S, getDependences, TTI, &ORE, LastSchedule,
+                          DepsChanged);
+  if (DepsChanged)
+    getAnalysis<DependenceInfo>().abandonDependences();
+  return false;
  }
  
  static void runScheduleOptimizerPrinter(raw_ostream &OS,
@@ -971,22 +979,18 @@ runIslScheduleOptimizerUsingNPM(Scop &S, ScopAnalysisManager &SAM,
    OptimizationRemarkEmitter ORE(&S.getFunction());
    TargetTransformInfo *TTI = &SAR.TTI;
    isl::schedule LastSchedule;
-  bool Modified = runIslScheduleOptimizer(S, GetDeps, TTI, &ORE, LastSchedule);
+  bool DepsChanged = false;
+  runIslScheduleOptimizer(S, GetDeps, TTI, &ORE, LastSchedule, DepsChanged);
+  if (DepsChanged)
+    Deps.abandonDependences();
+
    if (OS) {
      *OS << "Printing analysis 'Polly - Optimize schedule of SCoP' for region: '"
          << S.getName() << "' in function '" << S.getFunction().getName()
          << "':\n";
      runScheduleOptimizerPrinter(*OS, LastSchedule);
    }
-
-  if (!Modified)
-    return PreservedAnalyses::all();
-
-  PreservedAnalyses PA;
-  PA.preserveSet<AllAnalysesOn<Module>>();
-  PA.preserveSet<AllAnalysesOn<Function>>();
-  PA.preserveSet<AllAnalysesOn<Loop>>();
-  return PA;
+  return PreservedAnalyses::all();
  }
  
  llvm::PreservedAnalyses
diff --git a/polly/test/CodeGen/OpenMP/matmul-parallel.ll b/polly/test/CodeGen/OpenMP/matmul-parallel.ll

index 4a9d9c8..29a0939 100644 (file)
--- a/polly/test/CodeGen/OpenMP/matmul-parallel.ll
+++ b/polly/test/CodeGen/OpenMP/matmul-parallel.ll
@@ -2,20 +2,10 @@
  ; RUN: opt %loadPolly -polly-parallel -polly-opt-isl -polly-codegen -S < %s | FileCheck --check-prefix=CODEGEN %s
  ; REQUIRES: asserts
  
-; Parellization of detected matrix-multiplication. The allocations
-; Packed_A and Packed_B must be passed to the outlined function.
-; llvm.org/PR43164
-;
-; #define N 1536
-; int foo(float A[N][N],float B[N][N],float C[N][N]) {
-;     for (int i = 0; i < N; i++) {
-;         for (int j = 0; j < N; j++) {
-;             for (int k = 0; k < N; k++)
-;                 C[i][j] = C[i][j] + A[i][k] * B[k][j];
-;         }
-;     }
-;     return 0;
-; }
+; Parallelization of detected matrix-multiplication.
+; Currently, this is not supported. Due to Packed_A/Packed_B not private
+; per-thread the outer loops cannot be parallelized and a
+; '#pragma omp parallel for' on an inner loop may impose too much overhead.
  
  target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
  target triple = "x86_64-pc-windows-msvc19.16.27034"
@@ -65,8 +55,6 @@ for.body8:
  }
  
  
-; AST: #pragma omp parallel for
+; AST-NOT: parallel
  
-; CODGEN-LABEL: define internal void @init_array_polly_subfn(i8* %polly.par.userContext)
-; CODEGEN: %polly.subfunc.arg.Packed_A = load
-; CODEGEN: %polly.subfunc.arg.Packed_B = load
+; CODEGEN-NOT: subfunc
diff --git a/polly/test/ScheduleOptimizer/pattern-matching-based-opts.ll b/polly/test/ScheduleOptimizer/pattern-matching-based-opts.ll

index 90d8cd8..69f43d1 100644 (file)
--- a/polly/test/ScheduleOptimizer/pattern-matching-based-opts.ll
+++ b/polly/test/ScheduleOptimizer/pattern-matching-based-opts.ll
@@ -15,8 +15,7 @@
  ;
  ; CHECK-NOT: The matrix multiplication pattern was detected
  ; PATTERN-MATCHING-OPTS: The matrix multiplication pattern was detected
-; PARALLEL-AST: #pragma known-parallel
-; PARALLEL-AST: #pragma known-parallel
+; PARALLEL-AST-NOT: #pragma known-parallel
  ; STATS:  1 polly-opt-isl    - Number of matrix multiplication patterns detected and optimized
  ;
  target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
author	Michael Kruse <llvm-project@meinersbur.de>
	Wed, 29 Jun 2022 21:44:57 +0000 (16:44 -0500)
committer	Michael Kruse <llvm-project@meinersbur.de>
	Wed, 29 Jun 2022 22:20:05 +0000 (17:20 -0500)
polly/include/polly/DependenceInfo.h		patch \| blob \| history
polly/lib/Analysis/DependenceInfo.cpp		patch \| blob \| history
polly/lib/Transform/MatmulOptimizer.cpp		patch \| blob \| history
polly/lib/Transform/ScheduleOptimizer.cpp		patch \| blob \| history
polly/test/CodeGen/OpenMP/matmul-parallel.ll		patch \| blob \| history
polly/test/ScheduleOptimizer/pattern-matching-based-opts.ll		patch \| blob \| history