/// Recompute dependences from schedule and memory accesses.
const Dependences &recomputeDependences(Dependences::AnalysisLevel Level);
+
+ /// Invalidate the dependence information and recompute it when needed
+ /// again.
+ /// May be required when the underlying Scop was changed in a way that
+ /// would add new dependencies (e.g. between new statement instances
+ /// inserted into the SCoP) or intentionally breaks existing ones. It is
+ /// not required when updating the schedule in a way that conforms to the
+ /// existing dependencies.
+ void abandonDependences();
};
Result run(Scop &S, ScopAnalysisManager &SAM,
ScopStandardAnalysisResults &SAR);
/// Recompute dependences from schedule and memory accesses.
const Dependences &recomputeDependences(Dependences::AnalysisLevel Level);
+ /// Invalidate the dependence information and recompute it when needed again.
+ /// May be required when the underlying Scop was changed in a way that would
+ /// add new dependencies (e.g. between new statement instances inserted into
+ /// the SCoP) or intentionally breaks existing ones. It is not required when
+ /// updating the schedule in a way that conforms to the existing dependencies.
+ void abandonDependences();
+
/// Compute the dependence information for the SCoP @p S.
bool runOnScop(Scop &S) override;
bool PatternOpts;
bool Postopts;
bool Prevect;
+ bool &DepsChanged;
};
class ScheduleTreeOptimizer final {
tryOptimizeMatMulPattern(Node, OAI->TTI, OAI->D);
if (!PatternOptimizedSchedule.is_null()) {
MatMulOpts++;
+ OAI->DepsChanged = true;
return PatternOptimizedSchedule.release();
}
}
&Version);
}
-static bool runIslScheduleOptimizer(
+static void runIslScheduleOptimizer(
Scop &S,
function_ref<const Dependences &(Dependences::AnalysisLevel)> GetDeps,
TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE,
- isl::schedule &LastSchedule) {
+ isl::schedule &LastSchedule, bool &DepsChanged) {
// Skip SCoPs in case they're already optimised by PPCGCodeGeneration
if (S.isToBeSkipped())
- return false;
+ return;
// Skip empty SCoPs but still allow code generation as it will delete the
// loops present but not needed.
if (S.getSize() == 0) {
S.markAsOptimized();
- return false;
+ return;
}
ScopsProcessed++;
&S, Schedule, GetDeps(Dependences::AL_Statement), ORE);
if (ManuallyTransformed.is_null()) {
LLVM_DEBUG(dbgs() << "Error during manual optimization\n");
- return false;
+ return;
}
if (ManuallyTransformed.get() != Schedule.get()) {
// metadata earlier in ScopDetection.
if (!HasUserTransformation && S.hasDisableHeuristicsHint()) {
LLVM_DEBUG(dbgs() << "Heuristic optimizations disabled by metadata\n");
- return false;
+ return;
}
// Get dependency analysis.
const Dependences &D = GetDeps(Dependences::AL_Statement);
if (D.getSharedIslCtx() != S.getSharedIslCtx()) {
LLVM_DEBUG(dbgs() << "DependenceInfo for another SCoP/isl_ctx\n");
- return false;
+ return;
}
if (!D.hasValidDependences()) {
LLVM_DEBUG(dbgs() << "Dependency information not available\n");
- return false;
+ return;
}
// Apply ISL's algorithm only if not overriden by the user. Note that
isl::union_set Domain = S.getDomains();
if (Domain.is_null())
- return false;
+ return;
isl::union_map Validity = D.getDependences(ValidityKinds);
isl::union_map Proximity = D.getDependences(ProximityKinds);
// In cases the scheduler is not able to optimize the code, we just do not
// touch the schedule.
if (Schedule.is_null())
- return false;
+ return;
if (GreedyFusion) {
isl::union_map Validity = D.getDependences(
// Apply post-rescheduling optimizations (if enabled) and/or prevectorization.
const OptimizerAdditionalInfoTy OAI = {
- TTI, const_cast<Dependences *>(&D),
+ TTI,
+ const_cast<Dependences *>(&D),
/*PatternOpts=*/!HasUserTransformation && PMBasedOpts,
/*Postopts=*/!HasUserTransformation && EnablePostopts,
- /*Prevect=*/PollyVectorizerChoice != VECTORIZER_NONE};
+ /*Prevect=*/PollyVectorizerChoice != VECTORIZER_NONE,
+ DepsChanged};
if (OAI.PatternOpts || OAI.Postopts || OAI.Prevect) {
Schedule = ScheduleTreeOptimizer::optimizeSchedule(Schedule, &OAI);
Schedule = hoistExtensionNodes(Schedule);
// Skip profitability check if user transformation(s) have been applied.
if (!HasUserTransformation &&
!ScheduleTreeOptimizer::isProfitableSchedule(S, Schedule))
- return false;
+ return;
auto ScopStats = S.getStatistics();
ScopsOptimized++;
if (OptimizedScops)
errs() << S;
-
- return false;
}
bool IslScheduleOptimizerWrapperPass::runOnScop(Scop &S) {
getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
TargetTransformInfo *TTI =
&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- return runIslScheduleOptimizer(S, getDependences, TTI, &ORE, LastSchedule);
+
+ bool DepsChanged = false;
+ runIslScheduleOptimizer(S, getDependences, TTI, &ORE, LastSchedule,
+ DepsChanged);
+ if (DepsChanged)
+ getAnalysis<DependenceInfo>().abandonDependences();
+ return false;
}
static void runScheduleOptimizerPrinter(raw_ostream &OS,
OptimizationRemarkEmitter ORE(&S.getFunction());
TargetTransformInfo *TTI = &SAR.TTI;
isl::schedule LastSchedule;
- bool Modified = runIslScheduleOptimizer(S, GetDeps, TTI, &ORE, LastSchedule);
+ bool DepsChanged = false;
+ runIslScheduleOptimizer(S, GetDeps, TTI, &ORE, LastSchedule, DepsChanged);
+ if (DepsChanged)
+ Deps.abandonDependences();
+
if (OS) {
*OS << "Printing analysis 'Polly - Optimize schedule of SCoP' for region: '"
<< S.getName() << "' in function '" << S.getFunction().getName()
<< "':\n";
runScheduleOptimizerPrinter(*OS, LastSchedule);
}
-
- if (!Modified)
- return PreservedAnalyses::all();
-
- PreservedAnalyses PA;
- PA.preserveSet<AllAnalysesOn<Module>>();
- PA.preserveSet<AllAnalysesOn<Function>>();
- PA.preserveSet<AllAnalysesOn<Loop>>();
- return PA;
+ return PreservedAnalyses::all();
}
llvm::PreservedAnalyses
; RUN: opt %loadPolly -polly-parallel -polly-opt-isl -polly-codegen -S < %s | FileCheck --check-prefix=CODEGEN %s
; REQUIRES: asserts
-; Parellization of detected matrix-multiplication. The allocations
-; Packed_A and Packed_B must be passed to the outlined function.
-; llvm.org/PR43164
-;
-; #define N 1536
-; int foo(float A[N][N],float B[N][N],float C[N][N]) {
-; for (int i = 0; i < N; i++) {
-; for (int j = 0; j < N; j++) {
-; for (int k = 0; k < N; k++)
-; C[i][j] = C[i][j] + A[i][k] * B[k][j];
-; }
-; }
-; return 0;
-; }
+; Parallelization of detected matrix-multiplication.
+; Currently, this is not supported. Because Packed_A/Packed_B are not
+; private per-thread, the outer loops cannot be parallelized, and a
+; '#pragma omp parallel for' on an inner loop may impose too much overhead.
target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-windows-msvc19.16.27034"
}
-; AST: #pragma omp parallel for
+; AST-NOT: parallel
-; CODGEN-LABEL: define internal void @init_array_polly_subfn(i8* %polly.par.userContext)
-; CODEGEN: %polly.subfunc.arg.Packed_A = load
-; CODEGEN: %polly.subfunc.arg.Packed_B = load
+; CODEGEN-NOT: subfunc