From 7c888dca46efae9b265da50509fc2b985d40eca3 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Tue, 8 Aug 2017 02:24:20 +0000 Subject: [PATCH] [PM] Fix new LoopUnroll function pass by invalidating loop analysis results when a loop is completely removed. This is very hard to manifest as a visible bug. You need to arrange for there to be a subsequent allocation of a 'Loop' object which gets the exact same address as the one which the unroll deleted, and you need the LoopAccessAnalysis results to be significant in the way that they're stale. And you need a million other things to align. But when it does, you get a deeply mysterious crash due to actually finding a stale analysis result. This fixes the issue and tests for it by directly checking we successfully invalidate things. I have not been able to get *any* test case to reliably trigger this. Changes to LLVM itself caused the only test case I ever had to cease to crash. I've looked pretty extensively at less brittle ways of fixing this and they are actually very, very hard to do. This is a somewhat strange and unusual case as we have a pass which is deleting an IR unit, but is not running within that IR unit's pass framework (which is what handles this cleanly for the normal loop unroll). And where there isn't a definitive way to clear *all* of the stale cache entries. And where the pass *is* updating the core analysis that provides the IR units! For example, we don't have any of these problems with Function analyses because it is easy to clear out function analyses when the functions themselves may have been deleted -- we clear an entire module's worth! But that is too heavy of a hammer down here in the LoopAnalysisManager layer. A better long-term solution IMO is to require that AnalysisManager's make their keys durable to this kind of thing. Specifically, when caching an analysis for one IR unit that is conceptually "owned" by a higher level IR unit, the AnalysisManager should incorporate this into its data structures so that we can reliably clear these results without having to teach each and every pass to do so manually as we do here. But that is a change for another day as it will be a fairly invasive change to the AnalysisManager infrastructure. Until then, this fortunately seems to be quite rare. llvm-svn: 310333 --- llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp | 20 +++- .../LoopUnroll/unroll-loop-invalidation.ll | 107 +++++++++++++++++++++ 2 files changed, 125 insertions(+), 2 deletions(-) create mode 100644 llvm/test/Transforms/LoopUnroll/unroll-loop-invalidation.ll diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp index b692c08..a6d7849 100644 --- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -1253,6 +1253,10 @@ PreservedAnalyses LoopUnrollPass::run(Function &F, auto &AC = AM.getResult(F); auto &ORE = AM.getResult(F); + LoopAnalysisManager *LAM = nullptr; + if (auto *LAMProxy = AM.getCachedResult(F)) + LAM = &LAMProxy->getManager(); + const ModuleAnalysisManager &MAM = AM.getResult(F).getManager(); ProfileSummaryInfo *PSI = @@ -1278,9 +1282,7 @@ PreservedAnalyses LoopUnrollPass::run(Function &F, // for unrolling is only needed to get optimization remarks emitted in // a forward order. Loop &L = *Worklist.pop_back_val(); -#ifndef NDEBUG Loop *ParentL = L.getParentLoop(); -#endif // The API here is quite complex to call, but there are only two interesting // states we support: partial and full (or "simple") unrolling. However, to @@ -1305,6 +1307,20 @@ PreservedAnalyses LoopUnrollPass::run(Function &F, if (CurChanged && ParentL) ParentL->verifyLoop(); #endif + + // Walk the parent or top-level loops after unrolling to check whether we + // actually removed a loop, and if so clear any cached analysis results for + // it. We have to do this immediately as the next unrolling could allocate + // a new loop object that ends up with the same address as the deleted loop + // object causing cache collisions. + if (LAM) { + bool IsCurLoopValid = + ParentL + ? llvm::any_of(*ParentL, [&](Loop *SibL) { return SibL == &L; }) + : llvm::any_of(LI, [&](Loop *SibL) { return SibL == &L; }); + if (!IsCurLoopValid) + LAM->clear(L); + } } if (!Changed) diff --git a/llvm/test/Transforms/LoopUnroll/unroll-loop-invalidation.ll b/llvm/test/Transforms/LoopUnroll/unroll-loop-invalidation.ll new file mode 100644 index 0000000..af4d54c --- /dev/null +++ b/llvm/test/Transforms/LoopUnroll/unroll-loop-invalidation.ll @@ -0,0 +1,107 @@ +; This test exercises that we don't corrupt a loop-analysis when running loop +; unrolling in a way that deletes a loop. To do that, we first ensure the +; analysis is cached, then unroll the loop (deleting it) and make sure that the +; next function doesn't get a cache "hit" for this stale analysis result. +; +; RUN: opt -S -passes='loop(require),unroll,loop(print-access-info)' -debug-pass-manager < %s 2>&1 | FileCheck %s +; +; CHECK: Starting llvm::Function pass manager run. +; CHECK: Running pass: FunctionToLoopPassAdaptor +; CHECK: Running analysis: LoopAnalysis +; CHECK: Running analysis: InnerAnalysisManagerProxy<{{.*}}LoopAnalysisManager +; CHECK: Starting Loop pass manager run. +; CHECK: Running pass: RequireAnalysisPass<{{.*}}LoopAccessAnalysis +; CHECK: Running analysis: LoopAccessAnalysis on inner1.header +; CHECK: Finished Loop pass manager run. +; CHECK: Starting Loop pass manager run. +; CHECK: Running pass: RequireAnalysisPass<{{.*}}LoopAccessAnalysis +; CHECK: Running analysis: LoopAccessAnalysis on inner2.header +; CHECK: Finished Loop pass manager run. +; CHECK: Starting Loop pass manager run. +; CHECK: Running pass: RequireAnalysisPass<{{.*}}LoopAccessAnalysis +; CHECK: Running analysis: LoopAccessAnalysis on outer.header +; CHECK: Finished Loop pass manager run. +; CHECK: Running pass: LoopUnrollPass +; CHECK: Clearing all analysis results for: inner2.header +; CHECK: Clearing all analysis results for: outer.header +; CHECK: Invalidating all non-preserved analyses for: test +; CHECK: Invalidating all non-preserved analyses for: inner1.header +; CHECK: Invalidating analysis: LoopAccessAnalysis on inner1.header +; CHECK: Invalidating all non-preserved analyses for: inner1.header.1 +; CHECK-NOT: Invalidating analysis: LoopAccessAnalysis on inner1.header.1 +; CHECK: Running pass: FunctionToLoopPassAdaptor +; CHECK: Starting Loop pass manager run. +; CHECK: Running pass: LoopAccessInfoPrinterPass +; CHECK: Running analysis: LoopAccessAnalysis on inner1.header +; CHECK: Loop access info in function 'test': +; CHECK: inner1.header: +; CHECK: Finished Loop pass manager run. +; CHECK: Starting Loop pass manager run. +; CHECK: Running pass: LoopAccessInfoPrinterPass +; CHECK: Running analysis: LoopAccessAnalysis on inner1.header.1 +; CHECK: Loop access info in function 'test': +; CHECK: inner1.header.1: +; CHECK: Finished Loop pass manager run. + +target triple = "x86_64-unknown-linux-gnu" + +define void @test(i32 %inner1.count) { +; CHECK-LABEL: define void @test( +bb: + br label %outer.ph + +outer.ph: + br label %outer.header + +outer.header: + %outer.i = phi i32 [ 0, %outer.ph ], [ %outer.i.next, %outer.latch ] + br label %inner1.ph + +inner1.ph: + br label %inner1.header + +inner1.header: + %inner1.i = phi i32 [ 0, %inner1.ph ], [ %inner1.i.next, %inner1.header ] + %inner1.i.next = add i32 %inner1.i, 1 + %inner1.cond = icmp eq i32 %inner1.i, %inner1.count + br i1 %inner1.cond, label %inner1.exit, label %inner1.header +; We should have two unrolled copies of this loop and nothing else. +; +; CHECK-NOT: icmp eq +; CHECK-NOT: br i1 +; CHECK: %[[COND1:.*]] = icmp eq i32 %{{.*}}, %inner1.count +; CHECK: br i1 %[[COND1]], +; CHECK-NOT: icmp eq +; CHECK-NOT: br i1 +; CHECK: %[[COND2:.*]] = icmp eq i32 %{{.*}}, %inner1.count +; CHECK: br i1 %[[COND2]], +; CHECK-NOT: icmp eq +; CHECK-NOT: br i1 + + +inner1.exit: + br label %inner2.ph + +inner2.ph: + br label %inner2.header + +inner2.header: + %inner2.i = phi i32 [ 0, %inner2.ph ], [ %inner2.i.next, %inner2.header ] + %inner2.i.next = add i32 %inner2.i, 1 + %inner2.cond = icmp eq i32 %inner2.i, 4 + br i1 %inner2.cond, label %inner2.exit, label %inner2.header + +inner2.exit: + br label %outer.latch + +outer.latch: + %outer.i.next = add i32 %outer.i, 1 + %outer.cond = icmp eq i32 %outer.i.next, 2 + br i1 %outer.cond, label %outer.exit, label %outer.header + +outer.exit: + br label %exit + +exit: + ret void +} -- 2.7.4