From 7c888dca46efae9b265da50509fc2b985d40eca3 Mon Sep 17 00:00:00 2001
From: Chandler Carruth <chandlerc@gmail.com>
Date: Tue, 8 Aug 2017 02:24:20 +0000
Subject: [PATCH] [PM] Fix new LoopUnroll function pass by invalidating loop
 analysis results when a loop is completely removed.

This is very hard to manifest as a visible bug. You need to arrange for
there to be a subsequent allocation of a 'Loop' object which gets the
exact same address as the one which the unroll deleted, and you need the
LoopAccessAnalysis results to be significant in the way that they're
stale. And you need a million other things to align.

But when it does, you get a deeply mysterious crash due to actually
finding a stale analysis result. This fixes the issue and tests for it
by directly checking we successfully invalidate things. I have not been
able to get *any* test case to reliably trigger this. Changes to LLVM
itself caused the only test case I ever had to cease to crash.

I've looked pretty extensively at less brittle ways of fixing this and
they are actually very, very hard to do. This is a somewhat strange and
unusual case as we have a pass which is deleting an IR unit, but is not
running within that IR unit's pass framework (which is what handles this
cleanly for the normal loop unroll). And where there isn't a definitive
way to clear *all* of the stale cache entries. And where the pass *is*
updating the core analysis that provides the IR units!

For example, we don't have any of these problems with Function analyses
because it is easy to clear out function analyses when the functions
themselves may have been deleted -- we clear an entire module's worth!
But that is too heavy of a hammer down here in the LoopAnalysisManager
layer.

A better long-term solution IMO is to require that AnalysisManager's
make their keys durable to this kind of thing. Specifically, when
caching an analysis for one IR unit that is conceptually "owned" by
a higher level IR unit, the AnalysisManager should incorporate this into
its data structures so that we can reliably clear these results without
having to teach each and every pass to do so manually as we do here. But
that is a change for another day as it will be a fairly invasive change
to the AnalysisManager infrastructure. Until then, this fortunately
seems to be quite rare.

llvm-svn: 310333
---
 llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp      |  20 +++-
 .../LoopUnroll/unroll-loop-invalidation.ll         | 107 +++++++++++++++++++++
 2 files changed, 125 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/Transforms/LoopUnroll/unroll-loop-invalidation.ll

diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index b692c08..a6d7849 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -1253,6 +1253,10 @@ PreservedAnalyses LoopUnrollPass::run(Function &F,
   auto &AC = AM.getResult<AssumptionAnalysis>(F);
   auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
 
+  LoopAnalysisManager *LAM = nullptr;
+  if (auto *LAMProxy = AM.getCachedResult<LoopAnalysisManagerFunctionProxy>(F))
+    LAM = &LAMProxy->getManager();
+
   const ModuleAnalysisManager &MAM =
       AM.getResult<ModuleAnalysisManagerFunctionProxy>(F).getManager();
   ProfileSummaryInfo *PSI =
@@ -1278,9 +1282,7 @@ PreservedAnalyses LoopUnrollPass::run(Function &F,
     // for unrolling is only needed to get optimization remarks emitted in
     // a forward order.
     Loop &L = *Worklist.pop_back_val();
-#ifndef NDEBUG
     Loop *ParentL = L.getParentLoop();
-#endif
 
     // The API here is quite complex to call, but there are only two interesting
     // states we support: partial and full (or "simple") unrolling. However, to
@@ -1305,6 +1307,20 @@ PreservedAnalyses LoopUnrollPass::run(Function &F,
     if (CurChanged && ParentL)
       ParentL->verifyLoop();
 #endif
+
+    // Walk the parent or top-level loops after unrolling to check whether we
+    // actually removed a loop, and if so clear any cached analysis results for
+    // it. We have to do this immediately as the next unrolling could allocate
+    // a new loop object that ends up with the same address as the deleted loop
+    // object causing cache collisions.
+    if (LAM) {
+      bool IsCurLoopValid =
+          ParentL
+              ? llvm::any_of(*ParentL, [&](Loop *SibL) { return SibL == &L; })
+              : llvm::any_of(LI, [&](Loop *SibL) { return SibL == &L; });
+      if (!IsCurLoopValid)
+        LAM->clear(L);
+    }
   }
 
   if (!Changed)
diff --git a/llvm/test/Transforms/LoopUnroll/unroll-loop-invalidation.ll b/llvm/test/Transforms/LoopUnroll/unroll-loop-invalidation.ll
new file mode 100644
index 0000000..af4d54c
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/unroll-loop-invalidation.ll
@@ -0,0 +1,107 @@
+; This test exercises that we don't corrupt a loop-analysis when running loop
+; unrolling in a way that deletes a loop. To do that, we first ensure the
+; analysis is cached, then unroll the loop (deleting it) and make sure that the
+; next function doesn't get a cache "hit" for this stale analysis result.
+;
+; RUN: opt -S -passes='loop(require<access-info>),unroll,loop(print-access-info)' -debug-pass-manager < %s 2>&1 | FileCheck %s
+;
+; CHECK: Starting llvm::Function pass manager run.
+; CHECK: Running pass: FunctionToLoopPassAdaptor
+; CHECK: Running analysis: LoopAnalysis
+; CHECK: Running analysis: InnerAnalysisManagerProxy<{{.*}}LoopAnalysisManager
+; CHECK: Starting Loop pass manager run.
+; CHECK: Running pass: RequireAnalysisPass<{{.*}}LoopAccessAnalysis
+; CHECK: Running analysis: LoopAccessAnalysis on inner1.header
+; CHECK: Finished Loop pass manager run.
+; CHECK: Starting Loop pass manager run.
+; CHECK: Running pass: RequireAnalysisPass<{{.*}}LoopAccessAnalysis
+; CHECK: Running analysis: LoopAccessAnalysis on inner2.header
+; CHECK: Finished Loop pass manager run.
+; CHECK: Starting Loop pass manager run.
+; CHECK: Running pass: RequireAnalysisPass<{{.*}}LoopAccessAnalysis
+; CHECK: Running analysis: LoopAccessAnalysis on outer.header
+; CHECK: Finished Loop pass manager run.
+; CHECK: Running pass: LoopUnrollPass
+; CHECK: Clearing all analysis results for: inner2.header
+; CHECK: Clearing all analysis results for: outer.header
+; CHECK: Invalidating all non-preserved analyses for: test
+; CHECK: Invalidating all non-preserved analyses for: inner1.header
+; CHECK: Invalidating analysis: LoopAccessAnalysis on inner1.header
+; CHECK: Invalidating all non-preserved analyses for: inner1.header.1
+; CHECK-NOT: Invalidating analysis: LoopAccessAnalysis on inner1.header.1
+; CHECK: Running pass: FunctionToLoopPassAdaptor
+; CHECK: Starting Loop pass manager run.
+; CHECK: Running pass: LoopAccessInfoPrinterPass
+; CHECK: Running analysis: LoopAccessAnalysis on inner1.header
+; CHECK: Loop access info in function 'test':
+; CHECK:   inner1.header:
+; CHECK: Finished Loop pass manager run.
+; CHECK: Starting Loop pass manager run.
+; CHECK: Running pass: LoopAccessInfoPrinterPass
+; CHECK: Running analysis: LoopAccessAnalysis on inner1.header.1
+; CHECK: Loop access info in function 'test':
+; CHECK:   inner1.header.1:
+; CHECK: Finished Loop pass manager run.
+
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @test(i32 %inner1.count) {
+; CHECK-LABEL: define void @test(
+bb:
+  br label %outer.ph
+
+outer.ph:
+  br label %outer.header
+
+outer.header:
+  %outer.i = phi i32 [ 0, %outer.ph ], [ %outer.i.next, %outer.latch ]
+  br label %inner1.ph
+
+inner1.ph:
+  br label %inner1.header
+
+inner1.header:
+  %inner1.i = phi i32 [ 0, %inner1.ph ], [ %inner1.i.next, %inner1.header ]
+  %inner1.i.next = add i32 %inner1.i, 1
+  %inner1.cond = icmp eq i32 %inner1.i, %inner1.count
+  br i1 %inner1.cond, label %inner1.exit, label %inner1.header
+; We should have two unrolled copies of this loop and nothing else.
+;
+; CHECK-NOT:     icmp eq
+; CHECK-NOT:     br i1
+; CHECK:         %[[COND1:.*]] = icmp eq i32 %{{.*}}, %inner1.count
+; CHECK:         br i1 %[[COND1]],
+; CHECK-NOT:     icmp eq
+; CHECK-NOT:     br i1
+; CHECK:         %[[COND2:.*]] = icmp eq i32 %{{.*}}, %inner1.count
+; CHECK:         br i1 %[[COND2]],
+; CHECK-NOT:     icmp eq
+; CHECK-NOT:     br i1
+
+
+inner1.exit:
+  br label %inner2.ph
+
+inner2.ph:
+  br label %inner2.header
+
+inner2.header:
+  %inner2.i = phi i32 [ 0, %inner2.ph ], [ %inner2.i.next, %inner2.header ]
+  %inner2.i.next = add i32 %inner2.i, 1
+  %inner2.cond = icmp eq i32 %inner2.i, 4
+  br i1 %inner2.cond, label %inner2.exit, label %inner2.header
+
+inner2.exit:
+  br label %outer.latch
+
+outer.latch:
+  %outer.i.next = add i32 %outer.i, 1
+  %outer.cond = icmp eq i32 %outer.i.next, 2
+  br i1 %outer.cond, label %outer.exit, label %outer.header
+
+outer.exit:
+  br label %exit
+
+exit:
+  ret void
+}
-- 
2.7.4