From 1f621f0a70e307455b000118838df2f155c5ea50 Mon Sep 17 00:00:00 2001
From: Chandler Carruth <chandlerc@gmail.com>
Date: Sun, 4 Sep 2016 08:34:24 +0000
Subject: [PATCH] [LCG] A NFC refactoring to extract the logic for doing a
 postorder-sequence based update after edge insertion into a generic helper
 function.

This separates the SCC-specific logic into two fairly simple lambdas and
extracts the rest into a generic helper template function. I think this
is a net win on its own merits because it disentangles different pieces
of the algorithm. Now there is one place that does the two-step
partition to identify a set of newly connected components and at the
same time update the postorder sequence.

However, I'm also hoping to re-use this an upcoming patch to update
a cached post-order sequence of RefSCCs when doing the analogous update
to the RefSCC graph, and I don't want to have two copies.

The diff is quite messy but this really is just moving things around and
making types generic rather than specific.

llvm-svn: 280618
---
 llvm/lib/Analysis/LazyCallGraph.cpp | 295 ++++++++++++++++++++++--------------
 1 file changed, 184 insertions(+), 111 deletions(-)

diff --git a/llvm/lib/Analysis/LazyCallGraph.cpp b/llvm/lib/Analysis/LazyCallGraph.cpp
index c4230eb..2e78fca 100644
--- a/llvm/lib/Analysis/LazyCallGraph.cpp
+++ b/llvm/lib/Analysis/LazyCallGraph.cpp
@@ -251,6 +251,141 @@ bool LazyCallGraph::RefSCC::isDescendantOf(const RefSCC &C) const {
   return false;
 }
 
+/// Generic helper that updates a postorder sequence of SCCs for a potentially
+/// cycle-introducing edge insertion.
+///
+/// A postorder sequence of SCCs of a directed graph has one fundamental
+/// property: all deges in the DAG of SCCs point "up" the sequence. That is,
+/// all edges in the SCC DAG point to prior SCCs in the sequence.
+///
+/// This routine both updates a postorder sequence and uses that sequence to
+/// compute the set of SCCs connected into a cycle. It should only be called to
+/// insert a "downward" edge which will require changing the sequence to
+/// restore it to a postorder.
+///
+/// When inserting an edge from an earlier SCC to a later SCC in some postorder
+/// sequence, all of the SCCs which may be impacted are in the closed range of
+/// those two within the postorder sequence. The algorithm used here to restore
+/// the state is as follows:
+///
+/// 1) Starting from the source SCC, construct a set of SCCs which reach the
+///    source SCC consisting of just the source SCC. Then scan toward the
+///    target SCC in postorder and for each SCC, if it has an edge to an SCC
+///    in the set, add it to the set. Otherwise, the source SCC is not
+///    a successor, move it in the postorder sequence to immediately before
+///    the source SCC, shifting the source SCC and all SCCs in the set one
+///    position toward the target SCC. Stop scanning after processing the
+///    target SCC.
+/// 2) If the source SCC is now past the target SCC in the postorder sequence,
+///    and thus the new edge will flow toward the start, we are done.
+/// 3) Otherwise, starting from the target SCC, walk all edges which reach an
+///    SCC between the source and the target, and add them to the set of
+///    connected SCCs, then recurse through them. Once a complete set of the
+///    SCCs the target connects to is known, hoist the remaining SCCs between
+///    the source and the target to be above the target. Note that there is no
+///    need to process the source SCC, it is already known to connect.
+/// 4) At this point, all of the SCCs in the closed range between the source
+///    SCC and the target SCC in the postorder sequence are connected,
+///    including the target SCC and the source SCC. Inserting the edge from
+///    the source SCC to the target SCC will form a cycle out of precisely
+///    these SCCs. Thus we can merge all of the SCCs in this closed range into
+///    a single SCC.
+///
+/// This process has various important properties:
+/// - Only mutates the SCCs when adding the edge actually changes the SCC
+///   structure.
+/// - Never mutates SCCs which are unaffected by the change.
+/// - Updates the postorder sequence to correctly satisfy the postorder
+///   constraint after the edge is inserted.
+/// - Only reorders SCCs in the closed postorder sequence from the source to
+///   the target, so easy to bound how much has changed even in the ordering.
+/// - Big-O is the number of edges in the closed postorder range of SCCs from
+///   source to target.
+///
+/// This helper routine, in addition to updating the postorder sequence itself
+/// will also update a map from SCCs to indices within that sequecne.
+///
+/// The sequence and the map must operate on pointers to the SCC type.
+///
+/// Two callbacks must be provided. The first computes the subset of SCCs in
+/// the postorder closed range from the source to the target which connect to
+/// the source SCC via some (transitive) set of edges. The second computes the
+/// subset of the same range which the target SCC connects to via some
+/// (transitive) set of edges. Both callbacks should populate the set argument
+/// provided.
+template <typename SCCT, typename PostorderSequenceT, typename SCCIndexMapT,
+          typename ComputeSourceConnectedSetCallableT,
+          typename ComputeTargetConnectedSetCallableT>
+static iterator_range<typename PostorderSequenceT::iterator>
+updatePostorderSequenceForEdgeInsertion(
+    SCCT &SourceSCC, SCCT &TargetSCC, PostorderSequenceT &SCCs,
+    SCCIndexMapT &SCCIndices,
+    ComputeSourceConnectedSetCallableT ComputeSourceConnectedSet,
+    ComputeTargetConnectedSetCallableT ComputeTargetConnectedSet) {
+  int SourceIdx = SCCIndices[&SourceSCC];
+  int TargetIdx = SCCIndices[&TargetSCC];
+  assert(SourceIdx < TargetIdx && "Cannot have equal indices here!");
+
+  SmallPtrSet<SCCT *, 4> ConnectedSet;
+
+  // Compute the SCCs which (transitively) reach the source.
+  ComputeSourceConnectedSet(ConnectedSet);
+
+  // Partition the SCCs in this part of the port-order sequence so only SCCs
+  // connecting to the source remain between it and the target. This is
+  // a benign partition as it preserves postorder.
+  auto SourceI = std::stable_partition(
+      SCCs.begin() + SourceIdx, SCCs.begin() + TargetIdx + 1,
+      [&ConnectedSet](SCCT *C) { return !ConnectedSet.count(C); });
+  for (int i = SourceIdx, e = TargetIdx + 1; i < e; ++i)
+    SCCIndices.find(SCCs[i])->second = i;
+
+  // If the target doesn't connect to the source, then we've corrected the
+  // post-order and there are no cycles formed.
+  if (!ConnectedSet.count(&TargetSCC)) {
+    assert(SourceI > (SCCs.begin() + SourceIdx) &&
+           "Must have moved the source to fix the post-order.");
+    assert(*std::prev(SourceI) == &TargetSCC &&
+           "Last SCC to move should have bene the target.");
+
+    // Return an empty range at the target SCC indicating there is nothing to
+    // merge.
+    return make_range(std::prev(SourceI), std::prev(SourceI));
+  }
+
+  assert(SCCs[TargetIdx] == &TargetSCC &&
+         "Should not have moved target if connected!");
+  SourceIdx = SourceI - SCCs.begin();
+  assert(SCCs[SourceIdx] == &SourceSCC &&
+         "Bad updated index computation for the source SCC!");
+
+
+  // See whether there are any remaining intervening SCCs between the source
+  // and target. If so we need to make sure they all are reachable form the
+  // target.
+  if (SourceIdx + 1 < TargetIdx) {
+    ConnectedSet.clear();
+    ComputeTargetConnectedSet(ConnectedSet);
+
+    // Partition SCCs so that only SCCs reached from the target remain between
+    // the source and the target. This preserves postorder.
+    auto TargetI = std::stable_partition(
+        SCCs.begin() + SourceIdx + 1, SCCs.begin() + TargetIdx + 1,
+        [&ConnectedSet](SCCT *C) { return ConnectedSet.count(C); });
+    for (int i = SourceIdx + 1, e = TargetIdx + 1; i < e; ++i)
+      SCCIndices.find(SCCs[i])->second = i;
+    TargetIdx = std::prev(TargetI) - SCCs.begin();
+    assert(SCCs[TargetIdx] == &TargetSCC &&
+           "Should always end with the target!");
+  }
+
+  // At this point, we know that connecting source to target forms a cycle
+  // because target connects back to source, and we know that all of the SCCs
+  // between the source and target in the postorder sequence participate in that
+  // cycle.
+  return make_range(SCCs.begin() + SourceIdx, SCCs.begin() + TargetIdx);
+}
+
 SmallVector<LazyCallGraph::SCC *, 1>
 LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) {
   assert(!SourceN[TargetN].isCall() && "Must start with a ref edge!");
@@ -288,107 +423,41 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) {
     return DeletedSCCs;
   }
 
-  // When we do have an edge from an earlier SCC to a later SCC in the
-  // postorder sequence, all of the SCCs which may be impacted are in the
-  // closed range of those two within the postorder sequence. The algorithm to
-  // restore the state is as follows:
-  //
-  // 1) Starting from the source SCC, construct a set of SCCs which reach the
-  //    source SCC consisting of just the source SCC. Then scan toward the
-  //    target SCC in postorder and for each SCC, if it has an edge to an SCC
-  //    in the set, add it to the set. Otherwise, the source SCC is not
-  //    a successor, move it in the postorder sequence to immediately before
-  //    the source SCC, shifting the source SCC and all SCCs in the set one
-  //    position toward the target SCC. Stop scanning after processing the
-  //    target SCC.
-  // 2) If the source SCC is now past the target SCC in the postorder sequence,
-  //    and thus the new edge will flow toward the start, we are done.
-  // 3) Otherwise, starting from the target SCC, walk all edges which reach an
-  //    SCC between the source and the target, and add them to the set of
-  //    connected SCCs, then recurse through them. Once a complete set of the
-  //    SCCs the target connects to is known, hoist the remaining SCCs between
-  //    the source and the target to be above the target. Note that there is no
-  //    need to process the source SCC, it is already known to connect.
-  // 4) At this point, all of the SCCs in the closed range between the source
-  //    SCC and the target SCC in the postorder sequence are connected,
-  //    including the target SCC and the source SCC. Inserting the edge from
-  //    the source SCC to the target SCC will form a cycle out of precisely
-  //    these SCCs. Thus we can merge all of the SCCs in this closed range into
-  //    a single SCC.
-  //
-  // This process has various important properties:
-  // - Only mutates the SCCs when adding the edge actually changes the SCC
-  //   structure.
-  // - Never mutates SCCs which are unaffected by the change.
-  // - Updates the postorder sequence to correctly satisfy the postorder
-  //   constraint after the edge is inserted.
-  // - Only reorders SCCs in the closed postorder sequence from the source to
-  //   the target, so easy to bound how much has changed even in the ordering.
-  // - Big-O is the number of edges in the closed postorder range of SCCs from
-  //   source to target.
-
-  assert(SourceIdx < TargetIdx && "Cannot have equal indices here!");
-  SmallPtrSet<SCC *, 4> ConnectedSet;
-
   // Compute the SCCs which (transitively) reach the source.
-  ConnectedSet.insert(&SourceSCC);
-  auto IsConnected = [&](SCC &C) {
-    for (Node &N : C)
-      for (Edge &E : N.calls()) {
-        assert(E.getNode() && "Must have formed a node within an SCC!");
-        if (ConnectedSet.count(G->lookupSCC(*E.getNode())))
-          return true;
-      }
-
-    return false;
-  };
-
-  for (SCC *C :
-       make_range(SCCs.begin() + SourceIdx + 1, SCCs.begin() + TargetIdx + 1))
-    if (IsConnected(*C))
-      ConnectedSet.insert(C);
-
-  // Partition the SCCs in this part of the port-order sequence so only SCCs
-  // connecting to the source remain between it and the target. This is
-  // a benign partition as it preserves postorder.
-  auto SourceI = std::stable_partition(
-      SCCs.begin() + SourceIdx, SCCs.begin() + TargetIdx + 1,
-      [&ConnectedSet](SCC *C) { return !ConnectedSet.count(C); });
-  for (int i = SourceIdx, e = TargetIdx + 1; i < e; ++i)
-    SCCIndices.find(SCCs[i])->second = i;
-
-  // If the target doesn't connect to the source, then we've corrected the
-  // post-order and there are no cycles formed.
-  if (!ConnectedSet.count(&TargetSCC)) {
-    assert(SourceI > (SCCs.begin() + SourceIdx) &&
-           "Must have moved the source to fix the post-order.");
-    assert(*std::prev(SourceI) == &TargetSCC &&
-           "Last SCC to move should have bene the target.");
-    SourceN.setEdgeKind(TargetN.getFunction(), Edge::Call);
+  auto ComputeSourceConnectedSet = [&](SmallPtrSetImpl<SCC *> &ConnectedSet) {
 #ifndef NDEBUG
+    // Check that the RefSCC is still valid before computing this as the
+    // results will be nonsensical of we've broken its invariants.
     verify();
 #endif
-    return DeletedSCCs;
-  }
+    ConnectedSet.insert(&SourceSCC);
+    auto IsConnected = [&](SCC &C) {
+      for (Node &N : C)
+        for (Edge &E : N.calls()) {
+          assert(E.getNode() && "Must have formed a node within an SCC!");
+          if (ConnectedSet.count(G->lookupSCC(*E.getNode())))
+            return true;
+        }
 
-  assert(SCCs[TargetIdx] == &TargetSCC &&
-         "Should not have moved target if connected!");
-  SourceIdx = SourceI - SCCs.begin();
+      return false;
+    };
 
+    for (SCC *C :
+         make_range(SCCs.begin() + SourceIdx + 1, SCCs.begin() + TargetIdx + 1))
+      if (IsConnected(*C))
+        ConnectedSet.insert(C);
+  };
+
+  // Use a normal worklist to find which SCCs the target connects to. We still
+  // bound the search based on the range in the postorder list we care about,
+  // but because this is forward connectivity we just "recurse" through the
+  // edges.
+  auto ComputeTargetConnectedSet = [&](SmallPtrSetImpl<SCC *> &ConnectedSet) {
 #ifndef NDEBUG
-  // Check that the RefSCC is still valid.
-  verify();
+    // Check that the RefSCC is still valid before computing this as the
+    // results will be nonsensical of we've broken its invariants.
+    verify();
 #endif
-
-  // See whether there are any remaining intervening SCCs between the source
-  // and target. If so we need to make sure they all are reachable form the
-  // target.
-  if (SourceIdx + 1 < TargetIdx) {
-    // Use a normal worklist to find which SCCs the target connects to. We still
-    // bound the search based on the range in the postorder list we care about,
-    // but because this is forward connectivity we just "recurse" through the
-    // edges.
-    ConnectedSet.clear();
     ConnectedSet.insert(&TargetSCC);
     SmallVector<SCC *, 4> Worklist;
     Worklist.push_back(&TargetSCC);
@@ -411,35 +480,39 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) {
             Worklist.push_back(&EdgeC);
         }
     } while (!Worklist.empty());
+  };
 
-    // Partition SCCs so that only SCCs reached from the target remain between
-    // the source and the target. This preserves postorder.
-    auto TargetI = std::stable_partition(
-        SCCs.begin() + SourceIdx + 1, SCCs.begin() + TargetIdx + 1,
-        [&ConnectedSet](SCC *C) { return ConnectedSet.count(C); });
-    for (int i = SourceIdx + 1, e = TargetIdx + 1; i < e; ++i)
-      SCCIndices.find(SCCs[i])->second = i;
-    TargetIdx = std::prev(TargetI) - SCCs.begin();
-    assert(SCCs[TargetIdx] == &TargetSCC &&
-           "Should always end with the target!");
-
+  // Use a generic helper to update the postorder sequence of SCCs and return
+  // a range of any SCCs connected into a cycle by inserting this edge. This
+  // routine will also take care of updating the indices into the postorder
+  // sequence.
+  auto MergeRange = updatePostorderSequenceForEdgeInsertion(
+      SourceSCC, TargetSCC, SCCs, SCCIndices, ComputeSourceConnectedSet,
+      ComputeTargetConnectedSet);
+
+  // If the merge range is empty, then adding the edge didn't actually form any
+  // new cycles. We're done.
+  if (MergeRange.begin() == MergeRange.end()) {
+    // Now that the SCC structure is finalized, flip the kind to call.
+    SourceN.setEdgeKind(TargetN.getFunction(), Edge::Call);
 #ifndef NDEBUG
-    // Check that the RefSCC is still valid.
     verify();
 #endif
+    return DeletedSCCs;
   }
 
-  // At this point, we know that connecting source to target forms a cycle
-  // because target connects back to source, and we know that all of the SCCs
-  // between the source and target in the postorder sequence participate in that
-  // cycle. This means that we need to merge all of these SCCs into a single
+#ifndef NDEBUG
+  // Before merging, check that the RefSCC remains valid after all the
+  // postorder updates.
+  verify();
+#endif
+
+  // Otherwise we need to merge all of the SCCs in the cycle into a single
   // result SCC.
   //
   // NB: We merge into the target because all of these functions were already
   // reachable from the target, meaning any SCC-wide properties deduced about it
   // other than the set of functions within it will not have changed.
-  auto MergeRange =
-      make_range(SCCs.begin() + SourceIdx, SCCs.begin() + TargetIdx);
   for (SCC *C : MergeRange) {
     assert(C != &TargetSCC &&
            "We merge *into* the target and shouldn't process it here!");
-- 
2.7.4