JIT: Enable edge based profiles for all scenarios (#80481)
author Andy Ayers <andya@microsoft.com>
Fri, 13 Jan 2023 16:02:14 +0000 (08:02 -0800)
committer GitHub <noreply@github.com>
Fri, 13 Jan 2023 16:02:14 +0000 (08:02 -0800)
Enable edge based profiles for OSR, partial compilation, and optimized plus
instrumented cases.

For OSR this requires deferring flow graph modifications until after we have
built the initial probe list, so that the initial list reflects the entirety
of the method. This set of candidate edge probes is thus the same no matter how
the method is compiled. A given compile may schematize a subset of these probes
and materialize a subset of what gets schematized; this is tolerated by the PGO
mechanism provided that the initial instrumented jitting produces a schema which
is a superset of the schema produced by any subsequent instrumented rejitting.
This is normally the case.
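
The superset requirement amounts to a simple containment check. As a minimal
sketch (illustrative types and names, not the actual PGO matching code), the
runtime can reuse a rejit's counts only when every probe the rejit schematizes
already has a slot in the initial schema:

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    // Hypothetical schema entry: an edge probe keyed by source and target.
    struct SchemaEntry
    {
        int32_t sourceKey;
        int32_t targetKey;
    };

    // A rejit schema is compatible when it is a subset of the initial schema,
    // so that every count the rejit produces maps to an existing profile slot.
    bool IsCompatibleRejitSchema(const std::vector<SchemaEntry>& initial,
                                 const std::vector<SchemaEntry>& rejit)
    {
        return std::all_of(rejit.begin(), rejit.end(), [&](const SchemaEntry& e) {
            return std::any_of(initial.begin(), initial.end(), [&](const SchemaEntry& i) {
                return (i.sourceKey == e.sourceKey) && (i.targetKey == e.targetKey);
            });
        });
    }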

Partial compilation may still need some work to ensure full schematization, but
it is currently off by default. We will address this subsequently.

For optimized compiles we give the EfficientEdgeCountInstrumentor the same kind
of probe relocation abilities that we have in the BlockCountInstrumentor. In
particular we need to move probes that might appear in return blocks that follow
implicit tail call blocks, since those return blocks must remain empty.

The details of how we do this are a bit different, but the idea is the same: we
create duplicates of any probe that was going to appear in the return block
and instead instrument each pred. If the pred reached the return via a critical
edge, we split the edge and put the probe there. This analysis relies on cheap
preds, so to ensure we can use them we move all the critical edge splitting so
that it happens before we need the cheap pred lists.
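
Per flagged return block the relocation looks roughly like the sketch below
(stand-in types and helper signatures, not the JIT's actual APIs; the real code
works over the cheap pred lists and Probe records):

    #include <cstddef>

    struct Block;
    struct Probe;

    // Stand-ins for the JIT's flow graph utilities (hypothetical signatures).
    Block* GetUniqueSucc(Block* pred);    // nullptr if pred has multiple successors
    Block* NewBlockBefore(Block* block);  // new empty block falling through to block
    void   RedirectEdge(Block* pred, Block* oldTarget, Block* newTarget);
    void   AddProbeCopy(Block* where, Probe* probe);

    // Move a return block's probe into its preds so the return stays empty.
    void RelocateReturnProbe(Block* returnBlock, Probe* probe, Block* const* preds, size_t numPreds)
    {
        Block* intermediary = nullptr;
        for (size_t i = 0; i < numPreds; i++)
        {
            Block* const pred = preds[i];
            if (GetUniqueSucc(pred) != nullptr)
            {
                // Non-critical edge: the pred itself can hold a copy of the probe.
                AddProbeCopy(pred, probe);
            }
            else
            {
                // Critical edge: route the pred through a new block that falls
                // through to the return, and put the probe copy there.
                if (intermediary == nullptr)
                {
                    intermediary = NewBlockBefore(returnBlock);
                    AddProbeCopy(intermediary, probe);
                }
                RedirectEdge(pred, returnBlock, intermediary);
            }
        }
    }

All copies of one probe share a single schema slot (one copy acts as the leader,
the rest are duplicates), so the relocated counts still roll up into one edge
count.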

The ability to do block profiling is retained but will no longer be used without
special config settings.

There were also a few bug fixes in the spanning tree visitor. It must visit a
superset of the blocks we end up importing and was missing visits in some cases.
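
One such case involved filter-protected handlers: handler and filter entries are
reached by EH flow rather than by ordinary successor edges, so both must be
seeded as roots of the walk. A simplified rendering with stand-in types (the
actual fix is the HasFilter() hunk in WalkSpanningTree below):

    #include <vector>

    struct Block;

    struct EHRegion
    {
        Block* handlerEntry;
        Block* filterEntry; // nullptr when the region has no filter
    };

    // Seed the spanning tree walk with every EH entry point; previously the
    // filter entries were skipped, so their blocks were never visited.
    void SeedWalk(const std::vector<EHRegion>& ehTable, std::vector<Block*>& stack)
    {
        for (const EHRegion& region : ehTable)
        {
            stack.push_back(region.handlerEntry);
            if (region.filterEntry != nullptr)
            {
                stack.push_back(region.filterEntry);
            }
        }
    }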

This should improve jit time and code quality for instrumented code.

Fixes #47942.
Fixes #66101.
Contributes to #74873.

src/coreclr/jit/compiler.cpp
src/coreclr/jit/fgbasic.cpp
src/coreclr/jit/fgprofile.cpp

diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp
index f86e1923f89dc307743534c71524da75b86cb54e..6c8445b6fee7440f899ef20b3747ec5bd3d914dd 100644
@@ -4389,6 +4389,13 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
         DoPhase(this, PHASE_IBCPREP, &Compiler::fgPrepareToInstrumentMethod);
     }
 
+    // If we are doing OSR, update flow to initially reach the appropriate IL offset.
+    //
+    if (opts.IsOSR())
+    {
+        fgFixEntryFlowForOSR();
+    }
+
     // Enable the post-phase checks that use internal logic to decide when checking makes sense.
     //
     activePhaseChecks = PhaseChecks::CHECK_EH | PhaseChecks::CHECK_LOOPS | PhaseChecks::CHECK_UNIQUE |
@@ -4398,10 +4405,6 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
     //
     DoPhase(this, PHASE_IMPORTATION, &Compiler::fgImport);
 
-    // Expand any patchpoints
-    //
-    DoPhase(this, PHASE_PATCHPOINTS, &Compiler::fgTransformPatchpoints);
-
     // If instrumenting, add block and class probes.
     //
     if (compileFlags->IsSet(JitFlags::JIT_FLAG_BBINSTR))
@@ -4409,6 +4412,10 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
         DoPhase(this, PHASE_IBCINSTR, &Compiler::fgInstrumentMethod);
     }
 
+    // Expand any patchpoints
+    //
+    DoPhase(this, PHASE_PATCHPOINTS, &Compiler::fgTransformPatchpoints);
+
     // Transform indirect calls that require control flow expansion.
     //
     DoPhase(this, PHASE_INDXCALL, &Compiler::fgTransformIndirectCalls);
@@ -6594,13 +6601,6 @@ int Compiler::compCompileHelper(CORINFO_MODULE_HANDLE classPtr,
     {
         // We are jitting the root method, or inlining.
         fgFindBasicBlocks();
-
-        // If we are doing OSR, update flow to initially reach the appropriate IL offset.
-        //
-        if (opts.IsOSR())
-        {
-            fgFixEntryFlowForOSR();
-        }
     }
 
     // If we're inlining and the candidate is bad, bail out.
diff --git a/src/coreclr/jit/fgbasic.cpp b/src/coreclr/jit/fgbasic.cpp
index dc27ecc47cf52e50df3bd70fe0a046b5f4ae13c9..f5b2ced1b714cab68124c63ac4d5d43a72385ece 100644
@@ -3621,6 +3621,11 @@ void Compiler::fgFixEntryFlowForOSR()
     fgFirstBB->bbJumpDest = osrEntry;
     fgAddRefPred(osrEntry, fgFirstBB);
 
+    // Give the method entry (which will be a scratch BB)
+    // the same weight as the OSR Entry.
+    //
+    fgFirstBB->inheritWeight(fgOSREntryBB);
+
     JITDUMP("OSR: redirecting flow at entry from entry " FMT_BB " to OSR entry " FMT_BB " for the importer\n",
             fgFirstBB->bbNum, osrEntry->bbNum);
 }
diff --git a/src/coreclr/jit/fgprofile.cpp b/src/coreclr/jit/fgprofile.cpp
index 7f095d9ebad2a5c63ad7c69fb44dabf7e76c0ce2..0d928a1da4c1392c7d1955f41bbf0926e58a9f1c 100644
@@ -361,6 +361,7 @@ public:
 class BlockCountInstrumentor : public Instrumentor
 {
 private:
+    void        RelocateProbes();
     BasicBlock* m_entryBlock;
 
 public:
@@ -391,173 +392,195 @@ void BlockCountInstrumentor::Prepare(bool preImport)
         return;
     }
 
-    // If this is an OSR method, look for potential tail calls in
-    // blocks that are not BBJ_RETURN.
-    //
-    // If we see any, we need to adjust our instrumentation pattern.
+    RelocateProbes();
+
+#ifdef DEBUG
+    // Set schema index to invalid value
     //
-    if (m_comp->opts.IsInstrumentedOptimized() && ((m_comp->optMethodFlags & OMF_HAS_TAILCALL_SUCCESSOR) != 0))
+    for (BasicBlock* const block : m_comp->Blocks())
     {
-        JITDUMP("OSR + PGO + potential tail call --- preparing to relocate block probes\n");
+        block->bbCountSchemaIndex = -1;
+    }
+#endif
+}
 
-        // We should be in a root method compiler instance. OSR + PGO does not
-        // currently try and instrument inlinees.
-        //
-        // Relaxing this will require changes below because inlinee compilers
-        // share the root compiler flow graph (and hence bb epoch), and flow
-        // from inlinee tail calls to returns can be more complex.
+//------------------------------------------------------------------------
+// BlockCountInstrumentor::RelocateProbes: relocate any probes that
+//   would appear in post-tail call blocks.
+//
+// Notes:
+//   Conveys relocation information by modifying the cheap pred list.
+//
+//   Actual relocation happens during Instrument, keying off of the
+//   BBF_TAILCALL_SUCCESSOR flag and the (modified) cheap pred list.
+//
+void BlockCountInstrumentor::RelocateProbes()
+{
+    // We only see such blocks when optimizing. They are flagged by the importer.
+    //
+    if (!m_comp->opts.IsInstrumentedOptimized() || ((m_comp->optMethodFlags & OMF_HAS_TAILCALL_SUCCESSOR) == 0))
+    {
+        // No problematic blocks to worry about.
         //
-        assert(!m_comp->compIsForInlining());
+        return;
+    }
 
-        // Build cheap preds.
-        //
-        m_comp->fgComputeCheapPreds();
-        m_comp->EnsureBasicBlockEpoch();
+    JITDUMP("Optimized + instrumented + potential tail calls --- preparing to relocate edge probes\n");
 
-        // Keep track of return blocks needing special treatment.
-        // We also need to track of duplicate preds.
-        //
-        JitExpandArrayStack<BasicBlock*> specialReturnBlocks(m_comp->getAllocator(CMK_Pgo));
-        BlockSet                         predsSeen = BlockSetOps::MakeEmpty(m_comp);
+    // We should be in a root method compiler instance. We currently do not instrument inlinees.
+    //
+    // Relaxing this will require changes below because inlinee compilers
+    // share the root compiler flow graph (and hence bb epoch), and flow
+    // from inlinee tail calls to returns can be more complex.
+    //
+    assert(!m_comp->compIsForInlining());
 
-        // Walk blocks looking for BBJ_RETURNs that are successors of potential tail calls.
-        //
-        // If any such has a conditional pred, we will need to reroute flow from those preds
-        // via an intermediary block. That block will subsequently hold the relocated block
-        // probe for the return for those preds.
-        //
-        // Scrub the cheap pred list for these blocks so that each pred appears at most once.
+    // Build cheap preds.
+    //
+    m_comp->fgComputeCheapPreds();
+    m_comp->EnsureBasicBlockEpoch();
+
+    // Keep track of return blocks needing special treatment.
+    // We also need to keep track of duplicate preds.
+    //
+    JitExpandArrayStack<BasicBlock*> specialReturnBlocks(m_comp->getAllocator(CMK_Pgo));
+    BlockSet                         predsSeen = BlockSetOps::MakeEmpty(m_comp);
+
+    // Walk blocks looking for BBJ_RETURNs that are successors of potential tail calls.
+    //
+    // If any such has a conditional pred, we will need to reroute flow from those preds
+    // via an intermediary block. That block will subsequently hold the relocated block
+    // probe for the return for those preds.
+    //
+    // Scrub the cheap pred list for these blocks so that each pred appears at most once.
+    //
+    for (BasicBlock* const block : m_comp->Blocks())
+    {
+        // Ignore blocks that we won't process.
         //
-        for (BasicBlock* const block : m_comp->Blocks())
+        if (!ShouldProcess(block))
         {
-            // Ignore blocks that we won't process.
-            //
-            if (!ShouldProcess(block))
-            {
-                continue;
-            }
+            continue;
+        }
 
-            if ((block->bbFlags & BBF_TAILCALL_SUCCESSOR) != 0)
+        if ((block->bbFlags & BBF_TAILCALL_SUCCESSOR) != 0)
+        {
+            JITDUMP("Return " FMT_BB " is successor of possible tail call\n", block->bbNum);
+            assert(block->bbJumpKind == BBJ_RETURN);
+            bool pushed = false;
+            BlockSetOps::ClearD(m_comp, predsSeen);
+            for (BasicBlockList* predEdge = block->bbCheapPreds; predEdge != nullptr; predEdge = predEdge->next)
             {
-                JITDUMP("Return " FMT_BB " is successor of possible tail call\n", block->bbNum);
-                assert(block->bbJumpKind == BBJ_RETURN);
-                bool pushed = false;
-                BlockSetOps::ClearD(m_comp, predsSeen);
-                for (BasicBlockList* predEdge = block->bbCheapPreds; predEdge != nullptr; predEdge = predEdge->next)
+                BasicBlock* const pred = predEdge->block;
+
+                // If pred is not to be processed, ignore it and scrub from the pred list.
+                //
+                if (!ShouldProcess(pred))
                 {
-                    BasicBlock* const pred = predEdge->block;
+                    JITDUMP(FMT_BB " -> " FMT_BB " is dead edge\n", pred->bbNum, block->bbNum);
+                    predEdge->block = nullptr;
+                    continue;
+                }
+
+                BasicBlock* const succ = pred->GetUniqueSucc();
 
-                    // If pred is not to be processed, ignore it and scrub from the pred list.
+                if (succ == nullptr)
+                {
+                    // Flow from pred -> block is conditional, and will require updating.
                     //
-                    if (!ShouldProcess(pred))
+                    JITDUMP(FMT_BB " -> " FMT_BB " is critical edge\n", pred->bbNum, block->bbNum);
+                    if (!pushed)
                     {
-                        JITDUMP(FMT_BB " -> " FMT_BB " is dead edge\n", pred->bbNum, block->bbNum);
-                        predEdge->block = nullptr;
-                        continue;
+                        specialReturnBlocks.Push(block);
+                        pushed = true;
                     }
 
-                    BasicBlock* const succ = pred->GetUniqueSucc();
-
-                    if (succ == nullptr)
+                    // Have we seen this pred before?
+                    //
+                    if (BlockSetOps::IsMember(m_comp, predsSeen, pred->bbNum))
                     {
-                        // Flow from pred -> block is conditional, and will require updating.
-                        //
-                        JITDUMP(FMT_BB " -> " FMT_BB " is critical edge\n", pred->bbNum, block->bbNum);
-                        if (!pushed)
-                        {
-                            specialReturnBlocks.Push(block);
-                            pushed = true;
-                        }
-
-                        // Have we seen this pred before?
+                        // Yes, null out the duplicate pred list entry.
                         //
-                        if (BlockSetOps::IsMember(m_comp, predsSeen, pred->bbNum))
-                        {
-                            // Yes, null out the duplicate pred list entry.
-                            //
-                            predEdge->block = nullptr;
-                        }
+                        predEdge->block = nullptr;
                     }
-                    else
-                    {
-                        // We should only ever see one reference to this pred.
-                        //
-                        assert(!BlockSetOps::IsMember(m_comp, predsSeen, pred->bbNum));
+                }
+                else
+                {
+                    // We should only ever see one reference to this pred.
+                    //
+                    assert(!BlockSetOps::IsMember(m_comp, predsSeen, pred->bbNum));
 
-                        // Ensure flow from non-critical preds is BBJ_ALWAYS as we
-                        // may add a new block right before block.
-                        //
-                        if (pred->bbJumpKind == BBJ_NONE)
-                        {
-                            pred->bbJumpKind = BBJ_ALWAYS;
-                            pred->bbJumpDest = block;
-                        }
-                        assert(pred->bbJumpKind == BBJ_ALWAYS);
+                    // Ensure flow from non-critical preds is BBJ_ALWAYS as we
+                    // may add a new block right before block.
+                    //
+                    if (pred->bbJumpKind == BBJ_NONE)
+                    {
+                        pred->bbJumpKind = BBJ_ALWAYS;
+                        pred->bbJumpDest = block;
                     }
-
-                    BlockSetOps::AddElemD(m_comp, predsSeen, pred->bbNum);
+                    assert(pred->bbJumpKind == BBJ_ALWAYS);
                 }
+
+                BlockSetOps::AddElemD(m_comp, predsSeen, pred->bbNum);
             }
         }
+    }
 
-        // Now process each special return block.
-        // Create an intermediary that falls through to the return.
-        // Update any critical edges to target the intermediary.
-        //
-        // Note we could also route any non-tail-call pred via the
-        // intermedary. Doing so would cut down on probe duplication.
-        //
-        if (specialReturnBlocks.Size() > 0)
-        {
-            SetModifiedFlow();
-        }
+    // Did we find any blocks with probes needing relocation?
+    //
+    if (specialReturnBlocks.Size() == 0)
+    {
+        JITDUMP("No probes need relocating\n");
+        return;
+    }
 
-        while (specialReturnBlocks.Size() > 0)
-        {
-            bool              first        = true;
-            BasicBlock* const block        = specialReturnBlocks.Pop();
-            BasicBlock* const intermediary = m_comp->fgNewBBbefore(BBJ_NONE, block, /* extendRegion*/ true);
+    JITDUMP("%u probes need relocating\n", specialReturnBlocks.Size());
 
-            intermediary->bbFlags |= BBF_IMPORTED;
-            intermediary->inheritWeight(block);
+    // Now process each special return block.
+    // Create an intermediary that falls through to the return.
+    // Update any critical edges to target the intermediary.
+    //
+    // Note we could also route any non-tail-call pred via the
+    // intermediary. Doing so would cut down on probe duplication.
+    //
+    SetModifiedFlow();
 
-            for (BasicBlockList* predEdge = block->bbCheapPreds; predEdge != nullptr; predEdge = predEdge->next)
+    while (specialReturnBlocks.Size() > 0)
+    {
+        bool              first        = true;
+        BasicBlock* const block        = specialReturnBlocks.Pop();
+        BasicBlock* const intermediary = m_comp->fgNewBBbefore(BBJ_NONE, block, /* extendRegion*/ true);
+
+        intermediary->bbFlags |= BBF_IMPORTED;
+        intermediary->inheritWeight(block);
+
+        for (BasicBlockList* predEdge = block->bbCheapPreds; predEdge != nullptr; predEdge = predEdge->next)
+        {
+            BasicBlock* const pred = predEdge->block;
+
+            if (pred != nullptr)
             {
-                BasicBlock* const pred = predEdge->block;
+                BasicBlock* const succ = pred->GetUniqueSucc();
 
-                if (pred != nullptr)
+                if (succ == nullptr)
                 {
-                    BasicBlock* const succ = pred->GetUniqueSucc();
-
-                    if (succ == nullptr)
-                    {
-                        // This will update all branch targets from pred.
-                        //
-                        m_comp->fgReplaceJumpTarget(pred, intermediary, block);
+                    // This will update all branch targets from pred.
+                    //
+                    m_comp->fgReplaceJumpTarget(pred, intermediary, block);
 
-                        // Patch the pred list. Note we only need one pred list
-                        // entry pointing at intermediary.
-                        //
-                        predEdge->block = first ? intermediary : nullptr;
-                        first           = false;
-                    }
-                    else
-                    {
-                        assert(pred->bbJumpKind == BBJ_ALWAYS);
-                    }
+                    // Patch the pred list. Note we only need one pred list
+                    // entry pointing at intermediary.
+                    //
+                    predEdge->block = first ? intermediary : nullptr;
+                    first           = false;
+                }
+                else
+                {
+                    assert(pred->bbJumpKind == BBJ_ALWAYS);
                 }
             }
         }
     }
-
-#ifdef DEBUG
-    // Set schema index to invalid value
-    //
-    for (BasicBlock* const block : m_comp->Blocks())
-    {
-        block->bbCountSchemaIndex = -1;
-    }
-#endif
 }
 
 //------------------------------------------------------------------------
@@ -761,12 +784,19 @@ public:
     // for non-tree edges whether the edge postdominates
     // the source, dominates the target, or is a critical edge.
     //
+    // Later we may need to relocate or duplicate probes. We
+    // overload this enum to also represent those cases.
+    //
     enum class EdgeKind
     {
         Unknown,
         PostdominatesSource,
         DominatesTarget,
-        CriticalEdge
+        CriticalEdge,
+        Deleted,
+        Relocated,
+        Leader,
+        Duplicate
     };
 
     virtual void Badcode()                     = 0;
@@ -836,6 +866,12 @@ void Compiler::WalkSpanningTree(SpanningTreeVisitor* visitor)
             BasicBlock* hndBegBB = HBtab->ebdHndBeg;
             stack.Push(hndBegBB);
             BlockSetOps::AddElemD(comp, marked, hndBegBB->bbNum);
+            if (HBtab->HasFilter())
+            {
+                BasicBlock* filterBB = HBtab->ebdFilter;
+                stack.Push(filterBB);
+                BlockSetOps::AddElemD(comp, marked, filterBB->bbNum);
+            }
         }
     }
 
@@ -903,12 +939,13 @@ void Compiler::WalkSpanningTree(SpanningTreeVisitor* visitor)
             {
                 // See if we're leaving an EH handler region.
                 //
-                bool           isInTry     = false;
-                unsigned const regionIndex = ehGetMostNestedRegionIndex(block, &isInTry);
+                bool            isInTry     = false;
+                unsigned const  regionIndex = ehGetMostNestedRegionIndex(block, &isInTry);
+                EHblkDsc* const dsc         = ehGetBlockHndDsc(block);
 
-                if (isInTry)
+                if (isInTry || (dsc->ebdHandlerType == EH_HANDLER_CATCH))
                 {
-                    // No, we're leaving a try or catch, not a handler.
+                    // We're leaving a try or catch, not a handler.
                     // Treat this as a normal edge.
                     //
                     BasicBlock* const target = block->bbJumpDest;
@@ -945,7 +982,6 @@ void Compiler::WalkSpanningTree(SpanningTreeVisitor* visitor)
                 {
                     // Pseudo-edge back to handler entry.
                     //
-                    EHblkDsc* const   dsc    = ehGetBlockHndDsc(block);
                     BasicBlock* const target = dsc->ebdHndBeg;
                     assert(BlockSetOps::IsMember(comp, marked, target->bbNum));
                     visitor->VisitNonTreeEdge(block, target, SpanningTreeVisitor::EdgeKind::PostdominatesSource);
@@ -1149,20 +1185,27 @@ private:
     //
     struct Probe
     {
+        BasicBlock* source;
         BasicBlock* target;
         Probe*      next;
         int         schemaIndex;
         EdgeKind    kind;
+        Probe*      leader;
     };
 
-    Probe* NewProbe(BasicBlock* source, BasicBlock* target)
+    // Add probe to block, representing edge from source to target.
+    //
+    Probe* NewProbe(BasicBlock* block, BasicBlock* source, BasicBlock* target)
     {
-        Probe* p                  = new (m_comp, CMK_Pgo) Probe();
-        p->target                 = target;
-        p->kind                   = EdgeKind::Unknown;
-        p->schemaIndex            = -1;
-        p->next                   = (Probe*)source->bbSparseProbeList;
-        source->bbSparseProbeList = p;
+        Probe* p       = new (m_comp, CMK_Pgo) Probe();
+        p->source      = source;
+        p->target      = target;
+        p->kind        = EdgeKind::Unknown;
+        p->schemaIndex = -1;
+        p->next        = (Probe*)block->bbSparseProbeList;
+        p->leader      = nullptr;
+
+        block->bbSparseProbeList = p;
         m_probeCount++;
 
         return p;
@@ -1171,7 +1214,7 @@ private:
     void NewSourceProbe(BasicBlock* source, BasicBlock* target)
     {
         JITDUMP("[%u] New probe for " FMT_BB " -> " FMT_BB " [source]\n", m_probeCount, source->bbNum, target->bbNum);
-        Probe* p = NewProbe(source, target);
+        Probe* p = NewProbe(source, source, target);
         p->kind  = EdgeKind::PostdominatesSource;
     }
 
@@ -1179,7 +1222,7 @@ private:
     {
         JITDUMP("[%u] New probe for " FMT_BB " -> " FMT_BB " [target]\n", m_probeCount, source->bbNum, target->bbNum);
 
-        Probe* p = NewProbe(source, target);
+        Probe* p = NewProbe(source, source, target);
         p->kind  = EdgeKind::DominatesTarget;
     }
 
@@ -1187,12 +1230,52 @@ private:
     {
         JITDUMP("[%u] New probe for " FMT_BB " -> " FMT_BB " [edge]\n", m_probeCount, source->bbNum, target->bbNum);
 
-        Probe* p = NewProbe(source, target);
+        Probe* p = NewProbe(source, source, target);
         p->kind  = EdgeKind::CriticalEdge;
 
         m_edgeProbeCount++;
     }
 
+    void NewRelocatedProbe(BasicBlock* block, BasicBlock* source, BasicBlock* target, Probe** pLeader = nullptr)
+    {
+        Probe*      p   = NewProbe(block, source, target);
+        const char* msg = "unknown";
+
+        // Are we starting or adding to a duplicate group?
+        //
+        if (pLeader != nullptr)
+        {
+            Probe* l = *pLeader;
+            if (l == nullptr)
+            {
+                // This probe will be the leader of the group
+                //
+                *pLeader = p;
+                p->kind  = EdgeKind::Leader;
+                msg      = "leader";
+            }
+            else
+            {
+                // This probe is a duplicate
+                //
+                p->leader = l;
+                p->kind   = EdgeKind::Duplicate;
+                msg       = "duplicate";
+            }
+        }
+        else
+        {
+            p->kind = EdgeKind::Relocated;
+            msg     = "relocated";
+        }
+
+        JITDUMP("New %s probe for " FMT_BB " -> " FMT_BB " [reloc to " FMT_BB " ]\n", msg, source->bbNum, target->bbNum,
+                block->bbNum);
+    }
+
+    void SplitCriticalEdges();
+    void RelocateProbes();
+
     unsigned m_blockCount;
     unsigned m_probeCount;
     unsigned m_edgeProbeCount;
@@ -1245,14 +1328,14 @@ public:
                 NewEdgeProbe(source, target);
                 break;
             default:
-                assert(!"unexpected kind");
+                assert(!"unexpected edge kind");
                 break;
         }
     }
 };
 
 //------------------------------------------------------------------------
-// EfficientEdgeCountInstrumentor:Prepare: analyze the flow graph to
+// EfficientEdgeCountInstrumentor::Prepare: analyze the flow graph to
 //   determine which edges should be instrumented.
 //
 // Arguments:
@@ -1277,23 +1360,400 @@ public:
 //
 void EfficientEdgeCountInstrumentor::Prepare(bool preImport)
 {
-    if (!preImport)
+    if (preImport)
+    {
+        JITDUMP("\nEfficientEdgeCountInstrumentor: preparing for instrumentation\n");
+        m_comp->WalkSpanningTree(this);
+        JITDUMP("%u blocks, %u probes (%u on critical edges)\n", m_blockCount, m_probeCount, m_edgeProbeCount);
+        return;
+    }
+
+    // If we saw badcode in the preimport prepare, we would expect
+    // compilation to blow up in the importer. So if we end up back
+    // here postimport with badcode set, something is wrong.
+    //
+    assert(!m_badcode);
+
+    // Walk the probe list splitting critical edges as required.
+    //
+    SplitCriticalEdges();
+
+    // If this is an optimized method, look for potential tail calls in
+    // probe blocks that are not BBJ_RETURN.
+    //
+    // If we see any, we need to adjust our instrumentation pattern.
+    //
+    RelocateProbes();
+}
+
+//------------------------------------------------------------------------
+// EfficientEdgeCountInstrumentor::SplitCriticalEdges: add blocks for
+//   probes along critical edges and adjust affected probes and probe lists.
+//
+//
+// Notes:
+//   Transforms CriticalEdge probes to Deleted and/or Relocated probes.
+//
+void EfficientEdgeCountInstrumentor::SplitCriticalEdges()
+{
+    if (m_edgeProbeCount == 0)
     {
-        // If we saw badcode in the preimport prepare, we would expect
-        // compilation to blow up in the importer. So if we end up back
-        // here postimport with badcode set, something is wrong.
+        return;
+    }
+
+    JITDUMP("\nEfficientEdgeCountInstrumentor: splitting up to %u critical edges\n", m_edgeProbeCount);
+    unsigned edgesSplit   = 0;
+    unsigned edgesIgnored = 0;
+
+    for (BasicBlock* const block : m_comp->Blocks())
+    {
+        if (!ShouldProcess(block))
+        {
+
+#ifdef DEBUG
+            // Account for probes originating from un-imported blocks.
+            //
+            for (Probe* probe = (Probe*)block->bbSparseProbeList; probe != nullptr; probe = probe->next)
+            {
+                if (probe->kind == EdgeKind::CriticalEdge)
+                {
+                    edgesIgnored++;
+                }
+            }
+#endif
+
+            continue;
+        }
+
+        for (Probe* probe = (Probe*)block->bbSparseProbeList; probe != nullptr; probe = probe->next)
+        {
+            // Figure out what block the probe will appear in.
+            //
+            BasicBlock* const source            = probe->source;
+            BasicBlock* const target            = probe->target;
+            BasicBlock*       instrumentedBlock = nullptr;
+
+            switch (probe->kind)
+            {
+                case EdgeKind::PostdominatesSource:
+                    instrumentedBlock = source;
+                    break;
+                case EdgeKind::DominatesTarget:
+                    instrumentedBlock = target;
+                    break;
+                case EdgeKind::Relocated:
+                    instrumentedBlock = block;
+                    break;
+                case EdgeKind::CriticalEdge:
+                {
+                    assert(block == source);
+
+                    // See if the edge still exists.
+                    //
+                    bool found = false;
+                    for (BasicBlock* const succ : block->Succs(m_comp->impInlineRoot()))
+                    {
+                        if (target == succ)
+                        {
+                            found = true;
+                            break;
+                        }
+                    }
+
+                    if (found)
+                    {
+                        // Importer folding may have changed the block jump kind
+                        // to BBJ_NONE. If so, change it back to BBJ_ALWAYS.
+                        //
+                        if (block->bbJumpKind == BBJ_NONE)
+                        {
+                            block->bbJumpKind = BBJ_ALWAYS;
+                            block->bbJumpDest = target;
+                        }
+
+                        instrumentedBlock = m_comp->fgSplitEdge(block, target);
+                        instrumentedBlock->bbFlags |= BBF_IMPORTED;
+                        edgesSplit++;
+
+                        // Add in the relocated probe
+                        //
+                        NewRelocatedProbe(instrumentedBlock, source, target);
+                    }
+                    else
+                    {
+                        JITDUMP("Could not find " FMT_BB " -> " FMT_BB " edge to instrument\n", block->bbNum,
+                                target->bbNum);
+
+                        // If we're optimizing, assume this edge got folded away
+                        //
+                        if (m_comp->opts.IsInstrumentedOptimized())
+                        {
+                            JITDUMP(" -- assuming this is ok\n");
+
+                            // Placate the asserts below
+                            //
+                            instrumentedBlock = source;
+                            edgesIgnored++;
+                        }
+                        else
+                        {
+                            assert(found);
+                        }
+                    }
+
+                    // Delete the critical edge probe
+                    //
+                    probe->kind = EdgeKind::Deleted;
+                }
+                break;
+
+                default:
+                    assert(!"unexpected edge kind");
+            }
+
+            assert(instrumentedBlock != nullptr);
+        }
+    }
+
+    // We should have found all edges needing splitting.
+    //
+    assert((edgesSplit + edgesIgnored) == m_edgeProbeCount);
+
+    if (edgesSplit > 0)
+    {
+        SetModifiedFlow();
+    }
+}
+
+//------------------------------------------------------------------------
+// EfficientEdgeCountInstrumentor::RelocateProbes: relocate any probes that
+//   would appear in post-tail call blocks.
+//
+// Notes:
+//   May build and modify the cheap pred lists.
+//   May create Leader and Duplicate probes.
+//
+void EfficientEdgeCountInstrumentor::RelocateProbes()
+{
+    // We only see such blocks when optimizing. They are flagged by the importer.
+    //
+    if (!m_comp->opts.IsInstrumentedOptimized() || ((m_comp->optMethodFlags & OMF_HAS_TAILCALL_SUCCESSOR) == 0))
+    {
+        // No problematic blocks to worry about.
         //
-        assert(!m_badcode);
         return;
     }
 
-    JITDUMP("\nEfficientEdgeCountInstrumentor: preparing for instrumentation\n");
-    m_comp->WalkSpanningTree(this);
-    JITDUMP("%u blocks, %u probes (%u on critical edges)\n", m_blockCount, m_probeCount, m_edgeProbeCount);
+    JITDUMP("Optimized + instrumented + potential tail calls --- preparing to relocate edge probes\n");
+
+    // We should be in a root method compiler instance. We currently do not instrument inlinees.
+    //
+    // Relaxing this will require changes below because inlinee compilers
+    // share the root compiler flow graph (and hence bb epoch), and flow
+    // from inlinee tail calls to returns can be more complex.
+    //
+    assert(!m_comp->compIsForInlining());
+
+    // Build cheap preds.
+    //
+    m_comp->fgComputeCheapPreds();
+    m_comp->EnsureBasicBlockEpoch();
+
+    // Keep track of return blocks needing special treatment.
+    // We also need to keep track of duplicate preds.
+    //
+    JitExpandArrayStack<BasicBlock*> specialReturnBlocks(m_comp->getAllocator(CMK_Pgo));
+    BlockSet                         retsPushed = BlockSetOps::MakeEmpty(m_comp);
+    BlockSet                         predsSeen  = BlockSetOps::MakeEmpty(m_comp);
+
+    // Walk probe list looking for probes that would appear in BBJ_RETURNs
+    // that are successors of potential tail calls.
+    //
+    // If any such has a conditional pred, we will need to reroute flow from those preds
+    // via an intermediary block. That block will subsequently hold the relocated
+    // probe for the return for those preds.
+    //
+    // Scrub the cheap pred list for these blocks so that each pred appears at most once.
+    //
+    for (BasicBlock* const block : m_comp->Blocks())
+    {
+        if (!ShouldProcess(block))
+        {
+            continue;
+        }
+
+        for (Probe* probe = (Probe*)block->bbSparseProbeList; probe != nullptr; probe = probe->next)
+        {
+            if (probe->kind == EdgeKind::Deleted)
+            {
+                continue;
+            }
+
+            // Figure out what block the probe will appear in.
+            // We do not expect to see any critical edges as we should have split them already.
+            //
+            BasicBlock* const source            = probe->source;
+            BasicBlock* const target            = probe->target;
+            BasicBlock*       instrumentedBlock = nullptr;
+
+            switch (probe->kind)
+            {
+                case EdgeKind::PostdominatesSource:
+                    instrumentedBlock = source;
+                    break;
+                case EdgeKind::DominatesTarget:
+                    instrumentedBlock = target;
+                    break;
+                case EdgeKind::Relocated:
+                    instrumentedBlock = block;
+                    break;
+                default:
+                    assert(!"unexpected probe kind");
+            }
+
+            assert(instrumentedBlock != nullptr);
+
+            // Nothing to do unless the block we wanted to instrument is a tail call successor.
+            //
+            if ((instrumentedBlock->bbFlags & BBF_TAILCALL_SUCCESSOR) == 0)
+            {
+                continue;
+            }
+
+            JITDUMP("Instrumentation target " FMT_BB " is successor of possible tail call\n", instrumentedBlock->bbNum);
+            assert(instrumentedBlock->bbJumpKind == BBJ_RETURN);
+
+            // We will need to relocate probes in this block. Add to our list if not already there.
+            //
+            if (!BlockSetOps::IsMember(m_comp, retsPushed, instrumentedBlock->bbNum))
+            {
+                specialReturnBlocks.Push(instrumentedBlock);
+                BlockSetOps::AddElemD(m_comp, retsPushed, instrumentedBlock->bbNum);
+            }
+
+            // Figure out which preds we'll relocate things to.
+            //
+            BlockSetOps::ClearD(m_comp, predsSeen);
+
+            for (BasicBlockList* predEdge = instrumentedBlock->bbCheapPreds; predEdge != nullptr;
+                 predEdge                 = predEdge->next)
+            {
+                BasicBlock* const pred = predEdge->block;
+                BasicBlock* const succ = pred->GetUniqueSucc();
+
+                if (succ == nullptr)
+                {
+                    // Flow from pred -> block is conditional, and will require updating.
+                    //
+                    JITDUMP(FMT_BB " -> " FMT_BB " is critical edge\n", pred->bbNum, instrumentedBlock->bbNum);
+
+                    // Have we seen this pred before?
+                    //
+                    if (BlockSetOps::IsMember(m_comp, predsSeen, pred->bbNum))
+                    {
+                        // Yes, null out the duplicate pred list entry.
+                        //
+                        predEdge->block = nullptr;
+                    }
+                }
+                else
+                {
+                    // We should only ever see one reference to this pred.
+                    //
+                    assert(!BlockSetOps::IsMember(m_comp, predsSeen, pred->bbNum));
+
+                    // Ensure flow from non-critical preds is BBJ_ALWAYS as we
+                    // may add a new block right before block.
+                    //
+                    if (pred->bbJumpKind == BBJ_NONE)
+                    {
+                        pred->bbJumpKind = BBJ_ALWAYS;
+                        pred->bbJumpDest = block;
+                    }
+                    assert(pred->bbJumpKind == BBJ_ALWAYS);
+                }
+
+                BlockSetOps::AddElemD(m_comp, predsSeen, pred->bbNum);
+            }
+        }
+    }
+
+    // Did we find any blocks with probes needing relocation?
+    //
+    if (specialReturnBlocks.Size() == 0)
+    {
+        JITDUMP("No probes need relocating\n");
+        return;
+    }
+
+    JITDUMP("%u blocks have probes need relocating\n", specialReturnBlocks.Size());
+
+    while (specialReturnBlocks.Size() > 0)
+    {
+        BasicBlock* const block = specialReturnBlocks.Pop();
+
+        // This block should have just one probe, which we no longer need.
+        //
+        Probe* const probe = (Probe*)block->bbSparseProbeList;
+        assert(probe->next == nullptr);
+        assert(probe->kind == EdgeKind::PostdominatesSource);
+        probe->kind = EdgeKind::Deleted;
+
+        // Any critical edge preds will probe via this intermediary block
+        // that we will create when necessary.
+        //
+        BasicBlock* intermediary = nullptr;
+
+        // The first probe we add will be the leader of a duplicate probe group.
+        //
+        Probe* leader = nullptr;
+
+        for (BasicBlockList* predEdge = block->bbCheapPreds; predEdge != nullptr; predEdge = predEdge->next)
+        {
+            BasicBlock* const pred = predEdge->block;
+
+            if (pred == nullptr)
+            {
+                // Pred edge for a duplicate pred we scrubbed above.
+                //
+                continue;
+            }
+
+            // Does this pred reach along a critical edge,
+            // or is the pred the tail of a callfinally pair?
+            //
+            BasicBlock* const succ = pred->GetUniqueSucc();
+
+            if ((succ == nullptr) || pred->isBBCallAlwaysPairTail())
+            {
+                // Yes. Create intermediary if necessary and add probe there.
+                //
+                if (intermediary == nullptr)
+                {
+                    intermediary = m_comp->fgNewBBbefore(BBJ_NONE, block, /* extendRegion*/ true);
+
+                    intermediary->bbFlags |= BBF_IMPORTED;
+                    intermediary->inheritWeight(block);
+                    NewRelocatedProbe(intermediary, probe->source, probe->target, &leader);
+                    SetModifiedFlow();
+                }
+
+                // Alter flow from pred->block to go via intermediary
+                //
+                m_comp->fgReplaceJumpTarget(pred, intermediary, block);
+            }
+            else
+            {
+                // Put a copy of probe into the pred.
+                //
+                NewRelocatedProbe(pred, probe->source, probe->target, &leader);
+            }
+        }
+    }
 }
 
 //------------------------------------------------------------------------
-// EfficientEdgeCountInstrumentor:BuildSchemaElements: create schema
+// EfficientEdgeCountInstrumentor::BuildSchemaElements: create schema
 //   elements for the probes
 //
 // Arguments:
@@ -1309,19 +1769,25 @@ void EfficientEdgeCountInstrumentor::BuildSchemaElements(BasicBlock* block, Sche
     //
     for (Probe* probe = (Probe*)block->bbSparseProbeList; probe != nullptr; probe = probe->next)
     {
-        // Probe is for the edge from block to target.
+        // Deleted and Duplicate probes don't create new schema elements.
         //
-        BasicBlock* const target = probe->target;
+        if ((probe->kind == EdgeKind::Duplicate) || (probe->kind == EdgeKind::Deleted))
+        {
+            continue;
+        }
 
-        // Remember the schema index for this probe
+        // Probe is for the edge from source to target.
         //
+        BasicBlock* const source = probe->source;
+        BasicBlock* const target = probe->target;
+
         assert(probe->schemaIndex == -1);
         probe->schemaIndex = (int)schema.size();
 
         // Normally we use the offset of the block in the schema, but for certain
         // blocks we do not have any information we can use and need to use internal BB numbers.
         //
-        int32_t sourceKey = EfficientEdgeCountBlockToKey(block);
+        int32_t sourceKey = EfficientEdgeCountBlockToKey(source);
         int32_t targetKey = EfficientEdgeCountBlockToKey(target);
 
         ICorJitInfo::PgoInstrumentationSchema schemaElem;
@@ -1350,22 +1816,29 @@ void EfficientEdgeCountInstrumentor::BuildSchemaElements(BasicBlock* block, Sche
 //
 void EfficientEdgeCountInstrumentor::Instrument(BasicBlock* block, Schema& schema, uint8_t* profileMemory)
 {
-    // Inlinee compilers build their blocks in the root compiler's
-    // graph. So for NumSucc, we use the root compiler instance.
-    //
-    Compiler* const comp = m_comp->impInlineRoot();
-
     // Walk the bbSparseProbeList, adding instrumentation.
     //
     for (Probe* probe = (Probe*)block->bbSparseProbeList; probe != nullptr; probe = probe->next)
     {
-        // Probe is for the edge from block to target.
+        if (probe->kind == EdgeKind::Deleted)
+        {
+            continue;
+        }
+
+        // Probe is for the edge from source to target.
         //
+        BasicBlock* const source = probe->source;
         BasicBlock* const target = probe->target;
 
-        // Retrieve the schema index for this probe
+        // Retrieve the schema index for this probe.
+        // For duplicate probes, get the index from the group leader.
         //
-        const int schemaIndex = probe->schemaIndex;
+        int schemaIndex = probe->schemaIndex;
+
+        if (probe->kind == EdgeKind::Duplicate)
+        {
+            schemaIndex = probe->leader->schemaIndex;
+        }
 
         // Sanity checks.
         //
@@ -1384,32 +1857,20 @@ void EfficientEdgeCountInstrumentor::Instrument(BasicBlock* block, Schema& schem
         switch (probe->kind)
         {
             case EdgeKind::PostdominatesSource:
-                instrumentedBlock = block;
+                instrumentedBlock = source;
                 break;
             case EdgeKind::DominatesTarget:
-                instrumentedBlock = probe->target;
+                instrumentedBlock = target;
+                break;
+            case EdgeKind::Relocated:
+            case EdgeKind::Leader:
+            case EdgeKind::Duplicate:
+                instrumentedBlock = block;
                 break;
             case EdgeKind::CriticalEdge:
-            {
-#ifdef DEBUG
-                // Verify the edge still exists.
-                //
-                bool found = false;
-                for (BasicBlock* const succ : block->Succs(comp))
-                {
-                    if (target == succ)
-                    {
-                        found = true;
-                        break;
-                    }
-                }
-                assert(found);
-#endif
-                instrumentedBlock = m_comp->fgSplitEdge(block, probe->target);
-                instrumentedBlock->bbFlags |= BBF_IMPORTED;
-            }
-            break;
-
+                // Should have been handled in SplitCriticalEdges()
+                assert(!"unexpected probe kind");
+                break;
             default:
                 unreached();
         }
@@ -1433,7 +1894,10 @@ void EfficientEdgeCountInstrumentor::Instrument(BasicBlock* block, Schema& schem
 
         m_comp->fgNewStmtAtBeg(instrumentedBlock, asgNode);
 
-        m_instrCount++;
+        if (probe->kind != EdgeKind::Duplicate)
+        {
+            m_instrCount++;
+        }
     }
 }
 
@@ -1858,53 +2322,10 @@ PhaseStatus Compiler::fgPrepareToInstrumentMethod()
     //
     // * disabled by option
     // * we are prejitting
-    // * we are jitting tier0 methods with patchpoints
-    // * we are jitting an OSR method
-    //
-    // OSR is incompatible with edge profiling. Only portions of the Tier0
-    // method will be executed, and the bail-outs at patchpoints won't be obvious
-    // exit points from the method. So for OSR we always do block profiling.
-    //
-    // Note this incompatibility only exists for methods that actually have
-    // patchpoints. Currently we will only place patchponts in methods with
-    // backwards jumps.
-    //
-    // And because we want the Tier1 method to see the full set of profile data,
-    // when OSR is enabled, both Tier0 and any OSR methods need to contribute to
-    // the same profile data set. Since Tier0 has laid down a dense block-based
-    // schema, the OSR methods must use this schema as well.
-    //
-    // Note that OSR methods may also inline. We currently won't instrument
-    // any inlinee contributions (which would also need to carefully "share"
-    // the profile data segment with any Tier0 version and/or any other equivalent
-    // inlnee), so we'll lose a bit of their profile data. We can support this
-    // eventually if it turns out to matter.
     //
-    // Similar issues arise with partially jitted methods; they must also use
-    // block based profiles.
-    //
-    // Under OSR stress we may add patchpoints even without backedges. So we also
-    // need to change the PGO instrumentation approach if OSR stress is enabled.
-    //
-    CLANG_FORMAT_COMMENT_ANCHOR;
-
-#if defined(DEBUG)
-    const bool mayHaveStressPatchpoints =
-        (JitConfig.JitOffsetOnStackReplacement() >= 0) || (JitConfig.JitRandomOnStackReplacement() > 0);
-#else
-    const bool mayHaveStressPatchpoints = false;
-#endif
-
-    const bool mayHavePatchpoints =
-        ((JitConfig.TC_OnStackReplacement() > 0) && (compHasBackwardJump || mayHaveStressPatchpoints)) ||
-        (JitConfig.TC_PartialCompilation() > 0);
-    const bool prejit               = opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT);
-    const bool tier0WithPatchpoints = opts.jitFlags->IsSet(JitFlags::JIT_FLAG_TIER0) && mayHavePatchpoints;
-    const bool isOptimized          = opts.IsInstrumentedOptimized();
-    const bool useEdgeProfiles = (JitConfig.JitEdgeProfiling() > 0) && !prejit && !tier0WithPatchpoints && !isOptimized;
-
-    // TODO-TP: Don't give up on edge profiling for optimized code, currently it has issues
-    // such as unexpected trees near tail calls
+    const bool edgesEnabled    = (JitConfig.JitEdgeProfiling() > 0);
+    const bool prejit          = opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT);
+    const bool useEdgeProfiles = edgesEnabled && !prejit;
 
     if (useEdgeProfiles)
     {
@@ -1912,11 +2333,7 @@ PhaseStatus Compiler::fgPrepareToInstrumentMethod()
     }
     else
     {
-        JITDUMP("Using block profiling, because %s\n",
-                (JitConfig.JitEdgeProfiling() == 0)
-                    ? "edge profiles disabled"
-                    : prejit ? "prejitting" : isOptimized ? "tier1 instrumented" : "tier0 with patchpoints");
-
+        JITDUMP("Using block profiling, because %s\n", edgesEnabled ? "edge profiling disabled" : "prejitting");
         fgCountInstrumentor = new (this, CMK_Pgo) BlockCountInstrumentor(this);
     }
 
@@ -1933,7 +2350,6 @@ PhaseStatus Compiler::fgPrepareToInstrumentMethod()
     else
     {
         JITDUMP("Not doing class/method profiling, because %s\n", prejit ? "prejit" : "class/method profiles disabled");
-
         fgHistogramInstrumentor = new (this, CMK_Pgo) NonInstrumentor(this);
     }
 
@@ -2299,14 +2715,6 @@ void Compiler::fgIncorporateBlockCounts()
             fgSetProfileWeight(block, profileWeight);
         }
     }
-
-    // For OSR, give the method entry (which will be a scratch BB)
-    // the same weight as the OSR Entry.
-    //
-    if (opts.IsOSR())
-    {
-        fgFirstBB->inheritWeight(fgOSREntryBB);
-    }
 }
 
 //------------------------------------------------------------------------