JIT: initial implementation of profile synthesis (#82926)
authorAndy Ayers <andya@microsoft.com>
Mon, 6 Mar 2023 19:13:49 +0000 (11:13 -0800)
committerGitHub <noreply@github.com>
Mon, 6 Mar 2023 19:13:49 +0000 (11:13 -0800)
Implements a profile synthesis algorithm based on the classic Wu-Larus
paper (Static branch frequency and program profile analysis, Micro-27,
1994), with a simple set of heuristics.

First step is construction of a depth-first spanning tree (DFST) for the
flowgraph, and corresponding reverse postorder (RPO). Together these drive
loop recognition; currently we only recognize reducible loops. We use DFST
(non-) ancestry as a proxy for (non-) domination: the dominator of a node
is required to be a DFST ancestor. So no explicit dominance computation is
needed. Irreducible loops are noted but ignored. Loop features like entry,
back, and exit edges, body sets, and nesting are computed and saved.

Next step is assignment of edge likelihoods. Here we use some simple local
heuristics based on loop structure, returns, and throws. A final heuristic
gives slight preference to conditional branches that fall through to the
next IL offset.

After that we use loop nesting to compute the "cyclic probability" $cp$ for
each loop, working inner to outer in loops and RPO within loops. $cp$ summarizes
the effect of flow through the loop and around loop back edges. We cap $cp$ at
no more than 1000. When finding $cp$ for outer loops we use $cp$ for inner
loops.

Once all $cp$ values are known, we assign "external" input weights to method
and EH entry points, and then a final RPO walk computes the expected weight
of each block (and, via edge likelihoods, each edge).

We use the existing DFS code to build the DFST and the RPO, augmented by
some fixes to ensure all blocks (even ones in isolated cycles) get numbered.

This initial version is intended to establish the right functionality, enable
wider correctness testing, and to provide a foundation for refinement of the
heuristics. It is not yet as efficient as it could be.

The loop recognition and recording done here overlaps with similar code
elsewhere in the JIT. The version here is structural and not sensitive to IL
patterns, so is arguably more general and I believe a good deal simpler than
the lexically driven recognition we use for the current loop optimizer.
I aspire to reconcile this somehow in future work.

All this is disabled by default; a new config option either enables using
synthesis to set block weights for all root methods or just those without PGO
data.

Synthesis for inlinees is not yet enabled; progress here is blocked by #82755.

src/coreclr/jit/CMakeLists.txt
src/coreclr/jit/block.cpp
src/coreclr/jit/compiler.h
src/coreclr/jit/fgprofile.cpp
src/coreclr/jit/fgprofilesynthesis.cpp [new file with mode: 0644]
src/coreclr/jit/fgprofilesynthesis.h [new file with mode: 0644]
src/coreclr/jit/jitconfigvalues.h

index 50e4706c329f1f7367258de1b1e277a46c53487c..480bfdc045e16c83106332088c0d2de30108b4f4 100644 (file)
@@ -113,6 +113,7 @@ set( JIT_SOURCES
   fginline.cpp
   fgopt.cpp
   fgprofile.cpp
+  fgprofilesynthesis.cpp
   fgstmt.cpp
   flowgraph.cpp
   forwardsub.cpp
@@ -295,6 +296,7 @@ set( JIT_HEADERS
   emitjmps.h
   emitpub.h
   error.h
+  fgprofilesynthesis.h
   gentree.h
   gentreeopsdef.h
   gtlist.h
index d81a8ae34bd9e0ba0d34bc95bc06874b750ac0fb..20f48f5d096a408b265170bb021f8740986d5396 100644 (file)
@@ -1431,15 +1431,15 @@ BasicBlock* Compiler::bbNewBasicBlock(BBjumpKinds jumpKind)
     /* Give the block a number, set the ancestor count and weight */
 
     ++fgBBcount;
-    ++fgBBNumMax;
 
     if (compIsForInlining())
     {
         block->bbNum = ++impInlineInfo->InlinerCompiler->fgBBNumMax;
+        fgBBNumMax   = block->bbNum;
     }
     else
     {
-        block->bbNum = fgBBNumMax;
+        block->bbNum = ++fgBBNumMax;
     }
 
     if (compRationalIRForm)
index 81c553640ed8e7d299e849e8dbac2b8e761d5ea2..9af2e4fa37d2aaf853f4fb6ae0d2e52e2e36bbe3 100644 (file)
@@ -86,6 +86,7 @@ class CSE_DataFlow;          // defined in OptCSE.cpp
 class OptBoolsDsc;           // defined in optimizer.cpp
 struct RelopImplicationInfo; // defined in redundantbranchopts.cpp
 struct JumpThreadInfo;       // defined in redundantbranchopts.cpp
+class ProfileSynthesis;      // defined in profilesynthesis.h
 #ifdef DEBUG
 struct IndentStack;
 #endif
@@ -1993,6 +1994,7 @@ class Compiler
     friend class SharedTempsScope;
     friend class CallArgs;
     friend class IndirectCallTransformer;
+    friend class ProfileSynthesis;
 
 #ifdef FEATURE_HW_INTRINSICS
     friend struct HWIntrinsicInfo;
index 8a3ab88a15e2762ead4abf0bb5715eb3ce186b23..bd2b1984b57a9b1eb500f77f9421cf8b79318240 100644 (file)
@@ -7,6 +7,8 @@
 #pragma hdrstop
 #endif
 
+#include "fgprofilesynthesis.h"
+
 // Flowgraph Profile Support
 
 //------------------------------------------------------------------------
@@ -2425,6 +2427,20 @@ PhaseStatus Compiler::fgIncorporateProfileData()
         return PhaseStatus::MODIFIED_EVERYTHING;
     }
 
+#ifdef DEBUG
+    // Optionally just run synthesis
+    //
+    if ((JitConfig.JitSynthesizeCounts() > 0) && !compIsForInlining())
+    {
+        if ((JitConfig.JitSynthesizeCounts() == 1) || ((JitConfig.JitSynthesizeCounts() == 2) && !fgHaveProfileData()))
+        {
+            JITDUMP("Synthesizing profile data\n");
+            ProfileSynthesis::Run(this, ProfileSynthesisOption::AssignLikelihoods);
+            return PhaseStatus::MODIFIED_EVERYTHING;
+        }
+    }
+#endif
+
     // Do we have profile data?
     //
     if (!fgHaveProfileData())
diff --git a/src/coreclr/jit/fgprofilesynthesis.cpp b/src/coreclr/jit/fgprofilesynthesis.cpp
new file mode 100644 (file)
index 0000000..4260012
--- /dev/null
@@ -0,0 +1,923 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#include "jitpch.h"
+
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#include "fgprofilesynthesis.h"
+
+// TODO
+//
+// * faster way of doing fgGetPredForBlock
+// * vet against some real data
+// * IR based heuristics (perhaps)
+// * PGO blend modes, random, etc
+// * Repair mode
+// * Inlinee flow graphs
+// * During Cp, avoid repeatedly propagating through nested loops
+// * Fake BB0 or always force scratch BB
+// * Reconcile with our other loop finding stuff
+// * Stop the upweight/downweight of loops in rest of jit
+// * Durable edge properties (exit, back)
+// * Proper weight comp for finallies
+// * Tweak RunRarely to be at or near zero
+// * OSR entry weight
+// * Special handling for deep nests?
+// * Plan for irreducible cases -- MoveNext's
+
+//------------------------------------------------------------------------
+// Run: update edge likelihoods and block counts based
+//   on the selected synthesis strategy
+//
+// Arguments:
+//   option - option to control synthesis
+//
+// Notes:
+//   Only runs for root compilations for now; inlinee support is
+//   blocked on flow analysis issues (see commit notes).
+//
+void ProfileSynthesis::Run(ProfileSynthesisOption option)
+{
+    // Just root instances for now.
+    // Need to sort out issues with flow analysis for inlinees.
+    //
+    if (m_comp->compIsForInlining())
+    {
+        return;
+    }
+
+    BuildReversePostorder();
+    FindLoops();
+
+    // Retain or compute edge likelihood information
+    //
+    switch (option)
+    {
+        case ProfileSynthesisOption::AssignLikelihoods:
+            AssignLikelihoods();
+            break;
+
+        case ProfileSynthesisOption::RetainLikelihoods:
+            break;
+
+        case ProfileSynthesisOption::BlendLikelihoods:
+            BlendLikelihoods();
+            break;
+
+        case ProfileSynthesisOption::ResetAndSynthesize:
+            ClearLikelihoods();
+            AssignLikelihoods();
+            break;
+
+        case ProfileSynthesisOption::ReverseLikelihoods:
+            ReverseLikelihoods();
+            break;
+
+        case ProfileSynthesisOption::RandomLikelihoods:
+            RandomizeLikelihoods();
+            break;
+
+        default:
+            assert(!"unexpected profile synthesis option");
+            break;
+    }
+
+    // Determine cyclic probabilities
+    //
+    ComputeCyclicProbabilities();
+
+    // Assign weights to entry points in the flow graph
+    //
+    AssignInputWeights();
+
+    // Compute the block weights given the inputs and edge likelihoods
+    //
+    ComputeBlockWeights();
+
+    // For now, since we have installed synthesized profile data,
+    // act as if we don't have "real" profile data.
+    //
+    m_comp->fgPgoHaveWeights = false;
+}
+
+//------------------------------------------------------------------------
+// AssignLikelihoods: update edge likelihoods and block counts based
+//   entirely on heuristics.
+//
+// Notes:
+//   Dispatches per block on the block's jump kind; every outgoing
+//   edge of every block gets a likelihood. Any existing likelihoods
+//   are removed in favor of heuristic likelihoods.
+//
+void ProfileSynthesis::AssignLikelihoods()
+{
+    JITDUMP("Assigning edge likelihoods based on heuristics\n");
+
+    for (BasicBlock* const block : m_comp->Blocks())
+    {
+        switch (block->bbJumpKind)
+        {
+            case BBJ_THROW:
+            case BBJ_RETURN:
+            case BBJ_EHFINALLYRET:
+                // No successor cases
+                // (todo: finally ret may have succs)
+                break;
+
+            case BBJ_NONE:
+            case BBJ_CALLFINALLY:
+                // Single successor next cases
+                //
+                // Note we handle flow to the finally
+                // specially; this represents return
+                // from the finally.
+                AssignLikelihoodNext(block);
+                break;
+
+            case BBJ_ALWAYS:
+            case BBJ_LEAVE:
+            case BBJ_EHCATCHRET:
+            case BBJ_EHFILTERRET:
+                // Single successor jump cases
+                AssignLikelihoodJump(block);
+                break;
+
+            case BBJ_COND:
+                // Two successor cases
+                AssignLikelihoodCond(block);
+                break;
+
+            case BBJ_SWITCH:
+                // N successor cases
+                AssignLikelihoodSwitch(block);
+                break;
+
+            default:
+                unreached();
+        }
+    }
+}
+
+//------------------------------------------------------------------------
+// IsDfsAncestor: see if block `x` is ancestor of block `y` in the depth
+//   first spanning tree
+//
+// Arguments:
+//   x -- block that is possible ancestor
+//   y -- block that is possible descendant
+//
+// Returns:
+//   True if x is ancestor of y in the depth first spanning tree.
+//   Note a block is its own ancestor (x == y returns true).
+//
+// Notes:
+//   If return value is false, then x does not dominate y.
+//
+bool ProfileSynthesis::IsDfsAncestor(BasicBlock* x, BasicBlock* y)
+{
+    // x is a DFS ancestor of y iff x's [preorder, postorder] interval
+    // encloses y's.
+    return ((x->bbPreorderNum <= y->bbPreorderNum) && (y->bbPostorderNum <= x->bbPostorderNum));
+}
+
+//------------------------------------------------------------------------
+// GetLoopFromHeader: see if a block is a loop header, and if so return
+//   the associated loop.
+//
+// Arguments:
+//    block - block in question
+//
+// Returns:
+//    loop headed by block, or nullptr
+//
+// Notes:
+//    Linear scan over all recognized loops; could become a map lookup
+//    if this shows up as a hot spot.
+//
+SimpleLoop* ProfileSynthesis::GetLoopFromHeader(BasicBlock* block)
+{
+    for (SimpleLoop* loop : *m_loops)
+    {
+        if (loop->m_head == block)
+        {
+            return loop;
+        }
+    }
+
+    return nullptr;
+}
+
+//------------------------------------------------------------------------
+// IsLoopBackEdge: see if an edge is a loop back edge
+//
+// Arguments:
+//   edge - edge in question
+//
+// Returns:
+//   True if edge is a backedge in some recognized loop.
+//
+// Notes:
+//   Different than asking IsDfsAncestor since we disqualify some
+//   natural backedges for complex loop structures.
+//
+// Todo:
+//   Annotate the edge directly
+//
+bool ProfileSynthesis::IsLoopBackEdge(FlowEdge* edge)
+{
+    // Linear scan over every recognized loop's back edges.
+    for (SimpleLoop* loop : *m_loops)
+    {
+        for (FlowEdge* loopBackEdge : loop->m_backEdges)
+        {
+            if (loopBackEdge == edge)
+            {
+                return true;
+            }
+        }
+    }
+
+    return false;
+}
+
+//------------------------------------------------------------------------
+// IsLoopExitEdge: see if a flow edge is a loop exit edge
+//
+// Arguments:
+//   edge - edge in question
+//
+// Returns:
+//   True if edge is an exit edge in some recognized loop
+//
+// Todo:
+//   Annotate the edge directly
+//
+//   Decide if we want to report that the edge exits
+//   multiple loops.
+//
+bool ProfileSynthesis::IsLoopExitEdge(FlowEdge* edge)
+{
+    // Linear scan over every recognized loop's exit edges.
+    for (SimpleLoop* loop : *m_loops)
+    {
+        for (FlowEdge* loopExitEdge : loop->m_exitEdges)
+        {
+            if (loopExitEdge == edge)
+            {
+                return true;
+            }
+        }
+    }
+
+    return false;
+}
+
+//------------------------------------------------------------------------
+// AssignLikelihoodNext: update edge likelihood for block that always
+//   transfers control to bbNext
+//
+// Arguments:
+//   block -- block in question
+//
+void ProfileSynthesis::AssignLikelihoodNext(BasicBlock* block)
+{
+    // The lone successor edge carries all the flow.
+    FlowEdge* const edge = m_comp->fgGetPredForBlock(block->bbNext, block);
+    edge->setLikelihood(1.0);
+}
+
+//------------------------------------------------------------------------
+// AssignLikelihoodJump: update edge likelihood for a block that always
+//   transfers control to bbJumpDest
+//
+// Arguments:
+//   block -- block in question
+//
+void ProfileSynthesis::AssignLikelihoodJump(BasicBlock* block)
+{
+    // The lone successor edge carries all the flow.
+    FlowEdge* const edge = m_comp->fgGetPredForBlock(block->bbJumpDest, block);
+    edge->setLikelihood(1.0);
+}
+
+//------------------------------------------------------------------------
+// AssignLikelihoodCond: update edge likelihood for a block that
+//   ends in a conditional branch
+//
+// Arguments:
+//   block -- block in question (BBJ_COND)
+//
+// Notes:
+//   Heuristics are tried in priority order (throw, loop back edge,
+//   loop exit edge, return, IL offset); the first one that
+//   distinguishes the two successors decides the likelihoods.
+//
+void ProfileSynthesis::AssignLikelihoodCond(BasicBlock* block)
+{
+    BasicBlock* const jump = block->bbJumpDest;
+    BasicBlock* const next = block->bbNext;
+
+    // Watch for degenerate case: both successors are the same block.
+    //
+    if (jump == next)
+    {
+        AssignLikelihoodNext(block);
+        return;
+    }
+
+    FlowEdge* const jumpEdge = m_comp->fgGetPredForBlock(jump, block);
+    FlowEdge* const nextEdge = m_comp->fgGetPredForBlock(next, block);
+
+    // THROW heuristic: a successor that throws is considered never taken.
+    //
+    bool const isJumpThrow = (jump->bbJumpKind == BBJ_THROW);
+    bool const isNextThrow = (next->bbJumpKind == BBJ_THROW);
+
+    if (isJumpThrow != isNextThrow)
+    {
+        if (isJumpThrow)
+        {
+            jumpEdge->setLikelihood(0.0);
+            nextEdge->setLikelihood(1.0);
+        }
+        else
+        {
+            jumpEdge->setLikelihood(1.0);
+            nextEdge->setLikelihood(0.0);
+        }
+
+        return;
+    }
+
+    // LOOP BACK EDGE heuristic: loops are assumed to iterate.
+    //
+    bool const isJumpEdgeBackEdge = IsLoopBackEdge(jumpEdge);
+    bool const isNextEdgeBackEdge = IsLoopBackEdge(nextEdge);
+
+    if (isJumpEdgeBackEdge != isNextEdgeBackEdge)
+    {
+        if (isJumpEdgeBackEdge)
+        {
+            JITDUMP(FMT_BB "->" FMT_BB " is loop back edge\n", block->bbNum, jump->bbNum);
+            jumpEdge->setLikelihood(0.9);
+            nextEdge->setLikelihood(0.1);
+        }
+        else
+        {
+            JITDUMP(FMT_BB "->" FMT_BB " is loop back edge\n", block->bbNum, next->bbNum);
+            jumpEdge->setLikelihood(0.1);
+            nextEdge->setLikelihood(0.9);
+        }
+
+        return;
+    }
+
+    // LOOP EXIT EDGE heuristic: staying in the loop is preferred.
+    //
+    // Consider: adjust probability if loop has multiple exit edges, so that
+    // overall exit probability is around 0.1.
+    //
+    bool const isJumpEdgeExitEdge = IsLoopExitEdge(jumpEdge);
+    bool const isNextEdgeExitEdge = IsLoopExitEdge(nextEdge);
+
+    if (isJumpEdgeExitEdge != isNextEdgeExitEdge)
+    {
+        if (isJumpEdgeExitEdge)
+        {
+            JITDUMP(FMT_BB "->" FMT_BB " is loop exit edge\n", block->bbNum, jump->bbNum);
+            jumpEdge->setLikelihood(0.1);
+            nextEdge->setLikelihood(0.9);
+        }
+        else
+        {
+            JITDUMP(FMT_BB "->" FMT_BB " is loop exit edge\n", block->bbNum, next->bbNum);
+            jumpEdge->setLikelihood(0.9);
+            nextEdge->setLikelihood(0.1);
+        }
+
+        return;
+    }
+
+    // RETURN heuristic: a successor that returns is somewhat less likely.
+    //
+    bool const isJumpReturn = (jump->bbJumpKind == BBJ_RETURN);
+    bool const isNextReturn = (next->bbJumpKind == BBJ_RETURN);
+
+    if (isJumpReturn != isNextReturn)
+    {
+        if (isJumpReturn)
+        {
+            jumpEdge->setLikelihood(0.2);
+            nextEdge->setLikelihood(0.8);
+        }
+        else
+        {
+            jumpEdge->setLikelihood(0.8);
+            nextEdge->setLikelihood(0.2);
+        }
+
+        return;
+    }
+
+    // IL OFFSET heuristic
+    //
+    // Give slight preference to bbNext
+    //
+    jumpEdge->setLikelihood(0.48);
+    nextEdge->setLikelihood(0.52);
+}
+
+//------------------------------------------------------------------------
+// AssignLikelihoodSwitch: update edge likelihood for a block that
+//   ends in a switch
+//
+// Arguments:
+//   block -- block in question (BBJ_SWITCH)
+//
+void ProfileSynthesis::AssignLikelihoodSwitch(BasicBlock* block)
+{
+    // Assume each switch case is equally probable
+    //
+    const unsigned n = block->NumSucc();
+    assert(n != 0);
+    const weight_t p = 1 / (weight_t)n;
+
+    // Each unique edge gets some multiple of that basic probability
+    // (the edge's dup count reflects how many cases target the same
+    // successor, so likelihoods should still sum to 1 overall).
+    //
+    for (BasicBlock* const succ : block->Succs(m_comp))
+    {
+        FlowEdge* const edge = m_comp->fgGetPredForBlock(succ, block);
+        edge->setLikelihood(p * edge->getDupCount());
+    }
+}
+
+//------------------------------------------------------------------------
+// BlendLikelihoods: blend existing and heuristic likelihoods for all blocks
+//
+// Notes:
+//   Not yet implemented; placeholder for future work (likewise for
+//   Clear/Reverse/Randomize below).
+//
+void ProfileSynthesis::BlendLikelihoods()
+{
+}
+
+void ProfileSynthesis::ClearLikelihoods()
+{
+}
+
+void ProfileSynthesis::ReverseLikelihoods()
+{
+}
+
+void ProfileSynthesis::RandomizeLikelihoods()
+{
+}
+
+//------------------------------------------------------------------------
+// BuildReversePostorder: compute depth first spanning tree and pre
+//   and post numbers for the blocks
+//
+void ProfileSynthesis::BuildReversePostorder()
+{
+    m_comp->EnsureBasicBlockEpoch();
+    m_comp->fgBBReversePostorder = new (m_comp, CMK_Pgo) BasicBlock*[m_comp->fgBBNumMax + 1]{};
+    m_comp->fgComputeEnterBlocksSet();
+    m_comp->fgDfsReversePostorder();
+
+    // Build map from bbNum to Block*.
+    //
+    m_bbNumToBlockMap = new (m_comp, CMK_Pgo) BasicBlock*[m_comp->fgBBNumMax + 1]{};
+    for (BasicBlock* const block : m_comp->Blocks())
+    {
+        m_bbNumToBlockMap[block->bbNum] = block;
+    }
+
+#ifdef DEBUG
+    if (m_comp->verbose)
+    {
+        printf("\nAfter doing a post order traversal of the BB graph, this is the ordering:\n");
+        for (unsigned i = 1; i <= m_comp->fgBBNumMax; ++i)
+        {
+            printf("%02u -> " FMT_BB "\n", i, m_comp->fgBBReversePostorder[i]->bbNum);
+        }
+        printf("\n");
+    }
+#endif // DEBUG
+}
+
+//------------------------------------------------------------------------
+// FindLoops: locate and classify loops
+//
+void ProfileSynthesis::FindLoops()
+{
+    CompAllocator allocator    = m_comp->getAllocator(CMK_Pgo);
+    m_loops                    = new (allocator) LoopVector(allocator);
+    unsigned improperLoopCount = 0;
+
+    // Identify loops
+    //
+    for (unsigned i = 1; i <= m_comp->fgBBNumMax; i++)
+    {
+        BasicBlock* const block = m_comp->fgBBReversePostorder[i];
+
+        // If a block is a DFS ancestor of one if its predecessors then the block is a loop header.
+        //
+        SimpleLoop* loop = nullptr;
+
+        for (FlowEdge* predEdge : block->PredEdges())
+        {
+            if (IsDfsAncestor(block, predEdge->getSourceBlock()))
+            {
+                if (loop == nullptr)
+                {
+                    loop = new (allocator) SimpleLoop(block, allocator);
+                    JITDUMP("\n");
+                }
+
+                JITDUMP(FMT_BB " -> " FMT_BB " is a backedge\n", predEdge->getSourceBlock()->bbNum, block->bbNum);
+                loop->m_backEdges.push_back(predEdge);
+            }
+        }
+
+        if (loop == nullptr)
+        {
+            continue;
+        }
+
+        JITDUMP(FMT_BB " is head of a DFS loop with %d back edges\n", block->bbNum, loop->m_backEdges.size());
+
+        // Now walk back in flow along the back edges from block to determine if
+        // this is a natural loop and to find all the blocks in the loop.
+        //
+        loop->m_blocks = BlockSetOps::MakeEmpty(m_comp);
+        BlockSetOps::AddElemD(m_comp, loop->m_blocks, block->bbNum);
+
+        // todo: hoist this out and just do a reset here
+        jitstd::list<BasicBlock*> worklist(allocator);
+
+        // Seed the worklist
+        //
+        for (FlowEdge* backEdge : loop->m_backEdges)
+        {
+            BasicBlock* const backEdgeSource = backEdge->getSourceBlock();
+
+            if (BlockSetOps::IsMember(m_comp, loop->m_blocks, backEdgeSource->bbNum))
+            {
+                continue;
+            }
+
+            worklist.push_back(backEdgeSource);
+        }
+
+        bool isNaturalLoop = true;
+
+        // Work back through flow to loop head or to another pred
+        // that is clearly outside the loop.
+        //
+        // TODO: verify that we can indeed get back to the loop head
+        // and not get stopped somewhere (eg loop through EH).
+        //
+        while (!worklist.empty() & isNaturalLoop)
+        {
+            BasicBlock* const loopBlock = worklist.back();
+            worklist.pop_back();
+            BlockSetOps::AddElemD(m_comp, loop->m_blocks, loopBlock->bbNum);
+
+            for (FlowEdge* const predEdge : loopBlock->PredEdges())
+            {
+                BasicBlock* const predBlock = predEdge->getSourceBlock();
+
+                // `block` cannot dominate `predBlock` unless it is a DFS ancestor.
+                //
+                if (!IsDfsAncestor(block, predBlock))
+                {
+                    // Does this represent flow out of some handler?
+                    // If so we will ignore it.
+                    //
+                    // Might want to vet that handler's try region entry
+                    // is a dfs ancestor...?
+                    //
+                    if (!BasicBlock::sameHndRegion(block, predBlock))
+                    {
+                        continue;
+                    }
+
+                    JITDUMP("Loop is not natural; witness " FMT_BB " -> " FMT_BB "\n", predBlock->bbNum,
+                            loopBlock->bbNum);
+
+                    isNaturalLoop = false;
+                    improperLoopCount++;
+                    break;
+                }
+
+                if (BlockSetOps::IsMember(m_comp, loop->m_blocks, predBlock->bbNum))
+                {
+                    continue;
+                }
+
+                worklist.push_back(predBlock);
+            }
+        }
+
+        if (!isNaturalLoop)
+        {
+            continue;
+        }
+
+        JITDUMP("Loop has %d blocks\n", BlockSetOps::Count(m_comp, loop->m_blocks));
+
+        // Find the exit edges
+        //
+        BlockSetOps::Iter iter(m_comp, loop->m_blocks);
+        unsigned          bbNum = 0;
+        while (iter.NextElem(&bbNum))
+        {
+            BasicBlock* const loopBlock = m_bbNumToBlockMap[bbNum];
+
+            for (BasicBlock* const succBlock : loopBlock->Succs(m_comp))
+            {
+                if (!BlockSetOps::IsMember(m_comp, loop->m_blocks, succBlock->bbNum))
+                {
+                    FlowEdge* const exitEdge = m_comp->fgGetPredForBlock(succBlock, loopBlock);
+                    JITDUMP(FMT_BB " -> " FMT_BB " is an exit edge\n", loopBlock->bbNum, succBlock->bbNum);
+                    loop->m_exitEdges.push_back(exitEdge);
+                }
+            }
+        }
+
+        // Find the entry edges
+        //
+        // Note if fgEntryBB is a loop head we won't have an entry edge.
+        // So it needs to be special cased later on when processing
+        // entry edges.
+        //
+        for (FlowEdge* const predEdge : loop->m_head->PredEdges())
+        {
+            if (!IsDfsAncestor(block, predEdge->getSourceBlock()))
+            {
+                JITDUMP(FMT_BB " -> " FMT_BB " is an entry edge\n", predEdge->getSourceBlock()->bbNum,
+                        loop->m_head->bbNum);
+                loop->m_entryEdges.push_back(predEdge);
+            }
+        }
+
+        // Search for parent loop, validate proper nesting.
+        //
+        // Since loops record in outer->inner order the parent will be the
+        // most recently recorded loop that contains this loop's header.
+        //
+        for (auto it = m_loops->rbegin(), itEnd = m_loops->rend(); it != itEnd; ++it)
+        {
+            SimpleLoop* const otherLoop = *it;
+
+            if (BlockSetOps::IsMember(m_comp, otherLoop->m_blocks, block->bbNum))
+            {
+                // Ancestor loop; should contain all blocks of this loop
+                //
+                assert(BlockSetOps::IsSubset(m_comp, loop->m_blocks, otherLoop->m_blocks));
+
+                if (loop->m_parent == nullptr)
+                {
+                    loop->m_parent = otherLoop;
+                    loop->m_depth  = otherLoop->m_depth + 1;
+                    JITDUMP("at depth %u, nested within loop starting at " FMT_BB "\n", loop->m_depth,
+                            otherLoop->m_head->bbNum);
+
+                    // Note we could break here but that would bypass the non-overlap check
+                    // just below, so for now we check against all known loops.
+                }
+            }
+            else
+            {
+                // Non-ancestor loop; should have no blocks in common with current loop
+                //
+                assert(BlockSetOps::IsEmptyIntersection(m_comp, loop->m_blocks, otherLoop->m_blocks));
+            }
+        }
+
+        if (loop->m_parent == nullptr)
+        {
+            JITDUMP("top-level loop\n");
+            loop->m_depth = 1;
+        }
+
+        // Record this loop
+        //
+        m_loops->push_back(loop);
+    }
+
+    if (m_loops->size() > 0)
+    {
+        JITDUMP("\nFound %d loops\n", m_loops->size());
+    }
+
+    if (improperLoopCount > 0)
+    {
+        JITDUMP("Rejected %d loops\n", improperLoopCount);
+    }
+}
+
+//------------------------------------------------------------------------
+// ComputeCyclicProbabilities: for each loop, compute how much flow returns
+//   to the loop head given one external count.
+//
+void ProfileSynthesis::ComputeCyclicProbabilities()
+{
+    // We found loops while walking in reverse postorder, so the loop vector
+    // is naturally organized with outer loops before inner.
+    //
+    // Walk it backwards here so we compute inner loop cyclic probabilities
+    // first. We rely on that when processing outer loops.
+    //
+    for (auto it = m_loops->rbegin(), itEnd = m_loops->rend(); it != itEnd; ++it)
+    {
+        SimpleLoop* const loop = *it;
+        ComputeCyclicProbabilities(loop);
+    }
+}
+
+//------------------------------------------------------------------------
+// ComputeCyclicProbabilities: for a given loop, compute how much flow returns
+//   to the loop head given one external count.
+//
+// Arguments:
+//   loop - loop in question; any nested loops must already have
+//     their cyclic probabilities computed
+//
+void ProfileSynthesis::ComputeCyclicProbabilities(SimpleLoop* loop)
+{
+    // Initialize
+    //
+    BlockSetOps::Iter iter(m_comp, loop->m_blocks);
+    unsigned          bbNum = 0;
+    while (iter.NextElem(&bbNum))
+    {
+        BasicBlock* const loopBlock = m_bbNumToBlockMap[bbNum];
+        loopBlock->bbWeight         = 0.0;
+    }
+
+    // Process loop blocks in RPO. Just takes one pass through the loop blocks
+    // as any cyclic contributions are handled by cyclic probabilities.
+    //
+    for (unsigned int i = 1; i <= m_comp->fgBBNumMax; i++)
+    {
+        BasicBlock* const block = m_comp->fgBBReversePostorder[i];
+
+        if (!BlockSetOps::IsMember(m_comp, loop->m_blocks, block->bbNum))
+        {
+            continue;
+        }
+
+        // Loop head gets external count of 1
+        //
+        if (block == loop->m_head)
+        {
+            JITDUMP("ccp: " FMT_BB " :: 1.0\n", block->bbNum);
+            block->bbWeight = 1.0;
+        }
+        else
+        {
+            SimpleLoop* const nestedLoop = GetLoopFromHeader(block);
+
+            if (nestedLoop != nullptr)
+            {
+                // We should have figured this out already.
+                //
+                assert(nestedLoop->m_cyclicProbability != 0);
+
+                // Sum entry edges, multiply by Cp
+                //
+                weight_t newWeight = 0.0;
+
+                for (FlowEdge* const edge : nestedLoop->m_entryEdges)
+                {
+                    // Ignore flow that crosses a handler boundary.
+                    //
+                    if (!BasicBlock::sameHndRegion(block, edge->getSourceBlock()))
+                    {
+                        continue;
+                    }
+
+                    newWeight += edge->getLikelyWeight();
+                }
+
+                newWeight *= nestedLoop->m_cyclicProbability;
+                block->bbWeight = newWeight;
+
+                JITDUMP("ccp (nested header): " FMT_BB " :: " FMT_WT "\n", block->bbNum, newWeight);
+            }
+            else
+            {
+                weight_t newWeight = 0.0;
+
+                for (FlowEdge* const edge : block->PredEdges())
+                {
+                    // Ignore flow that crosses a handler boundary.
+                    //
+                    if (!BasicBlock::sameHndRegion(block, edge->getSourceBlock()))
+                    {
+                        continue;
+                    }
+
+                    newWeight += edge->getLikelyWeight();
+                }
+
+                block->bbWeight = newWeight;
+
+                JITDUMP("ccp: " FMT_BB " :: " FMT_WT "\n", block->bbNum, newWeight);
+            }
+        }
+    }
+
+    // Now look at cyclic flow back to the head block.
+    //
+    weight_t cyclicWeight = 0;
+    bool     capped       = false;
+
+    for (FlowEdge* const edge : loop->m_backEdges)
+    {
+        JITDUMP("ccp backedge " FMT_BB " (" FMT_WT ") -> " FMT_BB " likelihood " FMT_WT "\n",
+                edge->getSourceBlock()->bbNum, edge->getSourceBlock()->bbWeight, loop->m_head->bbNum,
+                edge->getLikelihood());
+
+        cyclicWeight += edge->getLikelyWeight();
+    }
+
+    // Allow for a bit of rounding error, but not too much.
+    // (todo: decrease loop gain if we are in a deep nest?)
+    // assert(cyclicWeight <= 1.01);
+    //
+    // Capping cyclic weight at 0.999 caps the cyclic probability at 1000.
+    //
+    if (cyclicWeight > 0.999)
+    {
+        capped       = true;
+        cyclicWeight = 0.999;
+    }
+
+    weight_t cyclicProbability = 1.0 / (1.0 - cyclicWeight);
+
+    JITDUMP("For loop at " FMT_BB " cyclic weight is " FMT_WT " cyclic probability is " FMT_WT "%s\n",
+            loop->m_head->bbNum, cyclicWeight, cyclicProbability, capped ? " [capped]" : "");
+
+    loop->m_cyclicProbability = cyclicProbability;
+}
+
+//------------------------------------------------------------------------
+// AssignInputWeights: provide initial profile weights for all blocks
+//
+// Notes:
+//   The method entry gets the standard unity weight; exception
+//   handler (and filter) entries get a small fraction of it.
+//
+//   For finallys we may want to come back and rescale once we know the
+//   weights of all the callfinallies, or perhaps just use the weight
+//   of the first block in the associated try.
+//
+void ProfileSynthesis::AssignInputWeights()
+{
+    const weight_t entryWeight = BB_UNITY_WEIGHT;
+    const weight_t ehWeight    = entryWeight / 1000;
+
+    for (BasicBlock* block : m_comp->Blocks())
+    {
+        block->bbWeight = 0.0;
+    }
+
+    m_comp->fgFirstBB->bbWeight = entryWeight;
+
+    for (EHblkDsc* const HBtab : EHClauses(m_comp))
+    {
+        if (HBtab->HasFilter())
+        {
+            HBtab->ebdFilter->bbWeight = ehWeight;
+        }
+
+        HBtab->ebdHndBeg->bbWeight = ehWeight;
+    }
+}
+
+//------------------------------------------------------------------------
+// ComputeBlockWeights: compute weights for all blocks
+//   based on input weights, edge likelihoods, and cyclic probabilities
+//
+// Notes:
+//   Walks blocks in reverse postorder; a loop header's incoming entry
+//   flow is scaled by the loop's cyclic probability.
+//
+void ProfileSynthesis::ComputeBlockWeights()
+{
+    JITDUMP("Computing block weights\n");
+
+    for (unsigned int i = 1; i <= m_comp->fgBBNumMax; i++)
+    {
+        BasicBlock* const block = m_comp->fgBBReversePostorder[i];
+        SimpleLoop* const loop  = GetLoopFromHeader(block);
+
+        if (loop != nullptr)
+        {
+            // Start with initial weight, sum entry edges, multiply by Cp
+            //
+            weight_t newWeight = block->bbWeight;
+
+            for (FlowEdge* const edge : loop->m_entryEdges)
+            {
+                newWeight += edge->getLikelyWeight();
+            }
+
+            newWeight *= loop->m_cyclicProbability;
+            block->bbWeight = newWeight;
+
+            JITDUMP("cbw (header): " FMT_BB " :: " FMT_WT "\n", block->bbNum, block->bbWeight);
+        }
+        else
+        {
+            // start with initial weight, sum all incoming edges
+            //
+            weight_t newWeight = block->bbWeight;
+
+            for (FlowEdge* const edge : block->PredEdges())
+            {
+                newWeight += edge->getLikelyWeight();
+            }
+
+            block->bbWeight = newWeight;
+
+            JITDUMP("cbw: " FMT_BB " :: " FMT_WT "\n", block->bbNum, block->bbWeight);
+        }
+
+        // Todo: just use weight to determine run rarely, not flag
+        //
+        if (block->bbWeight == 0.0)
+        {
+            block->bbSetRunRarely();
+        }
+        else
+        {
+            block->bbFlags &= ~BBF_RUN_RARELY;
+        }
+    }
+}
diff --git a/src/coreclr/jit/fgprofilesynthesis.h b/src/coreclr/jit/fgprofilesynthesis.h
new file mode 100644 (file)
index 0000000..393f332
--- /dev/null
@@ -0,0 +1,102 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#ifndef _FGPROFILESYNTHESIS_H_
+#define _FGPROFILESYNTHESIS_H_
+
+#include "compiler.h"
+#include "jitstd.h"
+
+// Flowgraph Profile Synthesis
+
+typedef jitstd::vector<FlowEdge*> EdgeVector;
+
+struct SimpleLoop // describes one reducible loop found during profile synthesis
+{
+    SimpleLoop(BasicBlock* head, CompAllocator allocator) // head: loop header; allocator: backs the edge vectors
+        : m_head(head)
+        , m_parent(nullptr)
+        , m_blocks(BlockSetOps::UninitVal())
+        , m_entryEdges(allocator)
+        , m_backEdges(allocator)
+        , m_exitEdges(allocator)
+        , m_cyclicProbability(0)
+        , m_depth(0)
+    {
+    }
+
+    BasicBlock* m_head;              // loop header block
+    SimpleLoop* m_parent;            // innermost enclosing loop, or nullptr if top level
+    BlockSet    m_blocks;            // set of blocks comprising the loop body
+    EdgeVector  m_entryEdges;        // edges from outside the loop to the header
+    EdgeVector  m_backEdges;         // edges from within the loop back to the header
+    EdgeVector  m_exitEdges;         // edges from within the loop to blocks outside it
+    weight_t    m_cyclicProbability; // Cp: summarizes flow around the back edges (capped)
+    unsigned    m_depth;             // loop nesting depth
+};
+
+typedef jitstd::vector<SimpleLoop*> LoopVector;
+
+//------------------------------------------------------------------------
+// ProfileSynthesisOption: specify behavior of profile synthesis
+//
+enum class ProfileSynthesisOption
+{
+    AssignLikelihoods,  // assign edge likelihoods via heuristics
+    RetainLikelihoods,  // retain any existing edge likelihoods
+    BlendLikelihoods,   // blend existing likelihoods with heuristic ones
+    ResetAndSynthesize, // discard existing likelihoods, then synthesize
+    ReverseLikelihoods, // invert likelihoods (presumably for stress testing -- confirm)
+    RandomLikelihoods,  // use randomized likelihoods (presumably for stress testing -- confirm)
+};
+
+//------------------------------------------------------------------------
+// ProfileSynthesis: synthesize, repair, or alter existing profile information
+//
+class ProfileSynthesis
+{
+public:
+    static void Run(Compiler* compiler, ProfileSynthesisOption option) // sole public entry point
+    {
+        ProfileSynthesis p(compiler);
+        p.Run(option);
+    }
+
+private:
+    ProfileSynthesis(Compiler* compiler) : m_comp(compiler), m_loops(nullptr), m_bbNumToBlockMap(nullptr)
+    {
+    }
+
+    void Run(ProfileSynthesisOption option); // driver: runs the phases below per the chosen option
+
+    void        BuildReversePostorder();                         // build DFST / RPO over the flowgraph
+    static bool IsDfsAncestor(BasicBlock* x, BasicBlock* y);     // is x a DFST ancestor of y?
+    bool IsLoopBackEdge(FlowEdge* edge);
+    bool IsLoopExitEdge(FlowEdge* edge);
+
+    void        FindLoops();                                     // recognize reducible loops
+    SimpleLoop* GetLoopFromHeader(BasicBlock* block);            // loop headed by block, or nullptr
+
+    void AssignLikelihoods();                                    // heuristic likelihoods for all edges
+    void AssignLikelihoodNext(BasicBlock* block);
+    void AssignLikelihoodJump(BasicBlock* block);
+    void AssignLikelihoodCond(BasicBlock* block);
+    void AssignLikelihoodSwitch(BasicBlock* block);
+    void BlendLikelihoods();
+    void ClearLikelihoods();
+    void ReverseLikelihoods();
+    void RandomizeLikelihoods();
+
+    void ComputeCyclicProbabilities();                           // Cp for all loops, inner to outer
+    void ComputeCyclicProbabilities(SimpleLoop* loop);           // Cp for one loop
+
+    void AssignInputWeights();                                   // external weights for method/EH entries
+    void ComputeBlockWeights();                                  // final RPO pass: weight per block
+
+private:
+    Compiler* const m_comp;            // compiler instance being processed
+    LoopVector*     m_loops;           // loops found by FindLoops
+    BasicBlock**    m_bbNumToBlockMap; // map from bbNum to block
+};
+
+#endif // !_FGPROFILESYNTHESIS_H_
index be7a58ef128629624d4ce15f3d6450e85faf4b1b..a1305299854ee38a68a0824142be19b00a0a6aec 100644 (file)
@@ -592,7 +592,10 @@ CONFIG_INTEGER(JitCrossCheckDevirtualizationAndPGO, W("JitCrossCheckDevirtualiza
 CONFIG_INTEGER(JitNoteFailedExactDevirtualization, W("JitNoteFailedExactDevirtualization"), 0)
 CONFIG_INTEGER(JitRandomlyCollect64BitCounts, W("JitRandomlyCollect64BitCounts"), 0) // Collect 64-bit counts randomly
                                                                                      // for some methods.
-#endif                                                                               // debug
+// JitSynthesizeCounts: 1: use profile synthesis for all root methods
+//                      2: use profile synthesis only for root methods without PGO data
+CONFIG_INTEGER(JitSynthesizeCounts, W("JitSynthesizeCounts"), 0)
+#endif
 
 // Devirtualize virtual calls with getExactClasses (NativeAOT only for now)
 CONFIG_INTEGER(JitEnableExactDevirtualization, W("JitEnableExactDevirtualization"), 1)