From 73b2a0f75a2fd6f88ee14b7bd8438865ed0a6453 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Mon, 6 Mar 2023 11:13:49 -0800 Subject: [PATCH] JIT: initial implementation of profile synthesis (#82926) Implements a profile synthesis algorithm based on the classic Wu-Larus paper (Static branch frequency and program profile analysis, Micro-27, 1994), with a simple set of heuristics. First step is construction of a depth-first spanning tree (DFST) for the flowgraph, and corresponding reverse postorder (RPO). Together these drive loop recognition; currently we only recognize reducible loops. We use DFST (non-) ancestry as a proxy for (non-) domination: the dominator of a node is required to be a DFST ancestor. So no explicit dominance computation is needed. Irreducible loops are noted but ignored. Loop features like entry, back, and exit edges, body sets, and nesting are computed and saved. Next step is assignment of edge likelihoods. Here we use some simple local heuristics based on loop structure, returns, and throws. A final heuristic gives slight preference to conditional branches that fall through to the next IL offset. After that we use loop nesting to compute the "cyclic probability" $cp$ for each loop, working inner to outer in loops and RPO within loops. $cp$ summarizes the effect of flow through the loop and around loop back edges. We cap $cp$ at no more than 1000. When finding $cp$ for outer loops we use $cp$ for inner loops. Once all $cp$ values are known, we assign "external" input weights to method and EH entry points, and then a final RPO walk computes the expected weight of each block (and, via edge likelihoods, each edge). We use the existing DFS code to build the DFST and the RPO, augmented by some fixes to ensure all blocks (even ones in isolated cycles) get numbered. This initial version is intended to establish the right functionality, enable wider correctness testing, and to provide a foundation for refinement of the heuristics. 
It is not yet as efficient as it could be. The loop recognition and recording done here overlaps with similar code elsewhere in the JIT. The version here is structural and not sensitive to IL patterns, so is arguably more general and I believe a good deal simpler than the lexically driven recognition we use for the current loop optimizer. I aspire to reconcile this somehow in future work. All this is disabled by default; a new config option either enables using synthesis to set block weights for all root methods or just those without PGO data. Synthesis for inlinees is not yet enabled; progress here is blocked by #82755. --- src/coreclr/jit/CMakeLists.txt | 2 + src/coreclr/jit/block.cpp | 4 +- src/coreclr/jit/compiler.h | 2 + src/coreclr/jit/fgprofile.cpp | 16 + src/coreclr/jit/fgprofilesynthesis.cpp | 923 +++++++++++++++++++++++++++++++++ src/coreclr/jit/fgprofilesynthesis.h | 102 ++++ src/coreclr/jit/jitconfigvalues.h | 5 +- 7 files changed, 1051 insertions(+), 3 deletions(-) create mode 100644 src/coreclr/jit/fgprofilesynthesis.cpp create mode 100644 src/coreclr/jit/fgprofilesynthesis.h diff --git a/src/coreclr/jit/CMakeLists.txt b/src/coreclr/jit/CMakeLists.txt index 50e4706..480bfdc 100644 --- a/src/coreclr/jit/CMakeLists.txt +++ b/src/coreclr/jit/CMakeLists.txt @@ -113,6 +113,7 @@ set( JIT_SOURCES fginline.cpp fgopt.cpp fgprofile.cpp + fgprofilesynthesis.cpp fgstmt.cpp flowgraph.cpp forwardsub.cpp @@ -295,6 +296,7 @@ set( JIT_HEADERS emitjmps.h emitpub.h error.h + fgprofilesynthesis.h gentree.h gentreeopsdef.h gtlist.h diff --git a/src/coreclr/jit/block.cpp b/src/coreclr/jit/block.cpp index d81a8ae..20f48f5 100644 --- a/src/coreclr/jit/block.cpp +++ b/src/coreclr/jit/block.cpp @@ -1431,15 +1431,15 @@ BasicBlock* Compiler::bbNewBasicBlock(BBjumpKinds jumpKind) /* Give the block a number, set the ancestor count and weight */ ++fgBBcount; - ++fgBBNumMax; if (compIsForInlining()) { block->bbNum = ++impInlineInfo->InlinerCompiler->fgBBNumMax; + fgBBNumMax = 
block->bbNum; } else { - block->bbNum = fgBBNumMax; + block->bbNum = ++fgBBNumMax; } if (compRationalIRForm) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 81c5536..9af2e4f 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -86,6 +86,7 @@ class CSE_DataFlow; // defined in OptCSE.cpp class OptBoolsDsc; // defined in optimizer.cpp struct RelopImplicationInfo; // defined in redundantbranchopts.cpp struct JumpThreadInfo; // defined in redundantbranchopts.cpp +class ProfileSynthesis; // defined in profilesynthesis.h #ifdef DEBUG struct IndentStack; #endif @@ -1993,6 +1994,7 @@ class Compiler friend class SharedTempsScope; friend class CallArgs; friend class IndirectCallTransformer; + friend class ProfileSynthesis; #ifdef FEATURE_HW_INTRINSICS friend struct HWIntrinsicInfo; diff --git a/src/coreclr/jit/fgprofile.cpp b/src/coreclr/jit/fgprofile.cpp index 8a3ab88..bd2b198 100644 --- a/src/coreclr/jit/fgprofile.cpp +++ b/src/coreclr/jit/fgprofile.cpp @@ -7,6 +7,8 @@ #pragma hdrstop #endif +#include "fgprofilesynthesis.h" + // Flowgraph Profile Support //------------------------------------------------------------------------ @@ -2425,6 +2427,20 @@ PhaseStatus Compiler::fgIncorporateProfileData() return PhaseStatus::MODIFIED_EVERYTHING; } +#ifdef DEBUG + // Optionally just run synthesis + // + if ((JitConfig.JitSynthesizeCounts() > 0) && !compIsForInlining()) + { + if ((JitConfig.JitSynthesizeCounts() == 1) || ((JitConfig.JitSynthesizeCounts() == 2) && !fgHaveProfileData())) + { + JITDUMP("Synthesizing profile data\n"); + ProfileSynthesis::Run(this, ProfileSynthesisOption::AssignLikelihoods); + return PhaseStatus::MODIFIED_EVERYTHING; + } + } +#endif + // Do we have profile data? 
// if (!fgHaveProfileData()) diff --git a/src/coreclr/jit/fgprofilesynthesis.cpp b/src/coreclr/jit/fgprofilesynthesis.cpp new file mode 100644 index 0000000..4260012 --- /dev/null +++ b/src/coreclr/jit/fgprofilesynthesis.cpp @@ -0,0 +1,923 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "jitpch.h" + +#ifdef _MSC_VER +#pragma hdrstop +#endif + +#include "fgprofilesynthesis.h" + +// TODO +// +// * faster way of doing fgGetPredForBlock +// * vet against some real data +// * IR based heuristics (perhaps) +// * PGO blend modes, random, etc +// * Repair mode +// * Inlinee flow graphs +// * During Cp, avoid repeatedly propagating through nested loops +// * Fake BB0 or always force scratch BB +// * Reconcile with our other loop finding stuff +// * Stop the upweight/downweight of loops in rest of jit +// * Durable edge properties (exit, back) +// * Proper weight comp for finallies +// * Tweak RunRarely to be at or near zero +// * OSR entry weight +// * Special handling for deep nests? +// * Plan for irreducible cases -- MoveNext's + +//------------------------------------------------------------------------ +// fgProfileSynthesis: update edge likelihoods and block counts based +// on various strategies +// +// Arguments: +// options - options to control synthesis +// +void ProfileSynthesis::Run(ProfileSynthesisOption option) +{ + // Just root instances for now. + // Need to sort out issues with flow analysis for inlinees. 
+ // + if (m_comp->compIsForInlining()) + { + return; + } + + BuildReversePostorder(); + FindLoops(); + + // Retain or compute edge likelihood information + // + switch (option) + { + case ProfileSynthesisOption::AssignLikelihoods: + AssignLikelihoods(); + break; + + case ProfileSynthesisOption::RetainLikelihoods: + break; + + case ProfileSynthesisOption::BlendLikelihoods: + BlendLikelihoods(); + break; + + case ProfileSynthesisOption::ResetAndSynthesize: + ClearLikelihoods(); + AssignLikelihoods(); + break; + + case ProfileSynthesisOption::ReverseLikelihoods: + ReverseLikelihoods(); + break; + + case ProfileSynthesisOption::RandomLikelihoods: + RandomizeLikelihoods(); + break; + + default: + assert(!"unexpected profile synthesis option"); + break; + } + + // Determine cyclic probabilities + // + ComputeCyclicProbabilities(); + + // Assign weights to entry points in the flow graph + // + AssignInputWeights(); + + // Compute the block weights given the inputs and edge likelihoods + // + ComputeBlockWeights(); + + // For now, since we have installed synthesized profile data, + // act as if we don't have "real" profile data. + // + m_comp->fgPgoHaveWeights = false; +} + +//------------------------------------------------------------------------ +// AssignLikelihoods: update edge likelihoods and block counts based +// entirely on heuristics. +// +// Notes: +// any existing likelihoods are removed in favor of heuristic +// likelihoods +// +void ProfileSynthesis::AssignLikelihoods() +{ + JITDUMP("Assigning edge likelihoods based on heuristics\n"); + + for (BasicBlock* const block : m_comp->Blocks()) + { + switch (block->bbJumpKind) + { + case BBJ_THROW: + case BBJ_RETURN: + case BBJ_EHFINALLYRET: + // No successor cases + // (todo: finally ret may have succs) + break; + + case BBJ_NONE: + case BBJ_CALLFINALLY: + // Single successor next cases + // + // Note we handle flow to the finally + // specially; this represents return + // from the finally. 
+ AssignLikelihoodNext(block); + break; + + case BBJ_ALWAYS: + case BBJ_LEAVE: + case BBJ_EHCATCHRET: + case BBJ_EHFILTERRET: + // Single successor jump cases + AssignLikelihoodJump(block); + break; + + case BBJ_COND: + // Two successor cases + AssignLikelihoodCond(block); + break; + + case BBJ_SWITCH: + // N successor cases + AssignLikelihoodSwitch(block); + break; + + default: + unreached(); + } + } +} + +//------------------------------------------------------------------------ +// IsDfsAncestor: see if block `x` is ancestor of block `y` in the depth +// first spanning tree +// +// Arguments: +// x -- block that is possible ancestor +// y -- block that is possible descendant +// +// Returns: +// True if x is ancestor of y in the depth first spanning tree. +// +// Notes: +// If return value is false, then x does not dominate y. +// +bool ProfileSynthesis::IsDfsAncestor(BasicBlock* x, BasicBlock* y) +{ + return ((x->bbPreorderNum <= y->bbPreorderNum) && (y->bbPostorderNum <= x->bbPostorderNum)); +} + +//------------------------------------------------------------------------ +// GetLoopFromHeader: see if a block is a loop header, and if so return +// the associated loop. +// +// Arguments: +// block - block in question +// +// Returns: +// loop headed by block, or nullptr +// +SimpleLoop* ProfileSynthesis::GetLoopFromHeader(BasicBlock* block) +{ + for (SimpleLoop* loop : *m_loops) + { + if (loop->m_head == block) + { + return loop; + } + } + + return nullptr; +} + +//------------------------------------------------------------------------ +// IsLoopBackEdge: see if an edge is a loop back edge +// +// Arguments: +// edge - edge in question +// +// Returns: +// True if edge is a backedge in some recognized loop. +// +// Notes: +// Different than asking IsDfsAncestor since we disqualify some +// natural backedges for complex loop strctures. 
+// +// Todo: +// Annotate the edge directly +// +bool ProfileSynthesis::IsLoopBackEdge(FlowEdge* edge) +{ + for (SimpleLoop* loop : *m_loops) + { + for (FlowEdge* loopBackEdge : loop->m_backEdges) + { + if (loopBackEdge == edge) + { + return true; + } + } + } + + return false; +} + +//------------------------------------------------------------------------ +// IsLoopExitEdge: see if a flow edge is a loop exit edge +// +// Arguments: +// edge - edge in question +// +// Returns: +// True if edge is an exit edge in some recognized loop +// +// Todo: +// Annotate the edge directly +// +// Decide if we want to report that the edge exits +// multiple loops. + +bool ProfileSynthesis::IsLoopExitEdge(FlowEdge* edge) +{ + for (SimpleLoop* loop : *m_loops) + { + for (FlowEdge* loopExitEdge : loop->m_exitEdges) + { + if (loopExitEdge == edge) + { + return true; + } + } + } + + return false; +} + +//------------------------------------------------------------------------ +// AssignLikelihoodNext: update edge likelihood for block that always +// transfers control to bbNext +// +// Arguments; +// block -- block in question +// +void ProfileSynthesis::AssignLikelihoodNext(BasicBlock* block) +{ + FlowEdge* const edge = m_comp->fgGetPredForBlock(block->bbNext, block); + edge->setLikelihood(1.0); +} + +//------------------------------------------------------------------------ +// AssignLikelihoodJump: update edge likelihood for a block that always +// transfers control to bbJumpDest +// +// Arguments; +// block -- block in question +// +void ProfileSynthesis::AssignLikelihoodJump(BasicBlock* block) +{ + FlowEdge* const edge = m_comp->fgGetPredForBlock(block->bbJumpDest, block); + edge->setLikelihood(1.0); +} + +//------------------------------------------------------------------------ +// AssignLikelihoodCond: update edge likelihood for a block that +// ends in a conditional branch +// +// Arguments; +// block -- block in question (BBJ_COND) +// +void 
ProfileSynthesis::AssignLikelihoodCond(BasicBlock* block) +{ + BasicBlock* const jump = block->bbJumpDest; + BasicBlock* const next = block->bbNext; + + // Watch for degenerate case + // + if (jump == next) + { + AssignLikelihoodNext(block); + return; + } + + FlowEdge* const jumpEdge = m_comp->fgGetPredForBlock(jump, block); + FlowEdge* const nextEdge = m_comp->fgGetPredForBlock(next, block); + + // THROW heuristic + // + bool const isJumpThrow = (jump->bbJumpKind == BBJ_THROW); + bool const isNextThrow = (next->bbJumpKind == BBJ_THROW); + + if (isJumpThrow != isNextThrow) + { + if (isJumpThrow) + { + jumpEdge->setLikelihood(0.0); + nextEdge->setLikelihood(1.0); + } + else + { + jumpEdge->setLikelihood(1.0); + nextEdge->setLikelihood(0.0); + } + + return; + } + + // LOOP BACK EDGE heuristic + // + bool const isJumpEdgeBackEdge = IsLoopBackEdge(jumpEdge); + bool const isNextEdgeBackEdge = IsLoopBackEdge(nextEdge); + + if (isJumpEdgeBackEdge != isNextEdgeBackEdge) + { + if (isJumpEdgeBackEdge) + { + JITDUMP(FMT_BB "->" FMT_BB " is loop back edge\n", block->bbNum, jump->bbNum); + jumpEdge->setLikelihood(0.9); + nextEdge->setLikelihood(0.1); + } + else + { + JITDUMP(FMT_BB "->" FMT_BB " is loop back edge\n", block->bbNum, next->bbNum); + jumpEdge->setLikelihood(0.1); + nextEdge->setLikelihood(0.9); + } + + return; + } + + // LOOP EXIT EDGE heuristic + // + // Consider: adjust probability if loop has multiple exit edges, so that + // overall exit probability is around 0.1. 
+ // + bool const isJumpEdgeExitEdge = IsLoopExitEdge(jumpEdge); + bool const isNextEdgeExitEdge = IsLoopExitEdge(nextEdge); + + if (isJumpEdgeExitEdge != isNextEdgeExitEdge) + { + if (isJumpEdgeExitEdge) + { + JITDUMP(FMT_BB "->" FMT_BB " is loop exit edge\n", block->bbNum, jump->bbNum); + jumpEdge->setLikelihood(0.1); + nextEdge->setLikelihood(0.9); + } + else + { + JITDUMP(FMT_BB "->" FMT_BB " is loop exit edge\n", block->bbNum, next->bbNum); + jumpEdge->setLikelihood(0.9); + nextEdge->setLikelihood(0.1); + } + + return; + } + + // RETURN heuristic + // + bool const isJumpReturn = (jump->bbJumpKind == BBJ_RETURN); + bool const isNextReturn = (next->bbJumpKind == BBJ_RETURN); + + if (isJumpReturn != isNextReturn) + { + if (isJumpReturn) + { + jumpEdge->setLikelihood(0.2); + nextEdge->setLikelihood(0.8); + } + else + { + jumpEdge->setLikelihood(0.8); + nextEdge->setLikelihood(0.2); + } + + return; + } + + // IL OFFSET heuristic + // + // Give slight preference to bbNext + // + jumpEdge->setLikelihood(0.48); + nextEdge->setLikelihood(0.52); +} + +//------------------------------------------------------------------------ +// AssignLikelihoodSwitch: update edge likelihood for a block that +// ends in a switch +// +// Arguments; +// block -- block in question (BBJ_SWITCH) +// +void ProfileSynthesis::AssignLikelihoodSwitch(BasicBlock* block) +{ + // Assume each switch case is equally probable + // + const unsigned n = block->NumSucc(); + assert(n != 0); + const weight_t p = 1 / (weight_t)n; + + // Each unique edge gets some multiple of that basic probability + // + for (BasicBlock* const succ : block->Succs(m_comp)) + { + FlowEdge* const edge = m_comp->fgGetPredForBlock(succ, block); + edge->setLikelihood(p * edge->getDupCount()); + } +} + +//------------------------------------------------------------------------ +// fgBlendLikelihoods: blend existing and heuristic likelihoods for all blocks +// +void ProfileSynthesis::BlendLikelihoods() +{ +} + +void 
ProfileSynthesis::ClearLikelihoods() +{ +} + +void ProfileSynthesis::ReverseLikelihoods() +{ +} + +void ProfileSynthesis::RandomizeLikelihoods() +{ +} + +//------------------------------------------------------------------------ +// BuildReversePostorder: compute depth first spanning tree and pre +// and post numbers for the blocks +// +void ProfileSynthesis::BuildReversePostorder() +{ + m_comp->EnsureBasicBlockEpoch(); + m_comp->fgBBReversePostorder = new (m_comp, CMK_Pgo) BasicBlock*[m_comp->fgBBNumMax + 1]{}; + m_comp->fgComputeEnterBlocksSet(); + m_comp->fgDfsReversePostorder(); + + // Build map from bbNum to Block*. + // + m_bbNumToBlockMap = new (m_comp, CMK_Pgo) BasicBlock*[m_comp->fgBBNumMax + 1]{}; + for (BasicBlock* const block : m_comp->Blocks()) + { + m_bbNumToBlockMap[block->bbNum] = block; + } + +#ifdef DEBUG + if (m_comp->verbose) + { + printf("\nAfter doing a post order traversal of the BB graph, this is the ordering:\n"); + for (unsigned i = 1; i <= m_comp->fgBBNumMax; ++i) + { + printf("%02u -> " FMT_BB "\n", i, m_comp->fgBBReversePostorder[i]->bbNum); + } + printf("\n"); + } +#endif // DEBUG +} + +//------------------------------------------------------------------------ +// FindLoops: locate and classify loops +// +void ProfileSynthesis::FindLoops() +{ + CompAllocator allocator = m_comp->getAllocator(CMK_Pgo); + m_loops = new (allocator) LoopVector(allocator); + unsigned improperLoopCount = 0; + + // Identify loops + // + for (unsigned i = 1; i <= m_comp->fgBBNumMax; i++) + { + BasicBlock* const block = m_comp->fgBBReversePostorder[i]; + + // If a block is a DFS ancestor of one of its predecessors then the block is a loop header. 
+ // + SimpleLoop* loop = nullptr; + + for (FlowEdge* predEdge : block->PredEdges()) + { + if (IsDfsAncestor(block, predEdge->getSourceBlock())) + { + if (loop == nullptr) + { + loop = new (allocator) SimpleLoop(block, allocator); + JITDUMP("\n"); + } + + JITDUMP(FMT_BB " -> " FMT_BB " is a backedge\n", predEdge->getSourceBlock()->bbNum, block->bbNum); + loop->m_backEdges.push_back(predEdge); + } + } + + if (loop == nullptr) + { + continue; + } + + JITDUMP(FMT_BB " is head of a DFS loop with %d back edges\n", block->bbNum, loop->m_backEdges.size()); + + // Now walk back in flow along the back edges from block to determine if + // this is a natural loop and to find all the blocks in the loop. + // + loop->m_blocks = BlockSetOps::MakeEmpty(m_comp); + BlockSetOps::AddElemD(m_comp, loop->m_blocks, block->bbNum); + + // todo: hoist this out and just do a reset here + jitstd::list<BasicBlock*> worklist(allocator); + + // Seed the worklist + // + for (FlowEdge* backEdge : loop->m_backEdges) + { + BasicBlock* const backEdgeSource = backEdge->getSourceBlock(); + + if (BlockSetOps::IsMember(m_comp, loop->m_blocks, backEdgeSource->bbNum)) + { + continue; + } + + worklist.push_back(backEdgeSource); + } + + bool isNaturalLoop = true; + + // Work back through flow to loop head or to another pred + // that is clearly outside the loop. + // + // TODO: verify that we can indeed get back to the loop head + // and not get stopped somewhere (eg loop through EH). + // + while (!worklist.empty() && isNaturalLoop) + { + BasicBlock* const loopBlock = worklist.back(); + worklist.pop_back(); + BlockSetOps::AddElemD(m_comp, loop->m_blocks, loopBlock->bbNum); + + for (FlowEdge* const predEdge : loopBlock->PredEdges()) + { + BasicBlock* const predBlock = predEdge->getSourceBlock(); + + // `block` cannot dominate `predBlock` unless it is a DFS ancestor. + // + if (!IsDfsAncestor(block, predBlock)) + { + // Does this represent flow out of some handler? + // If so we will ignore it. 
+ // + // Might want to vet that handler's try region entry + // is a dfs ancestor...? + // + if (!BasicBlock::sameHndRegion(block, predBlock)) + { + continue; + } + + JITDUMP("Loop is not natural; witness " FMT_BB " -> " FMT_BB "\n", predBlock->bbNum, + loopBlock->bbNum); + + isNaturalLoop = false; + improperLoopCount++; + break; + } + + if (BlockSetOps::IsMember(m_comp, loop->m_blocks, predBlock->bbNum)) + { + continue; + } + + worklist.push_back(predBlock); + } + } + + if (!isNaturalLoop) + { + continue; + } + + JITDUMP("Loop has %d blocks\n", BlockSetOps::Count(m_comp, loop->m_blocks)); + + // Find the exit edges + // + BlockSetOps::Iter iter(m_comp, loop->m_blocks); + unsigned bbNum = 0; + while (iter.NextElem(&bbNum)) + { + BasicBlock* const loopBlock = m_bbNumToBlockMap[bbNum]; + + for (BasicBlock* const succBlock : loopBlock->Succs(m_comp)) + { + if (!BlockSetOps::IsMember(m_comp, loop->m_blocks, succBlock->bbNum)) + { + FlowEdge* const exitEdge = m_comp->fgGetPredForBlock(succBlock, loopBlock); + JITDUMP(FMT_BB " -> " FMT_BB " is an exit edge\n", loopBlock->bbNum, succBlock->bbNum); + loop->m_exitEdges.push_back(exitEdge); + } + } + } + + // Find the entry edges + // + // Note if fgEntryBB is a loop head we won't have an entry edge. + // So it needs to be special cased later on when processing + // entry edges. + // + for (FlowEdge* const predEdge : loop->m_head->PredEdges()) + { + if (!IsDfsAncestor(block, predEdge->getSourceBlock())) + { + JITDUMP(FMT_BB " -> " FMT_BB " is an entry edge\n", predEdge->getSourceBlock()->bbNum, + loop->m_head->bbNum); + loop->m_entryEdges.push_back(predEdge); + } + } + + // Search for parent loop, validate proper nesting. + // + // Since loops record in outer->inner order the parent will be the + // most recently recorded loop that contains this loop's header. 
+ // + for (auto it = m_loops->rbegin(), itEnd = m_loops->rend(); it != itEnd; ++it) + { + SimpleLoop* const otherLoop = *it; + + if (BlockSetOps::IsMember(m_comp, otherLoop->m_blocks, block->bbNum)) + { + // Ancestor loop; should contain all blocks of this loop + // + assert(BlockSetOps::IsSubset(m_comp, loop->m_blocks, otherLoop->m_blocks)); + + if (loop->m_parent == nullptr) + { + loop->m_parent = otherLoop; + loop->m_depth = otherLoop->m_depth + 1; + JITDUMP("at depth %u, nested within loop starting at " FMT_BB "\n", loop->m_depth, + otherLoop->m_head->bbNum); + + // Note we could break here but that would bypass the non-overlap check + // just below, so for now we check against all known loops. + } + } + else + { + // Non-ancestor loop; should have no blocks in common with current loop + // + assert(BlockSetOps::IsEmptyIntersection(m_comp, loop->m_blocks, otherLoop->m_blocks)); + } + } + + if (loop->m_parent == nullptr) + { + JITDUMP("top-level loop\n"); + loop->m_depth = 1; + } + + // Record this loop + // + m_loops->push_back(loop); + } + + if (m_loops->size() > 0) + { + JITDUMP("\nFound %d loops\n", m_loops->size()); + } + + if (improperLoopCount > 0) + { + JITDUMP("Rejected %d loops\n", improperLoopCount); + } +} + +//------------------------------------------------------------------------ +// FindCyclicProbabilities: for each loop, compute how much flow returns +// to the loop head given one external count. +// +void ProfileSynthesis::ComputeCyclicProbabilities() +{ + // We found loop walking in reverse postorder, so the loop vector + // is naturally organized with outer loops before inner. + // + // Walk it backwards here so we compute inner loop cyclic probabilities + // first. We rely on that when processing outer loops. 
+ // + for (auto it = m_loops->rbegin(), itEnd = m_loops->rend(); it != itEnd; ++it) + { + SimpleLoop* const loop = *it; + ComputeCyclicProbabilities(loop); + } +} + +//------------------------------------------------------------------------ +// FindCyclicProbabilities: for a given loop, compute how much flow returns +// to the loop head given one external count. +// +void ProfileSynthesis::ComputeCyclicProbabilities(SimpleLoop* loop) +{ + // Initialize + // + BlockSetOps::Iter iter(m_comp, loop->m_blocks); + unsigned bbNum = 0; + while (iter.NextElem(&bbNum)) + { + BasicBlock* const loopBlock = m_bbNumToBlockMap[bbNum]; + loopBlock->bbWeight = 0.0; + } + + // Process loop blocks in RPO. Just takes one pass through the loop blocks + // as any cyclic contributions are handled by cyclic probabilities. + // + for (unsigned int i = 1; i <= m_comp->fgBBNumMax; i++) + { + BasicBlock* const block = m_comp->fgBBReversePostorder[i]; + + if (!BlockSetOps::IsMember(m_comp, loop->m_blocks, block->bbNum)) + { + continue; + } + + // Loop head gets external count of 1 + // + if (block == loop->m_head) + { + JITDUMP("ccp: " FMT_BB " :: 1.0\n", block->bbNum); + block->bbWeight = 1.0; + } + else + { + SimpleLoop* const nestedLoop = GetLoopFromHeader(block); + + if (nestedLoop != nullptr) + { + // We should have figured this out already. 
+ // + assert(nestedLoop->m_cyclicProbability != 0); + + // Sum entry edges, multply by Cp + // + weight_t newWeight = 0.0; + + for (FlowEdge* const edge : nestedLoop->m_entryEdges) + { + if (!BasicBlock::sameHndRegion(block, edge->getSourceBlock())) + { + continue; + } + + newWeight += edge->getLikelyWeight(); + } + + newWeight *= nestedLoop->m_cyclicProbability; + block->bbWeight = newWeight; + + JITDUMP("ccp (nested header): " FMT_BB " :: " FMT_WT "\n", block->bbNum, newWeight); + } + else + { + weight_t newWeight = 0.0; + + for (FlowEdge* const edge : block->PredEdges()) + { + if (!BasicBlock::sameHndRegion(block, edge->getSourceBlock())) + { + continue; + } + + newWeight += edge->getLikelyWeight(); + } + + block->bbWeight = newWeight; + + JITDUMP("ccp: " FMT_BB " :: " FMT_WT "\n", block->bbNum, newWeight); + } + } + } + + // Now look at cyclic flow back to the head block. + // + weight_t cyclicWeight = 0; + bool capped = false; + + for (FlowEdge* const edge : loop->m_backEdges) + { + JITDUMP("ccp backedge " FMT_BB " (" FMT_WT ") -> " FMT_BB " likelihood " FMT_WT "\n", + edge->getSourceBlock()->bbNum, edge->getSourceBlock()->bbWeight, loop->m_head->bbNum, + edge->getLikelihood()); + + cyclicWeight += edge->getLikelyWeight(); + } + + // Allow for a bit of rounding error, but not too much. + // (todo: decrease loop gain if we are in a deep nest?) + // assert(cyclicWeight <= 1.01); + // + if (cyclicWeight > 0.999) + { + capped = true; + cyclicWeight = 0.999; + } + + weight_t cyclicProbability = 1.0 / (1.0 - cyclicWeight); + + JITDUMP("For loop at " FMT_BB " cyclic weight is " FMT_WT " cyclic probability is " FMT_WT "%s\n", + loop->m_head->bbNum, cyclicWeight, cyclicProbability, capped ? 
" [capped]" : ""); + + loop->m_cyclicProbability = cyclicProbability; +} + +//------------------------------------------------------------------------ +// fgAssignInputWeights: provide initial profile weights for all blocks +// +// +// Notes: +// For finallys we may want to come back and rescale once we know the +// weights of all the callfinallies, or perhaps just use the weight +// of the first block in the associated try. +// +void ProfileSynthesis::AssignInputWeights() +{ + const weight_t entryWeight = BB_UNITY_WEIGHT; + const weight_t ehWeight = entryWeight / 1000; + + for (BasicBlock* block : m_comp->Blocks()) + { + block->bbWeight = 0.0; + } + + m_comp->fgFirstBB->bbWeight = entryWeight; + + for (EHblkDsc* const HBtab : EHClauses(m_comp)) + { + if (HBtab->HasFilter()) + { + HBtab->ebdFilter->bbWeight = ehWeight; + } + + HBtab->ebdHndBeg->bbWeight = ehWeight; + } +} + +//------------------------------------------------------------------------ +// ComputeBlockWeights: compute weights for all blocks +// based on input weights, edge likelihoods, and cyclic probabilities +// +void ProfileSynthesis::ComputeBlockWeights() +{ + JITDUMP("Computing block weights\n"); + + for (unsigned int i = 1; i <= m_comp->fgBBNumMax; i++) + { + BasicBlock* const block = m_comp->fgBBReversePostorder[i]; + SimpleLoop* const loop = GetLoopFromHeader(block); + + if (loop != nullptr) + { + // Start with initial weight, sum entry edges, multiply by Cp + // + weight_t newWeight = block->bbWeight; + + for (FlowEdge* const edge : loop->m_entryEdges) + { + newWeight += edge->getLikelyWeight(); + } + + newWeight *= loop->m_cyclicProbability; + block->bbWeight = newWeight; + + JITDUMP("cbw (header): " FMT_BB " :: " FMT_WT "\n", block->bbNum, block->bbWeight); + } + else + { + // start with initial weight, sum all incoming edges + // + weight_t newWeight = block->bbWeight; + + for (FlowEdge* const edge : block->PredEdges()) + { + newWeight += edge->getLikelyWeight(); + } + + block->bbWeight = 
newWeight; + + JITDUMP("cbw: " FMT_BB " :: " FMT_WT "\n", block->bbNum, block->bbWeight); + } + + // Todo: just use weight to determine run rarely, not flag + // + if (block->bbWeight == 0.0) + { + block->bbSetRunRarely(); + } + else + { + block->bbFlags &= ~BBF_RUN_RARELY; + } + } +} diff --git a/src/coreclr/jit/fgprofilesynthesis.h b/src/coreclr/jit/fgprofilesynthesis.h new file mode 100644 index 0000000..393f332 --- /dev/null +++ b/src/coreclr/jit/fgprofilesynthesis.h @@ -0,0 +1,102 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#ifndef _FGPROFILESYNTHESIS_H_ +#define _FGPROFILESYNTHESIS_H_ +
#include "compiler.h" +#include "jitstd.h" + +// Flowgraph Profile Synthesis + +typedef jitstd::vector<FlowEdge*> EdgeVector; + +struct SimpleLoop +{ + SimpleLoop(BasicBlock* head, CompAllocator allocator) + : m_head(head) + , m_parent(nullptr) + , m_blocks(BlockSetOps::UninitVal()) + , m_entryEdges(allocator) + , m_backEdges(allocator) + , m_exitEdges(allocator) + , m_cyclicProbability(0) + , m_depth(0) + { + } + + BasicBlock* m_head; + SimpleLoop* m_parent; + BlockSet m_blocks; + EdgeVector m_entryEdges; + EdgeVector m_backEdges; + EdgeVector m_exitEdges; + weight_t m_cyclicProbability; + unsigned m_depth; +}; + +typedef jitstd::vector<SimpleLoop*> LoopVector; + +//------------------------------------------------------------------------ +// ProfileSynthesisOption: specify behavior of profile synthesis +// +enum class ProfileSynthesisOption +{ + AssignLikelihoods, + RetainLikelihoods, + BlendLikelihoods, + ResetAndSynthesize, + ReverseLikelihoods, + RandomLikelihoods, +}; + +//------------------------------------------------------------------------ +// ProfileSynthesis: synthesize, repair, or alter existing profile information +// +class ProfileSynthesis +{ +public: + static void Run(Compiler* compiler, ProfileSynthesisOption option) + { + ProfileSynthesis p(compiler); + p.Run(option); + } + 
+private: + ProfileSynthesis(Compiler* compiler) : m_comp(compiler), m_loops(nullptr), m_bbNumToBlockMap(nullptr) + { + } + + void Run(ProfileSynthesisOption option); + + void BuildReversePostorder(); + static bool IsDfsAncestor(BasicBlock* x, BasicBlock* y); + bool IsLoopBackEdge(FlowEdge* edge); + bool IsLoopExitEdge(FlowEdge* edge); + + void FindLoops(); + SimpleLoop* GetLoopFromHeader(BasicBlock* block); + + void AssignLikelihoods(); + void AssignLikelihoodNext(BasicBlock* block); + void AssignLikelihoodJump(BasicBlock* block); + void AssignLikelihoodCond(BasicBlock* block); + void AssignLikelihoodSwitch(BasicBlock* block); + void BlendLikelihoods(); + void ClearLikelihoods(); + void ReverseLikelihoods(); + void RandomizeLikelihoods(); + + void ComputeCyclicProbabilities(); + void ComputeCyclicProbabilities(SimpleLoop* loop); + + void AssignInputWeights(); + void ComputeBlockWeights(); + +private: + Compiler* const m_comp; + LoopVector* m_loops; + BasicBlock** m_bbNumToBlockMap; +}; + +#endif // !_FGPROFILESYNTHESIS_H_ diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index be7a58e..a130529 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -592,7 +592,10 @@ CONFIG_INTEGER(JitCrossCheckDevirtualizationAndPGO, W("JitCrossCheckDevirtualiza CONFIG_INTEGER(JitNoteFailedExactDevirtualization, W("JitNoteFailedExactDevirtualization"), 0) CONFIG_INTEGER(JitRandomlyCollect64BitCounts, W("JitRandomlyCollect64BitCounts"), 0) // Collect 64-bit counts randomly // for some methods. -#endif // debug +// 1: profile synthesis for root methods +// 2: profile synthesis for root methods w/o pgo data +CONFIG_INTEGER(JitSynthesizeCounts, W("JitSynthesizeCounts"), 0) +#endif // Devirtualize virtual calls with getExactClasses (NativeAOT only for now) CONFIG_INTEGER(JitEnableExactDevirtualization, W("JitEnableExactDevirtualization"), 1) -- 2.7.4