JIT: revise inlinee scale computations (#51593)
authorAndy Ayers <andya@microsoft.com>
Wed, 21 Apr 2021 03:31:34 +0000 (20:31 -0700)
committerGitHub <noreply@github.com>
Wed, 21 Apr 2021 03:31:34 +0000 (20:31 -0700)
Rework the inlinee profile scale computations so that all scaling happens
during the profile incorporation phase, rather than sometimes deferring
the scaling until inlining. Because of this we no longer need to record
the scale on the inline info.

Toss out profile data if all counts are zero.

Update the edge profile solver to handle a special case where no return block
was executed, but edges within the method had counts. In such cases the entry
block count can end up zero and blocking proper scaling computations. For this
case, try and deduce a plausible count in this case from nearby blocks and edges.

Fix the edge weight computations to tolerate inconsistent data rather than
to assert.

src/coreclr/jit/compiler.h
src/coreclr/jit/fginline.cpp
src/coreclr/jit/fgprofile.cpp
src/coreclr/jit/inline.h

index 3bc79d2..4a2975a 100644 (file)
@@ -5706,7 +5706,7 @@ public:
 
     void WalkSpanningTree(SpanningTreeVisitor* visitor);
     void fgSetProfileWeight(BasicBlock* block, BasicBlock::weight_t weight);
-    void fgComputeProfileScale();
+    void fgApplyProfileScale();
 
     // fgIsUsingProfileWeights - returns true if we have real profile data for this method
     //                           or if we have some fake profile data for the stress mode
index 18e82d2..176b9b5 100644 (file)
@@ -881,8 +881,6 @@ void Compiler::fgInvokeInlineeCompiler(GenTreeCall* call, InlineResult* inlineRe
     inlineInfo.retExprClassHnd        = nullptr;
     inlineInfo.retExprClassHndIsExact = false;
     inlineInfo.inlineResult           = inlineResult;
-    inlineInfo.profileScaleState      = InlineInfo::ProfileScaleState::UNDETERMINED;
-    inlineInfo.profileScaleFactor     = 0.0;
 #ifdef FEATURE_SIMD
     inlineInfo.hasSIMDTypeArgLocalOrReturn = false;
 #endif // FEATURE_SIMD
@@ -1274,10 +1272,6 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo)
     //
     // Set the try and handler index and fix the jump types of inlinee's blocks.
     //
-
-    bool inheritWeight;
-    inheritWeight = true; // The firstBB does inherit the weight from the iciBlock
-
     for (block = InlineeCompiler->fgFirstBB; block != nullptr; block = block->bbNext)
     {
         noway_assert(!block->hasTryIndex());
@@ -1299,48 +1293,20 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo)
 
         if (block->bbJumpKind == BBJ_RETURN)
         {
-            inheritWeight = true; // A return block does inherit the weight from the iciBlock
             noway_assert((block->bbFlags & BBF_HAS_JMP) == 0);
             if (block->bbNext)
             {
+                JITDUMP("\nConvert bbJumpKind of " FMT_BB " to BBJ_ALWAYS to bottomBlock " FMT_BB "\n", block->bbNum,
+                        bottomBlock->bbNum);
                 block->bbJumpKind = BBJ_ALWAYS;
                 block->bbJumpDest = bottomBlock;
-#ifdef DEBUG
-                if (verbose)
-                {
-                    printf("\nConvert bbJumpKind of " FMT_BB " to BBJ_ALWAYS to bottomBlock " FMT_BB "\n", block->bbNum,
-                           bottomBlock->bbNum);
-                }
-#endif // DEBUG
             }
             else
             {
-#ifdef DEBUG
-                if (verbose)
-                {
-                    printf("\nConvert bbJumpKind of " FMT_BB " to BBJ_NONE\n", block->bbNum);
-                }
-#endif // DEBUG
+                JITDUMP("\nConvert bbJumpKind of " FMT_BB " to BBJ_NONE\n", block->bbNum);
                 block->bbJumpKind = BBJ_NONE;
             }
         }
-
-        // Update profile weight for callee blocks, if we didn't do it already.
-        if (pInlineInfo->profileScaleState == InlineInfo::ProfileScaleState::KNOWN)
-        {
-            continue;
-        }
-
-        // If we were unable to compute a scale for some reason, then
-        // try to do something plausible. Entry/exit blocks match call
-        // site, internal blocks scaled by half; all rare blocks left alone.
-        //
-        if (!block->isRunRarely())
-        {
-            block->inheritWeightPercentage(iciBlock, inheritWeight ? 100 : 50);
-        }
-
-        inheritWeight = false;
     }
 
     // Insert inlinee's blocks into inliner's block list.
index 6f71c2e..dcbf7f1 100644 (file)
@@ -48,25 +48,17 @@ bool Compiler::fgHaveProfileData()
 }
 
 //------------------------------------------------------------------------
-// fgComputeProfileScale: determine how much scaling to apply
-//   to raw profile count data.
+// fgApplyProfileScale: scale inlinee counts by appropriate scale factor
 //
-// Notes:
-//   Scaling is only needed for inlinees, and the results of this
-//   computation are recorded in fields of impInlineInfo.
-//
-void Compiler::fgComputeProfileScale()
+void Compiler::fgApplyProfileScale()
 {
     // Only applicable to inlinees
-    assert(compIsForInlining());
-
-    // Have we already determined the scale?
-    if (impInlineInfo->profileScaleState != InlineInfo::ProfileScaleState::UNDETERMINED)
+    //
+    if (!compIsForInlining())
     {
         return;
     }
 
-    // No, not yet -- try and compute the scale.
     JITDUMP("Computing inlinee profile scale:\n");
 
     // Callee has profile data?
@@ -85,18 +77,15 @@ void Compiler::fgComputeProfileScale()
     //
     // Note when/if we early do normalization this may need to change.
     //
-    BasicBlock::weight_t const calleeWeight = fgFirstBB->bbWeight;
+    BasicBlock::weight_t calleeWeight = fgFirstBB->bbWeight;
 
     // Callee entry weight is nonzero?
-    //
-    // If callee entry profile count is zero, perhaps we should discard
-    // the profile data.
+    // If so, just choose the smallest plausible weight.
     //
     if (calleeWeight == BB_ZERO_WEIGHT)
     {
-        JITDUMP("   ... callee entry count is zero\n");
-        impInlineInfo->profileScaleState = InlineInfo::ProfileScaleState::UNAVAILABLE;
-        return;
+        calleeWeight = fgHaveProfileData() ? 1.0f : BB_UNITY_WEIGHT;
+        JITDUMP("   ... callee entry has weight zero, will use weight of " FMT_WT " to scale\n", calleeWeight);
     }
 
     // Call site has profile weight?
@@ -136,12 +125,16 @@ void Compiler::fgComputeProfileScale()
     //
     // Hence, scale can be somewhat arbitrary...
     //
-    const double scale                = ((double)callSiteWeight) / calleeWeight;
-    impInlineInfo->profileScaleFactor = scale;
-    impInlineInfo->profileScaleState  = InlineInfo::ProfileScaleState::KNOWN;
+    const BasicBlock::weight_t scale = callSiteWeight / calleeWeight;
 
     JITDUMP("   call site count " FMT_WT " callee entry count " FMT_WT " scale " FMT_WT "\n", callSiteWeight,
             calleeWeight, scale);
+    JITDUMP("Scaling inlinee blocks\n");
+
+    for (BasicBlock* bb = fgFirstBB; bb != nullptr; bb = bb->bbNext)
+    {
+        bb->scaleBBWeight(scale);
+    }
 }
 
 //------------------------------------------------------------------------
@@ -1689,12 +1682,7 @@ PhaseStatus Compiler::fgIncorporateProfileData()
     {
         JITDUMP("JitStress -- incorporating random profile data\n");
         fgIncorporateBlockCounts();
-
-        if (compIsForInlining())
-        {
-            fgComputeProfileScale();
-        }
-
+        fgApplyProfileScale();
         return PhaseStatus::MODIFIED_EVERYTHING;
     }
 
@@ -1702,6 +1690,8 @@ PhaseStatus Compiler::fgIncorporateProfileData()
     //
     if (!fgHaveProfileData())
     {
+        // No...
+        //
         if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_BBOPT))
         {
             JITDUMP("BBOPT set, but no profile data available (hr=%08x)\n", fgPgoQueryResult);
@@ -1711,12 +1701,11 @@ PhaseStatus Compiler::fgIncorporateProfileData()
             JITDUMP("BBOPT not set\n");
         }
 
-        if (compIsForInlining())
-        {
-            fgComputeProfileScale();
-        }
+        // Scale the "synthetic" block weights.
+        //
+        fgApplyProfileScale();
 
-        return PhaseStatus::MODIFIED_NOTHING;
+        return compIsForInlining() ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING;
     }
 
     // Summarize profile data
@@ -1777,13 +1766,9 @@ PhaseStatus Compiler::fgIncorporateProfileData()
         fgIncorporateEdgeCounts();
     }
 
-    // Now that we have profile data, compute the profile scale for inlinees,
-    // if we haven'd done so already.
+    // Scale data as appropriate
     //
-    if (compIsForInlining())
-    {
-        fgComputeProfileScale();
-    }
+    fgApplyProfileScale();
 
     return PhaseStatus::MODIFIED_EVERYTHING;
 }
@@ -1801,17 +1786,6 @@ PhaseStatus Compiler::fgIncorporateProfileData()
 //
 void Compiler::fgSetProfileWeight(BasicBlock* block, BasicBlock::weight_t profileWeight)
 {
-    // Scale count appropriately for inlinees.
-    //
-    if (compIsForInlining())
-    {
-        if (impInlineInfo->profileScaleState == InlineInfo::ProfileScaleState::KNOWN)
-        {
-            double scaledWeight = impInlineInfo->profileScaleFactor * profileWeight;
-            profileWeight       = (BasicBlock::weight_t)scaledWeight;
-        }
-    }
-
     block->setBBProfileWeight(profileWeight);
 
 #if HANDLER_ENTRY_MUST_BE_IN_HOT_SECTION
@@ -1830,8 +1804,6 @@ void Compiler::fgSetProfileWeight(BasicBlock* block, BasicBlock::weight_t profil
 //   and set block weights
 //
 // Notes:
-//   Count data for inlinees is scaled (usually down).
-//
 //   Since we are now running before the importer, we do not know which
 //   blocks will be imported, and we should not see any internal blocks.
 //
@@ -2039,6 +2011,7 @@ private:
     bool m_mismatch;
     bool m_negativeCount;
     bool m_failedToConverge;
+    bool m_allWeightsZero;
 
 public:
     EfficientEdgeCountReconstructor(Compiler* comp)
@@ -2056,6 +2029,7 @@ public:
         , m_mismatch(false)
         , m_negativeCount(false)
         , m_failedToConverge(false)
+        , m_allWeightsZero(true)
     {
     }
 
@@ -2186,49 +2160,49 @@ void EfficientEdgeCountReconstructor::Prepare()
                 // Optimization TODO: if profileCount is zero, we can just ignore this edge
                 // and the right things will happen.
                 //
-                uint32_t const profileCount = *(uint32_t*)(m_comp->fgPgoData + schemaEntry.Offset);
-                {
-                    BasicBlock::weight_t const weight = (BasicBlock::weight_t)profileCount;
+                uint32_t const             profileCount = *(uint32_t*)(m_comp->fgPgoData + schemaEntry.Offset);
+                BasicBlock::weight_t const weight       = (BasicBlock::weight_t)profileCount;
 
-                    // Find the blocks.
-                    //
-                    BasicBlock* sourceBlock = nullptr;
+                m_allWeightsZero &= (profileCount == 0);
 
-                    if (!m_keyToBlockMap.Lookup(schemaEntry.ILOffset, &sourceBlock))
-                    {
-                        JITDUMP("Could not find source block for schema entry %d (IL offset/key %08x\n", iSchema,
-                                schemaEntry.ILOffset);
-                    }
+                // Find the blocks.
+                //
+                BasicBlock* sourceBlock = nullptr;
 
-                    BasicBlock* targetBlock = nullptr;
+                if (!m_keyToBlockMap.Lookup(schemaEntry.ILOffset, &sourceBlock))
+                {
+                    JITDUMP("Could not find source block for schema entry %d (IL offset/key %08x\n", iSchema,
+                            schemaEntry.ILOffset);
+                }
 
-                    if (!m_keyToBlockMap.Lookup(schemaEntry.Other, &targetBlock))
-                    {
-                        JITDUMP("Could not find target block for schema entry %d (IL offset/key %08x\n", iSchema,
-                                schemaEntry.ILOffset);
-                    }
+                BasicBlock* targetBlock = nullptr;
 
-                    if ((sourceBlock == nullptr) || (targetBlock == nullptr))
-                    {
-                        // Looks like there is skew between schema and graph.
-                        //
-                        Mismatch();
-                        continue;
-                    }
+                if (!m_keyToBlockMap.Lookup(schemaEntry.Other, &targetBlock))
+                {
+                    JITDUMP("Could not find target block for schema entry %d (IL offset/key %08x\n", iSchema,
+                            schemaEntry.ILOffset);
+                }
 
-                    Edge* const edge = new (m_allocator) Edge(sourceBlock, targetBlock);
+                if ((sourceBlock == nullptr) || (targetBlock == nullptr))
+                {
+                    // Looks like there is skew between schema and graph.
+                    //
+                    Mismatch();
+                    continue;
+                }
 
-                    JITDUMP("... adding known edge " FMT_BB " -> " FMT_BB ": weight " FMT_WT "\n",
-                            edge->m_sourceBlock->bbNum, edge->m_targetBlock->bbNum, weight);
+                Edge* const edge = new (m_allocator) Edge(sourceBlock, targetBlock);
 
-                    edge->m_weightKnown = true;
-                    edge->m_weight      = weight;
+                JITDUMP("... adding known edge " FMT_BB " -> " FMT_BB ": weight " FMT_WT "\n",
+                        edge->m_sourceBlock->bbNum, edge->m_targetBlock->bbNum, weight);
 
-                    EdgeKey edgeKey(schemaEntry.ILOffset, schemaEntry.Other);
-                    m_edgeKeyToEdgeMap.Set(edgeKey, edge);
+                edge->m_weightKnown = true;
+                edge->m_weight      = weight;
 
-                    m_edges++;
-                }
+                EdgeKey edgeKey(schemaEntry.ILOffset, schemaEntry.Other);
+                m_edgeKeyToEdgeMap.Set(edgeKey, edge);
+
+                m_edges++;
             }
             break;
 
@@ -2245,9 +2219,10 @@ void EfficientEdgeCountReconstructor::Solve()
 {
     // If issues arose earlier, then don't try solving.
     //
-    if (m_badcode || m_mismatch)
+    if (m_badcode || m_mismatch || m_allWeightsZero)
     {
-        JITDUMP("... not solving because of the %s\n", m_badcode ? "badcode" : "mismatch")
+        JITDUMP("... not solving because of the %s\n",
+                m_badcode ? "badcode" : m_allWeightsZero ? "zero counts" : "mismatch");
         return;
     }
 
@@ -2432,24 +2407,66 @@ void EfficientEdgeCountReconstructor::Solve()
         }
     }
 
-    if (m_unknownBlocks == 0)
-    {
-        JITDUMP("\nSolver: converged in %u passes\n", nPasses);
-    }
-    else
+    if (m_unknownBlocks != 0)
     {
         JITDUMP("\nSolver: failed to converge in %u passes, %u blocks and %u edges remain unsolved\n", nPasses,
                 m_unknownBlocks, m_unknownEdges);
         FailedToConverge();
+        return;
+    }
+
+    JITDUMP("\nSolver: converged in %u passes\n", nPasses);
+
+    // If, after solving, the entry weight ends up as zero, set it to
+    // the max of the weight of successor edges or join-free successor
+    // block weight. We do this so we can determine a plausible scale
+    // count.
+    //
+    // This can happen for methods that do not return (say they always
+    // throw, or had not yet returned when we snapped the counts).
+    //
+    // Note we know there are nonzero counts elsewhere in the method, otherwise
+    // m_allWeightsZero would be true and we would have bailed out above.
+    //
+    BlockInfo* const firstInfo = BlockToInfo(m_comp->fgFirstBB);
+    if (firstInfo->m_weight == BB_ZERO_WEIGHT)
+    {
+        assert(!m_allWeightsZero);
+
+        BasicBlock::weight_t newWeight = BB_ZERO_WEIGHT;
+
+        for (Edge* edge = firstInfo->m_outgoingEdges; edge != nullptr; edge = edge->m_nextOutgoingEdge)
+        {
+            if (edge->m_weightKnown)
+            {
+                newWeight = max(newWeight, edge->m_weight);
+            }
+
+            BlockInfo* const targetBlockInfo  = BlockToInfo(edge->m_targetBlock);
+            Edge* const      targetBlockEdges = targetBlockInfo->m_incomingEdges;
+
+            if (targetBlockInfo->m_weightKnown && (targetBlockEdges->m_nextIncomingEdge == nullptr))
+            {
+                newWeight = max(newWeight, targetBlockInfo->m_weight);
+            }
+        }
+
+        if (newWeight == BB_ZERO_WEIGHT)
+        {
+            JITDUMP("Entry block weight and neighborhood was zero\n");
+        }
+        else
+        {
+            JITDUMP("Entry block weight was zero, setting entry weight to neighborhood max " FMT_WT "\n", newWeight);
+        }
+
+        firstInfo->m_weight = newWeight;
     }
 }
 
 //------------------------------------------------------------------------
 // EfficientEdgeCountReconstructor::Propagate: actually set block weights.
 //
-// Notes:
-//    For inlinees, weights are scaled appropriately.
-//
 void EfficientEdgeCountReconstructor::Propagate()
 {
     // We don't expect mismatches or convergence failures.
@@ -2462,32 +2479,21 @@ void EfficientEdgeCountReconstructor::Propagate()
 
     // If any issues arose during reconstruction, don't set weights.
     //
-    if (m_badcode || m_mismatch || m_failedToConverge)
+    if (m_badcode || m_mismatch || m_failedToConverge || m_allWeightsZero)
     {
         JITDUMP("... discarding profile data because of %s\n",
-                m_badcode ? "badcode" : m_mismatch ? "mismatch" : "failed to converge");
+                m_badcode ? "badcode" : m_mismatch ? "mismatch" : m_allWeightsZero ? "zero counts"
+                                                                                   : "failed to converge");
 
         // Make sure nothing else in the jit looks at the profile data.
         //
         m_comp->fgPgoSchema     = nullptr;
-        m_comp->fgPgoFailReason = "PGO data available, but there was a reconstruction error";
+        m_comp->fgPgoFailReason = "PGO data available, but there was a reconstruction problem";
 
         return;
     }
 
-    if (m_comp->compIsForInlining())
-    {
-        // Tentatively set first block's profile to compute inlinee profile scale.
-        //
-        BlockInfo* const info = BlockToInfo(m_comp->fgFirstBB);
-        assert(info->m_weightKnown);
-
-        m_comp->fgSetProfileWeight(m_comp->fgFirstBB, info->m_weight);
-        m_comp->fgComputeProfileScale();
-    }
-
-    // Set weight on all blocks (will reset entry weight for inlinees based
-    // on above computed scale).
+    // Set weight on all blocks.
     //
     for (BasicBlock* block = m_comp->fgFirstBB; (block != nullptr); block = block->bbNext)
     {
@@ -2558,7 +2564,7 @@ bool flowList::setEdgeWeightMinChecked(BasicBlock::weight_t newWeight,
             {
                 result = true;
 
-                if (flEdgeWeightMax != 0)
+                if (flEdgeWeightMax != BB_ZERO_WEIGHT)
                 {
                     // We will raise flEdgeWeightMin and Max towards newWeight
                     flEdgeWeightMin = flEdgeWeightMax;
@@ -2579,10 +2585,11 @@ bool flowList::setEdgeWeightMinChecked(BasicBlock::weight_t newWeight,
             {
                 result = true;
 
-                assert(flEdgeWeightMax != 0);
-
-                // We will lower flEdgeWeightMin towards newWeight
-                flEdgeWeightMin = newWeight;
+                if (flEdgeWeightMax != BB_ZERO_WEIGHT)
+                {
+                    // We will lower flEdgeWeightMin towards newWeight
+                    flEdgeWeightMin = newWeight;
+                }
 
                 if (wbUsedSlop != nullptr)
                 {
@@ -2657,7 +2664,7 @@ bool flowList::setEdgeWeightMaxChecked(BasicBlock::weight_t newWeight,
             {
                 result = true;
 
-                if (flEdgeWeightMax != 0)
+                if (flEdgeWeightMax != BB_ZERO_WEIGHT)
                 {
                     // We will allow this to raise flEdgeWeightMax towards newWeight
                     flEdgeWeightMax = newWeight;
@@ -2677,11 +2684,12 @@ bool flowList::setEdgeWeightMaxChecked(BasicBlock::weight_t newWeight,
             {
                 result = true;
 
-                assert(flEdgeWeightMax != 0);
-
-                // We will allow this to lower flEdgeWeightMin and Max towards newWeight
-                flEdgeWeightMax = flEdgeWeightMin;
-                flEdgeWeightMin = newWeight;
+                if (flEdgeWeightMax != BB_ZERO_WEIGHT)
+                {
+                    // We will allow this to lower flEdgeWeightMin and Max towards newWeight
+                    flEdgeWeightMax = flEdgeWeightMin;
+                    flEdgeWeightMin = newWeight;
+                }
 
                 if (wbUsedSlop != nullptr)
                 {
index 82da668..3ed9a5d 100644 (file)
@@ -630,17 +630,6 @@ struct InlineInfo
     GenTreeCall* iciCall;  // The GT_CALL node to be inlined.
     Statement*   iciStmt;  // The statement iciCall is in.
     BasicBlock*  iciBlock; // The basic block iciStmt is in.
-
-    // Profile support
-    enum class ProfileScaleState
-    {
-        UNDETERMINED,
-        KNOWN,
-        UNAVAILABLE
-    };
-
-    ProfileScaleState profileScaleState;
-    double            profileScaleFactor;
 };
 
 // InlineContext tracks the inline history in a method.