JIT: make profile data available to inlinees (#42277)
authorAndy Ayers <andya@microsoft.com>
Wed, 16 Sep 2020 18:14:37 +0000 (11:14 -0700)
committerGitHub <noreply@github.com>
Wed, 16 Sep 2020 18:14:37 +0000 (11:14 -0700)
Update the jit to try and read profile data for inlinees, and if successful,
scale it appropriately for the inline call site. This kicks in for crossgen
BBOPT and TieredPGO Tier1.

Update VM and Crossgen hosts to handle requests for inlinee profile counts.
Crossgen2 does not seem to support profile data retrieval yet.

Note crossgen experience may not be as good as one might expect, because
crossgen BBINSTR loses counts for inlinees. But enabling this for crossgen
even with this limitation is probably a win overall.

Fix small issue in the jit where we were overly aggressive about merging the
callee block's flags into the callsite block's flags.

src/coreclr/src/jit/compiler.cpp
src/coreclr/src/jit/compiler.h
src/coreclr/src/jit/flowgraph.cpp
src/coreclr/src/jit/inline.h
src/coreclr/src/vm/jitinterface.cpp
src/coreclr/src/zap/zapinfo.cpp

index f0f2042..561b8f7 100644 (file)
@@ -2485,7 +2485,6 @@ void Compiler::compInitOptions(JitFlags* jitFlags)
     {
         // The following flags are lost when inlining. (They are removed in
         // Compiler::fgInvokeInlineeCompiler().)
-        assert(!jitFlags->IsSet(JitFlags::JIT_FLAG_BBOPT));
         assert(!jitFlags->IsSet(JitFlags::JIT_FLAG_BBINSTR));
         assert(!jitFlags->IsSet(JitFlags::JIT_FLAG_PROF_ENTERLEAVE));
         assert(!jitFlags->IsSet(JitFlags::JIT_FLAG_DEBUG_EnC));
@@ -2740,6 +2739,49 @@ void Compiler::compInitOptions(JitFlags* jitFlags)
     opts.compFastTailCalls = true;
 #endif // FEATURE_FASTTAILCALL
 
+    // Profile data
+    //
+    fgBlockCounts                = nullptr;
+    fgProfileData_ILSizeMismatch = false;
+    fgNumProfileRuns             = 0;
+    if (jitFlags->IsSet(JitFlags::JIT_FLAG_BBOPT))
+    {
+        HRESULT hr;
+        hr = info.compCompHnd->getMethodBlockCounts(info.compMethodHnd, &fgBlockCountsCount, &fgBlockCounts,
+                                                    &fgNumProfileRuns);
+
+        JITDUMP("BBOPT set -- VM query for profile data for %s returned: hr=%0x; counts at %p, %d blocks, %d runs\n",
+                info.compFullName, hr, fgBlockCounts, fgBlockCountsCount, fgNumProfileRuns);
+
+        // a failed result that also has a non-NULL fgBlockCounts
+        // indicates that the ILSize for the method no longer matches
+        // the ILSize for the method when profile data was collected.
+        //
+        // We will discard the IBC data in this case
+        //
+        if (FAILED(hr) && (fgBlockCounts != nullptr))
+        {
+            fgProfileData_ILSizeMismatch = true;
+            fgBlockCounts                = nullptr;
+        }
+#ifdef DEBUG
+        // A successful result implies a non-NULL fgBlockCounts
+        //
+        if (SUCCEEDED(hr))
+        {
+            assert(fgBlockCounts != nullptr);
+        }
+
+        // A failed result implies a NULL fgBlockCounts
+        //   see implementation of Compiler::fgHaveProfileData()
+        //
+        if (FAILED(hr))
+        {
+            assert(fgBlockCounts == nullptr);
+        }
+#endif
+    }
+
     if (compIsForInlining())
     {
         return;
@@ -3147,45 +3189,6 @@ void Compiler::compInitOptions(JitFlags* jitFlags)
 #endif
     }
 
-    fgBlockCounts                = nullptr;
-    fgProfileData_ILSizeMismatch = false;
-    fgNumProfileRuns             = 0;
-    if (jitFlags->IsSet(JitFlags::JIT_FLAG_BBOPT))
-    {
-        assert(!compIsForInlining());
-        HRESULT hr;
-        hr = info.compCompHnd->getMethodBlockCounts(info.compMethodHnd, &fgBlockCountsCount, &fgBlockCounts,
-                                                    &fgNumProfileRuns);
-
-        // a failed result that also has a non-NULL fgBlockCounts
-        // indicates that the ILSize for the method no longer matches
-        // the ILSize for the method when profile data was collected.
-        //
-        // We will discard the IBC data in this case
-        //
-        if (FAILED(hr) && (fgBlockCounts != nullptr))
-        {
-            fgProfileData_ILSizeMismatch = true;
-            fgBlockCounts                = nullptr;
-        }
-#ifdef DEBUG
-        // A successful result implies a non-NULL fgBlockCounts
-        //
-        if (SUCCEEDED(hr))
-        {
-            assert(fgBlockCounts != nullptr);
-        }
-
-        // A failed result implies a NULL fgBlockCounts
-        //   see implementation of Compiler::fgHaveProfileData()
-        //
-        if (FAILED(hr))
-        {
-            assert(fgBlockCounts == nullptr);
-        }
-#endif
-    }
-
 #ifdef DEBUG
     // Now, set compMaxUncheckedOffsetForNullObject for STRESS_NULL_OBJECT_CHECK
     if (compStressCompile(STRESS_NULL_OBJECT_CHECK, 30))
index c3a01fe..c56a40b 100644 (file)
@@ -5384,6 +5384,7 @@ protected:
     }
 
     bool fgHaveProfileData();
+    void fgComputeProfileScale();
     bool fgGetProfileWeightForBasicBlock(IL_OFFSET offset, unsigned* weight);
     void fgInstrumentMethod();
 
index a52ba8d..80821fc 100644 (file)
@@ -176,12 +176,41 @@ void Compiler::fgInit()
     fgPreviousCandidateSIMDFieldAsgStmt = nullptr;
 #endif
 
-    fgHasSwitch = false;
+    fgHasSwitch   = false;
+    fgBlockCounts = nullptr;
 }
 
+//------------------------------------------------------------------------
+// fgHaveProfileData: check if profile data is available
+//
+// Returns:
+//   true if so
+//
+// Note:
+//   This now returns true for inlinees. We might consider preserving the
+//   old behavior for crossgen, since crossgen BBINSTRs still do inlining
+//   and don't instrument the inlinees.
+//
+//   Thus if BBINSTR and BBOPT do the same inlines (which can happen)
+//   profile data for an inlinee (if available) will not fully reflect
+//   the behavior of the inlinee when called from this method.
+//
+//   If this inlinee was not inlined by the BBINSTR run then the
+//   profile data for the inlinee will reflect this method's influence.
+//
+//   * for ALWAYS_INLINE and FORCE_INLINE cases it is unlikely we'll find
+//     any profile data, as BBINSTR and BBOPT callers will both inline;
+//     only indirect callers will invoke the instrumented version to run.
+//   * for DISCRETIONARY_INLINE cases we may or may not find relevant
+//     data, depending, but chances are the data is relevant.
+//
+//  TieredPGO data comes from Tier0 methods, which currently do not do
+//  any inlining; thus inlinee profile data should be available and
+//  representative.
+//
 bool Compiler::fgHaveProfileData()
 {
-    if (compIsForInlining() || compIsForImportOnly())
+    if (compIsForImportOnly())
     {
         return false;
     }
@@ -189,6 +218,111 @@ bool Compiler::fgHaveProfileData()
     return (fgBlockCounts != nullptr);
 }
 
+//------------------------------------------------------------------------
+// fgComputeProfileScale: determine how much scaling to apply
+//   to raw profile count data.
+//
+// Notes:
+//   Scaling is only needed for inlinees, and the results of this
+//   computation are recorded in fields of impInlineInfo.
+//
+void Compiler::fgComputeProfileScale()
+{
+    // Only applicable to inlinees
+    assert(compIsForInlining());
+
+    // Have we already determined the scale?
+    if (impInlineInfo->profileScaleState != InlineInfo::ProfileScaleState::UNDETERMINED)
+    {
+        return;
+    }
+
+    // No, not yet -- try and compute the scale.
+    JITDUMP("Computing inlinee profile scale:\n");
+
+    // Call site has profile weight?
+    //
+    // Todo: handle case of unprofiled caller invoking profiled callee.
+    //
+    const BasicBlock* callSiteBlock = impInlineInfo->iciBlock;
+    if (!callSiteBlock->hasProfileWeight())
+    {
+        JITDUMP("   ... call site not profiled\n");
+        impInlineInfo->profileScaleState = InlineInfo::ProfileScaleState::UNAVAILABLE;
+        return;
+    }
+
+    const BasicBlock::weight_t callSiteWeight = callSiteBlock->bbWeight;
+
+    // Call site has zero count?
+    //
+    // Todo: perhaps retain some semblance of callee profile data,
+    // possibly scaled down severely.
+    //
+    if (callSiteWeight == 0)
+    {
+        JITDUMP("   ... zero call site count\n");
+        impInlineInfo->profileScaleState = InlineInfo::ProfileScaleState::UNAVAILABLE;
+        return;
+    }
+
+    // Callee has profile data?
+    //
+    if (!fgHaveProfileData())
+    {
+        JITDUMP("   ... no callee profile data\n");
+        impInlineInfo->profileScaleState = InlineInfo::ProfileScaleState::UNAVAILABLE;
+        return;
+    }
+
+    // Find callee's unscaled entry weight.
+    //
+    // Ostensibly this should be fgCalledCount for the callee, but that's not available
+    // as it requires some analysis.
+    //
+    // For most callees it will be the same as the entry block count.
+    //
+    BasicBlock::weight_t calleeWeight = 0;
+
+    if (!fgGetProfileWeightForBasicBlock(0, &calleeWeight))
+    {
+        JITDUMP("   ... no callee profile data for entry block\n");
+        impInlineInfo->profileScaleState = InlineInfo::ProfileScaleState::UNAVAILABLE;
+        return;
+    }
+
+    // We should generally be able to assume calleeWeight >= callSiteWeight.
+    // If this isn't so, perhaps something is wrong with the profile data
+    // collection or retrieval.
+    //
+    // For now, ignore callee data if we'd need to upscale.
+    //
+    if (calleeWeight < callSiteWeight)
+    {
+        JITDUMP("   ... callee entry count %d is less than call site count %d\n", calleeWeight, callSiteWeight);
+        impInlineInfo->profileScaleState = InlineInfo::ProfileScaleState::UNAVAILABLE;
+        return;
+    }
+
+    // Hence, scale is always in the range (0.0...1.0] -- we are always scaling down callee counts.
+    //
+    const double scale                = ((double)callSiteWeight) / calleeWeight;
+    impInlineInfo->profileScaleFactor = scale;
+    impInlineInfo->profileScaleState  = InlineInfo::ProfileScaleState::KNOWN;
+
+    JITDUMP("   call site count %u callee entry count %u scale %f\n", callSiteWeight, calleeWeight, scale);
+}
+
+//------------------------------------------------------------------------
+// fgGetProfileWeightForBasicBlock: obtain profile data for a block
+//
+// Arguments:
+//   offset       - IL offset of the block
+//   weightWB     - [OUT] weight obtained
+//
+// Returns:
+//   true if data was found
+//
 bool Compiler::fgGetProfileWeightForBasicBlock(IL_OFFSET offset, unsigned* weightWB)
 {
     noway_assert(weightWB != nullptr);
@@ -229,19 +363,16 @@ bool Compiler::fgGetProfileWeightForBasicBlock(IL_OFFSET offset, unsigned* weigh
     }
 #endif // DEBUG
 
-    if (fgHaveProfileData() == false)
+    if (!fgHaveProfileData())
     {
         return false;
     }
 
-    noway_assert(!compIsForInlining());
     for (UINT32 i = 0; i < fgBlockCountsCount; i++)
     {
         if (fgBlockCounts[i].ILOffset == offset)
         {
-            weight = fgBlockCounts[i].ExecutionCount;
-
-            *weightWB = weight;
+            *weightWB = fgBlockCounts[i].ExecutionCount;
             return true;
         }
     }
@@ -5621,6 +5752,11 @@ unsigned Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, F
         }
     }
 
+    if (compIsForInlining())
+    {
+        fgComputeProfileScale();
+    }
+
     do
     {
         unsigned   jmpAddr = DUMMY_INIT(BAD_IL_OFFSET);
@@ -6043,9 +6179,19 @@ unsigned Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, F
         curBBdesc->bbCodeOffsEnd = nxtBBoffs;
 
         unsigned profileWeight;
+
         if (fgGetProfileWeightForBasicBlock(curBBoffs, &profileWeight))
         {
+            if (compIsForInlining())
+            {
+                if (impInlineInfo->profileScaleState == InlineInfo::ProfileScaleState::KNOWN)
+                {
+                    profileWeight = (unsigned)(impInlineInfo->profileScaleFactor * profileWeight);
+                }
+            }
+
             curBBdesc->setBBProfileWeight(profileWeight);
+
             if (profileWeight == 0)
             {
                 curBBdesc->bbSetRunRarely();
@@ -7182,7 +7328,7 @@ unsigned Compiler::fgGetNestingLevel(BasicBlock* block, unsigned* pFinallyNestin
 }
 
 //------------------------------------------------------------------------
-// fgImport: read the IL forf the method and create jit IR
+// fgImport: read the IL for the method and create jit IR
 //
 // Returns:
 //    phase status
@@ -23229,6 +23375,8 @@ void Compiler::fgInvokeInlineeCompiler(GenTreeCall* call, InlineResult* inlineRe
     inlineInfo.retExprClassHnd        = nullptr;
     inlineInfo.retExprClassHndIsExact = false;
     inlineInfo.inlineResult           = inlineResult;
+    inlineInfo.profileScaleState      = InlineInfo::ProfileScaleState::UNDETERMINED;
+    inlineInfo.profileScaleFactor     = 0.0;
 #ifdef FEATURE_SIMD
     inlineInfo.hasSIMDTypeArgLocalOrReturn = false;
 #endif // FEATURE_SIMD
@@ -23302,7 +23450,6 @@ void Compiler::fgInvokeInlineeCompiler(GenTreeCall* call, InlineResult* inlineRe
 
                 // The following flags are lost when inlining.
                 // (This is checked in Compiler::compInitOptions().)
-                compileFlagsForInlinee.Clear(JitFlags::JIT_FLAG_BBOPT);
                 compileFlagsForInlinee.Clear(JitFlags::JIT_FLAG_BBINSTR);
                 compileFlagsForInlinee.Clear(JitFlags::JIT_FLAG_PROF_ENTERLEAVE);
                 compileFlagsForInlinee.Clear(JitFlags::JIT_FLAG_DEBUG_EnC);
@@ -23508,7 +23655,9 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo)
             const unsigned __int64 inlineeBlockFlags = InlineeCompiler->fgFirstBB->bbFlags;
             noway_assert((inlineeBlockFlags & BBF_HAS_JMP) == 0);
             noway_assert((inlineeBlockFlags & BBF_KEEP_BBJ_ALWAYS) == 0);
-            iciBlock->bbFlags |= inlineeBlockFlags;
+
+            // Todo: we may want to exclude other flags here.
+            iciBlock->bbFlags |= (inlineeBlockFlags & ~BBF_RUN_RARELY);
 
 #ifdef DEBUG
             if (verbose)
@@ -23667,6 +23816,13 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo)
                 block->bbJumpKind = BBJ_NONE;
             }
         }
+
+        // Update profile weight for callee blocks, if we didn't do it already.
+        if (pInlineInfo->profileScaleState == InlineInfo::ProfileScaleState::KNOWN)
+        {
+            continue;
+        }
+
         if (inheritWeight)
         {
             block->inheritWeight(iciBlock);
index c8b5880..8302356 100644 (file)
@@ -614,6 +614,17 @@ struct InlineInfo
     GenTreeCall* iciCall;  // The GT_CALL node to be inlined.
     Statement*   iciStmt;  // The statement iciCall is in.
     BasicBlock*  iciBlock; // The basic block iciStmt is in.
+
+    // Profile support
+    enum class ProfileScaleState
+    {
+        UNDETERMINED,
+        KNOWN,
+        UNAVAILABLE
+    };
+
+    ProfileScaleState profileScaleState;
+    double            profileScaleFactor;
 };
 
 // InlineContext tracks the inline history in a method.
index 8fc0af1..dde3fd9 100644 (file)
@@ -11900,32 +11900,23 @@ HRESULT CEEJitInfo::getMethodBlockCounts (
 
 #ifdef FEATURE_PGO
 
-    // For now, only return the info for the method being jitted.
-    // Will need to fix this to gain access to pgo data for inlinees.
     MethodDesc* pMD = (MethodDesc*)ftnHnd;
-
-    if (pMD == m_pMethodBeingCompiled)
+    unsigned codeSize = 0;
+    if (pMD->IsDynamicMethod())
     {
-        unsigned codeSize = 0;
-        if (pMD->IsDynamicMethod())
-        {
-            unsigned stackSize, ehSize;
-            CorInfoOptions options;
-            DynamicResolver * pResolver = m_pMethodBeingCompiled->AsDynamicMethodDesc()->GetResolver();
-            pResolver->GetCodeInfo(&codeSize, &stackSize, &options, &ehSize);
-        }
-        else
-        {
-            codeSize = m_ILHeader->GetCodeSize();
-        }
-
-        hr = PgoManager::getMethodBlockCounts(pMD, codeSize, pCount, pBlockCounts, pNumRuns);
+        unsigned stackSize, ehSize;
+        CorInfoOptions options;
+        DynamicResolver * pResolver = m_pMethodBeingCompiled->AsDynamicMethodDesc()->GetResolver();
+        pResolver->GetCodeInfo(&codeSize, &stackSize, &options, &ehSize);
     }
-    else
+    else if (pMD->HasILHeader())
     {
-        hr = E_NOTIMPL;
+        COR_ILMETHOD_DECODER decoder(pMD->GetILHeader());
+        codeSize = decoder.GetCodeSize();
     }
 
+    hr = PgoManager::getMethodBlockCounts(pMD, codeSize, pCount, pBlockCounts, pNumRuns);
+
 #else
     _ASSERTE(!"getMethodBlockCounts not implemented on CEEJitInfo!");
     hr = E_NOTIMPL;
index 90b047b..5fad49e 100644 (file)
@@ -976,7 +976,6 @@ HRESULT ZapInfo::getMethodBlockCounts (
 {
     _ASSERTE(pBlockCounts != nullptr);
     _ASSERTE(pCount != nullptr);
-    _ASSERTE(ftnHnd == m_currentMethodHandle);
 
     HRESULT hr;
 
@@ -1004,23 +1003,11 @@ HRESULT ZapInfo::getMethodBlockCounts (
         return E_FAIL;
     }
 
-    mdMethodDef md = m_currentMethodToken;
+    mdMethodDef md;
+    IfFailRet(m_zapper->m_pEECompileInfo->GetMethodDef(ftnHnd, &md));
 
     if (IsNilToken(md))
     {
-        // This must be the non-System.Object instantiation of a generic type/method.
-        IfFailRet(m_zapper->m_pEECompileInfo->GetMethodDef(ftnHnd, &md));
-    }
-#ifdef _DEBUG
-    else
-    {
-        mdMethodDef mdTemp;
-        IfFailRet(m_zapper->m_pEECompileInfo->GetMethodDef(ftnHnd, &mdTemp));
-        _ASSERTE(md == mdTemp);
-    }
-#endif
-    if (IsNilToken(md))
-    {
         return E_FAIL;
     }
 
@@ -1064,9 +1051,23 @@ HRESULT ZapInfo::getMethodBlockCounts (
     *pBlockCounts = (ICorJitInfo::BlockCounts *) &profileData->method.block[0];
     *pCount  = profileData->method.cBlock;
 
+    // Find method's IL size
+    //
+    unsigned ilSize = m_currentMethodInfo.ILCodeSize;
+
+    if (ftnHnd != m_currentMethodHandle)
+    {
+        CORINFO_METHOD_INFO methodInfo;
+        if (!getMethodInfo(ftnHnd, &methodInfo))
+        {
+            return E_FAIL;
+        }
+        ilSize = methodInfo.ILCodeSize;
+    }
+
     // If the ILSize is non-zero the the ILCodeSize also must match
     //
-    if ((profileData->method.ILSize != 0) && (profileData->method.ILSize != m_currentMethodInfo.ILCodeSize))
+    if ((profileData->method.ILSize != 0) && (profileData->method.ILSize != ilSize))
     {
         // IL code for this method does not match the IL code for the method when it was profiled
         // in such cases we tell the JIT to discard the profile data by returning E_FAIL