Initial version of class profiling for PGO (#45133)
authorAndy Ayers <andya@microsoft.com>
Wed, 25 Nov 2020 19:22:54 +0000 (11:22 -0800)
committerGitHub <noreply@github.com>
Wed, 25 Nov 2020 19:22:54 +0000 (11:22 -0800)
* Initial version of class profiling for PGO

Add support to the jit and runtime so that PGO can determine the distribution of
classes at virtual and indirect call sites.

Use this information when jitting to enable guarded devirtualization, if there
is a suitably likely class to guess for.

Enable by setting:
```
COMPlus_TieredCompilation=1
COMPlus_TieredPGO=1
COMPlus_JitClassProfiling=1
COMPlus_JitEnableGuardedDevirtualization=1
```
Impact can be enhanced by also setting
```
COMPlus_TC_QuickJitForLoops=1
```
to allow more methods to pass through Tier0.

36 files changed:
eng/pipelines/common/templates/runtimes/run-test-job.yml
src/coreclr/src/ToolBox/superpmi/superpmi-shared/icorjitinfoimpl.h
src/coreclr/src/ToolBox/superpmi/superpmi-shared/lwmlist.h
src/coreclr/src/ToolBox/superpmi/superpmi-shared/methodcontext.cpp
src/coreclr/src/ToolBox/superpmi/superpmi-shared/methodcontext.h
src/coreclr/src/ToolBox/superpmi/superpmi-shim-collector/icorjitinfo.cpp
src/coreclr/src/ToolBox/superpmi/superpmi-shim-counter/icorjitinfo.cpp
src/coreclr/src/ToolBox/superpmi/superpmi-shim-simple/icorjitinfo.cpp
src/coreclr/src/ToolBox/superpmi/superpmi/icorjitinfo.cpp
src/coreclr/src/inc/corinfo.h
src/coreclr/src/inc/corjit.h
src/coreclr/src/inc/jithelpers.h
src/coreclr/src/jit/block.h
src/coreclr/src/jit/compiler.cpp
src/coreclr/src/jit/compiler.h
src/coreclr/src/jit/flowgraph.cpp
src/coreclr/src/jit/gentree.h
src/coreclr/src/jit/importer.cpp
src/coreclr/src/jit/indirectcalltransformer.cpp
src/coreclr/src/jit/inline.h
src/coreclr/src/jit/jitconfigvalues.h
src/coreclr/src/jit/patchpoint.cpp
src/coreclr/src/jit/utils.h
src/coreclr/src/tools/Common/JitInterface/CorInfoBase.cs
src/coreclr/src/tools/Common/JitInterface/ThunkGenerator/ThunkInput.txt
src/coreclr/src/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs
src/coreclr/src/tools/aot/jitinterface/jitinterface.h
src/coreclr/src/tools/aot/jitinterface/jitwrapper.cpp
src/coreclr/src/vm/jithelpers.cpp
src/coreclr/src/vm/jitinterface.cpp
src/coreclr/src/vm/jitinterface.h
src/coreclr/src/vm/pgo.cpp
src/coreclr/src/vm/pgo.h
src/coreclr/src/zap/zapinfo.cpp
src/coreclr/src/zap/zapinfo.h
src/tests/Common/testenvironment.proj

index 5fd480b..66f3b40 100644 (file)
@@ -472,6 +472,7 @@ jobs:
           - jitobjectstackallocation
           - jitpgo
           - jitpgo_inline
+          - jitpgo_classes
         ${{ if in(parameters.testGroup, 'ilasm') }}:
           scenarios:
           - ilasmroundtrip
index 0a2ea7b..0a7e086 100644 (file)
@@ -979,6 +979,34 @@ struct BlockCounts  // Also defined here: code:CORBBTPROF_BLOCK_DATA
     UINT32 ILOffset;
     UINT32 ExecutionCount;
 };
+
+// Data structure for a single class probe.
+//
+// ILOffset is the IL offset in the method for the call site being probed.
+// Currently it must be ORed with CLASS_FLAG and (for interface calls)
+// INTERFACE_FLAG.
+//
+// Count is the number of times a call was made at that call site.
+//
+// SIZE is the number of entries in the table.
+//
+// SAMPLE_INTERVAL must be >= SIZE. SAMPLE_INTERVAL / SIZE
+// gives the average number of calls between table updates.
+// 
+struct ClassProfile
+{
+    enum { 
+        SIZE = 8, 
+        SAMPLE_INTERVAL = 32, 
+        CLASS_FLAG     = 0x80000000, 
+        INTERFACE_FLAG = 0x40000000,
+        OFFSET_MASK    = 0x3FFFFFFF
+    };
+
+    UINT32 ILOffset;
+    UINT32 Count;
+    CORINFO_CLASS_HANDLE ClassTable[SIZE];
+};
 */
 
 // allocate a basic block profile buffer where execution counts will be stored
@@ -993,6 +1021,15 @@ HRESULT getMethodBlockCounts(CORINFO_METHOD_HANDLE ftnHnd,
                              BlockCounts**     pBlockCounts,
                              UINT32 *          pNumRuns);
 
+// Get the likely implementing class for a virtual call or interface call made by ftnHnd
+// at the indicated IL offset. baseHnd is the interface class or base class for the method
+// being called. 
+CORINFO_CLASS_HANDLE getLikelyClass(CORINFO_METHOD_HANDLE ftnHnd,
+                                    CORINFO_CLASS_HANDLE  baseHnd,
+                                    UINT32                ilOffset,
+                                    UINT32*               pLikelihood,
+                                    UINT32*               pNumberOfCases);
+
 // Associates a native call site, identified by its offset in the native code stream, with
 // the signature information and method handle the JIT used to lay out the call site. If
 // the call site has no signature information (e.g. a helper call) or has no method handle
index 3355f01..803eae0 100644 (file)
@@ -100,6 +100,7 @@ LWM(GetJitFlags, DWORD, DD)
 LWM(GetJitTimeLogFilename, DWORD, DWORD)
 LWM(GetJustMyCodeHandle, DWORDLONG, DLDL)
 LWM(GetLazyStringLiteralHelper, DWORDLONG, DWORD)
+LWM(GetLikelyClass, Agnostic_GetLikelyClass, Agnostic_GetLikelyClassResult)
 LWM(GetLocationOfThisType, DWORDLONG, Agnostic_CORINFO_LOOKUP_KIND)
 LWM(GetMethodAttribs, DWORDLONG, DWORD)
 LWM(GetMethodClass, DWORDLONG, DWORDLONG)
index 054a60e..43131dd 100644 (file)
@@ -5265,6 +5265,48 @@ HRESULT MethodContext::repGetMethodBlockCounts(CORINFO_METHOD_HANDLE        ftnH
     return result;
 }
 
+void MethodContext::recGetLikelyClass(CORINFO_METHOD_HANDLE ftnHnd, CORINFO_CLASS_HANDLE baseHnd, UINT32 ilOffset, CORINFO_CLASS_HANDLE result, UINT32* pLikelihood, UINT32* pNumberOfClasses)
+{
+    if (GetLikelyClass == nullptr)
+        GetLikelyClass = new LightWeightMap<Agnostic_GetLikelyClass, Agnostic_GetLikelyClassResult>();
+
+    Agnostic_GetLikelyClass key;
+    ZeroMemory(&key, sizeof(Agnostic_GetLikelyClass));
+
+    key.ftnHnd = (DWORDLONG) ftnHnd;
+    key.baseHnd = (DWORDLONG) baseHnd;
+    key.ilOffset = (DWORD) ilOffset;
+
+    Agnostic_GetLikelyClassResult value;
+    ZeroMemory(&value, sizeof(Agnostic_GetLikelyClassResult));
+    value.classHnd = (DWORDLONG) result;
+    value.likelihood = *pLikelihood;
+    value.numberOfClasses = *pNumberOfClasses;
+
+    GetLikelyClass->Add(key, value);
+    DEBUG_REC(dmpGetLikelyClass(key, value));
+}
+// Print one GetLikelyClass map entry (query key and recorded answer) with printf.
+// No trailing newline is emitted.
+void MethodContext::dmpGetLikelyClass(const Agnostic_GetLikelyClass& key, const Agnostic_GetLikelyClassResult& value)
+{
+    printf("GetLikelyClass key ftn-%016llX base-%016llX il-%u, class-%016llX likelihood-%u numberOfClasses-%u",
+           key.ftnHnd,
+           key.baseHnd,
+           key.ilOffset,
+           value.classHnd,
+           value.likelihood,
+           value.numberOfClasses);
+}
+CORINFO_CLASS_HANDLE MethodContext::repGetLikelyClass(CORINFO_METHOD_HANDLE ftnHnd, CORINFO_CLASS_HANDLE baseHnd, UINT32 ilOffset, UINT32* pLikelihood, UINT32* pNumberOfClasses)
+{
+    Agnostic_GetLikelyClass key;
+    ZeroMemory(&key, sizeof(Agnostic_GetLikelyClass));
+    key.ftnHnd = (DWORDLONG) ftnHnd;
+    key.baseHnd = (DWORDLONG) baseHnd;
+    key.ilOffset = (DWORD) ilOffset;
+
+    Agnostic_GetLikelyClassResult value = GetLikelyClass->Get(key);
+    DEBUG_REP(dmpGetLikelyClass(key, value));
+
+    *pLikelihood = value.likelihood;
+    *pNumberOfClasses = value.numberOfClasses;
+    return (CORINFO_CLASS_HANDLE) value.classHnd;
+}
+
 void MethodContext::recMergeClasses(CORINFO_CLASS_HANDLE cls1, CORINFO_CLASS_HANDLE cls2, CORINFO_CLASS_HANDLE result)
 {
     if (MergeClasses == nullptr)
index 5dafd7c..75466c1 100644 (file)
@@ -435,6 +435,21 @@ public:
         DWORD numRuns;
         DWORD result;
     };
+
+    struct Agnostic_GetLikelyClass // Key of the GetLikelyClass map: identifies one getLikelyClass query
+    {
+        DWORDLONG ftnHnd;   // ftnHnd argument (method making the call), stored as a DWORDLONG
+        DWORDLONG baseHnd;  // baseHnd argument (interface or base class), stored as a DWORDLONG
+        DWORD     ilOffset; // ilOffset argument (IL offset of the call site)
+    };
+
+    struct Agnostic_GetLikelyClassResult // Value of the GetLikelyClass map: the recorded answer to one query
+    {
+        DWORDLONG classHnd;        // class handle returned by getLikelyClass, stored as a DWORDLONG
+        DWORD     likelihood;      // value reported through pLikelihood
+        DWORD     numberOfClasses; // value reported through pNumberOfClasses
+    };
+
     struct Agnostic_GetProfilingHandle
     {
         DWORD     bHookFunction;
@@ -1193,6 +1208,10 @@ public:
                                     ICorJitInfo::BlockCounts**   pBlockCounts,
                                     UINT32 *                     pNumRuns);
 
+    void recGetLikelyClass(CORINFO_METHOD_HANDLE ftnHnd, CORINFO_CLASS_HANDLE  baseHnd, UINT32 ilOffset, CORINFO_CLASS_HANDLE classHnd, UINT32* pLikelihood, UINT32* pNumberOfClasses);
+    void dmpGetLikelyClass(const Agnostic_GetLikelyClass& key, const Agnostic_GetLikelyClassResult& value);
+    CORINFO_CLASS_HANDLE repGetLikelyClass(CORINFO_METHOD_HANDLE ftnHnd, CORINFO_CLASS_HANDLE  baseHnd, UINT32 ilOffset, UINT32* pLikelihood, UINT32* pNumberOfClasses);
+
     void recMergeClasses(CORINFO_CLASS_HANDLE cls1, CORINFO_CLASS_HANDLE cls2, CORINFO_CLASS_HANDLE result);
     void dmpMergeClasses(DLDL key, DWORDLONG value);
     CORINFO_CLASS_HANDLE repMergeClasses(CORINFO_CLASS_HANDLE cls1, CORINFO_CLASS_HANDLE cls2);
@@ -1359,7 +1378,7 @@ private:
 };
 
 // ********************* Please keep this up-to-date to ease adding more ***************
-// Highest packet number: 181
+// Highest packet number: 182
 // *************************************************************************************
 enum mcPackets
 {
@@ -1458,6 +1477,7 @@ enum mcPackets
     Packet_GetJitFlags                                   = 154, // Added 2/3/2016
     Packet_GetJitTimeLogFilename                         = 67,
     Packet_GetJustMyCodeHandle                           = 68,
+    Packet_GetLikelyClass                                = 182, // Added 9/27/2020
     Packet_GetLocationOfThisType                         = 69,
     Packet_GetMethodAttribs                              = 70,
     Packet_GetMethodClass                                = 71,
index f56e019..116e075 100644 (file)
@@ -2049,6 +2049,21 @@ HRESULT interceptor_ICJI::getMethodBlockCounts(CORINFO_METHOD_HANDLE ftnHnd,
     return temp;
 }
 
+// Get the likely implementing class for a virtual call or interface call made by ftnHnd
+// at the indicated IL offset. baseHnd is the interface class or base class for the method
+// being called. 
+CORINFO_CLASS_HANDLE interceptor_ICJI::getLikelyClass(CORINFO_METHOD_HANDLE ftnHnd,
+                                                      CORINFO_CLASS_HANDLE  baseHnd,
+                                                      UINT32                ilOffset,
+                                                      UINT32*               pLikelihood,
+                                                      UINT32*               pNumberOfClasses)
+{
+    mc->cr->AddCall("getLikelyClass");
+    CORINFO_CLASS_HANDLE result = original_ICorJitInfo->getLikelyClass(ftnHnd, baseHnd, ilOffset, pLikelihood, pNumberOfClasses);
+    mc->recGetLikelyClass(ftnHnd, baseHnd, ilOffset, result, pLikelihood, pNumberOfClasses);
+    return result;
+}
+
 // Associates a native call site, identified by its offset in the native code stream, with
 // the signature information and method handle the JIT used to lay out the call site. If
 // the call site has no signature information (e.g. a helper call) or has no method handle
index 03a5643..496dd3d 100644 (file)
@@ -1623,6 +1623,19 @@ HRESULT interceptor_ICJI::getMethodBlockCounts(CORINFO_METHOD_HANDLE ftnHnd,
     return original_ICorJitInfo->getMethodBlockCounts(ftnHnd, pCount, pBlockCounts, pNumRuns);
 }
 
+// Get the likely implementing class for a virtual call or interface call made by ftnHnd
+// at the indicated IL offset. baseHnd is the interface class or base class for the method
+// being called. 
+CORINFO_CLASS_HANDLE interceptor_ICJI::getLikelyClass(CORINFO_METHOD_HANDLE ftnHnd,
+                                                      CORINFO_CLASS_HANDLE  baseHnd,
+                                                      UINT32                ilOffset,
+                                                      UINT32*               pLikelihood,
+                                                      UINT32*               pNumberOfClasses)
+{
+    mcs->AddCall("getLikelyClass");
+    return original_ICorJitInfo->getLikelyClass(ftnHnd, baseHnd, ilOffset, pLikelihood, pNumberOfClasses);
+}
+
 // Associates a native call site, identified by its offset in the native code stream, with
 // the signature information and method handle the JIT used to lay out the call site. If
 // the call site has no signature information (e.g. a helper call) or has no method handle
index 139937e..613a299 100644 (file)
@@ -1441,6 +1441,18 @@ HRESULT interceptor_ICJI::getMethodBlockCounts(CORINFO_METHOD_HANDLE ftnHnd,
     return original_ICorJitInfo->getMethodBlockCounts(ftnHnd, pCount, pBlockCounts, pNumRuns);
 }
 
+// Get the likely implementing class for a virtual call or interface call made by ftnHnd
+// at the indicated IL offset. baseHnd is the interface class or base class for the method
+// being called. 
+CORINFO_CLASS_HANDLE interceptor_ICJI::getLikelyClass(CORINFO_METHOD_HANDLE ftnHnd,
+                                                      CORINFO_CLASS_HANDLE  baseHnd,
+                                                      UINT32                ilOffset,
+                                                      UINT32*               pLikelihood,
+                                                      UINT32*               pNumberOfClasses)
+{
+    return original_ICorJitInfo->getLikelyClass(ftnHnd, baseHnd, ilOffset, pLikelihood, pNumberOfClasses);
+}
+
 // Associates a native call site, identified by its offset in the native code stream, with
 // the signature information and method handle the JIT used to lay out the call site. If
 // the call site has no signature information (e.g. a helper call) or has no method handle
index c8b42b4..39aa366 100644 (file)
@@ -1805,6 +1805,19 @@ HRESULT MyICJI::getMethodBlockCounts(CORINFO_METHOD_HANDLE ftnHnd,
     return jitInstance->mc->repGetMethodBlockCounts(ftnHnd, pCount, pBlockCounts, pNumRuns);
 }
 
+// Get the likely implementing class for a virtual call or interface call made by ftnHnd
+// at the indicated IL offset. baseHnd is the interface class or base class for the method
+// being called. 
+CORINFO_CLASS_HANDLE MyICJI::getLikelyClass(CORINFO_METHOD_HANDLE ftnHnd,
+                                            CORINFO_CLASS_HANDLE  baseHnd,
+                                            UINT32                ilOffset,
+                                            UINT32*               pLikelihood,
+                                            UINT32*               pNumberOfClasses)
+{
+    jitInstance->mc->cr->AddCall("getLikelyClass");
+    return jitInstance->mc->repGetLikelyClass(ftnHnd, baseHnd, ilOffset, pLikelihood, pNumberOfClasses);
+}
+
 // Associates a native call site, identified by its offset in the native code stream, with
 // the signature information and method handle the JIT used to lay out the call site. If
 // the call site has no signature information (e.g. a helper call) or has no method handle
index 5a41023..fa9db2c 100644 (file)
@@ -208,11 +208,11 @@ TODO: Talk about initializing strutures before use
 //
 //////////////////////////////////////////////////////////////////////////////////////////////////////////
 
-constexpr GUID JITEEVersionIdentifier = { /* 8031aa05-4568-40fc-a0d2-d971d8edba16 */
-    0x8031aa05,
-    0x4568,
-    0x40fc,
-    {0xa0, 0xd2, 0xd9, 0x71, 0xd8, 0xed, 0xba, 0x16}
+constexpr GUID JITEEVersionIdentifier = { /* 0d235fe4-65a1-487a-8553-c845496da901 */
+    0x0d235fe4,
+    0x65a1,
+    0x487a,
+    {0x85, 0x53, 0xc8, 0x45, 0x49, 0x6d, 0xa9, 0x01}
 };
 
 //////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -623,6 +623,7 @@ enum CorInfoHelpFunc
     CORINFO_HELP_STACK_PROBE,               // Probes each page of the allocated stack frame
 
     CORINFO_HELP_PATCHPOINT,                // Notify runtime that code has reached a patchpoint
+    CORINFO_HELP_CLASSPROFILE,              // Update class profile for a call site
 
     CORINFO_HELP_COUNT,
 };
index 609bfc6..13a9e33 100644 (file)
@@ -251,6 +251,34 @@ public:
         UINT32 ExecutionCount;
     };
 
+    // Data structure for a single class probe.
+    //
+    // ILOffset is the IL offset in the method for the call site being probed.
+    // Currently it must be ORed with CLASS_FLAG and (for interface calls)
+    // INTERFACE_FLAG.
+    //
+    // Count is the number of times a call was made at that call site.
+    //
+    // SIZE is the number of entries in the table.
+    //
+    // SAMPLE_INTERVAL must be >= SIZE. SAMPLE_INTERVAL / SIZE
+    // gives the average number of calls between table updates.
+    //
+    struct ClassProfile
+    {
+        enum {
+            SIZE = 8,                    // number of entries in ClassTable
+            SAMPLE_INTERVAL = 32,        // must be >= SIZE (see above)
+            CLASS_FLAG     = 0x80000000, // bit 31 of ILOffset: marks the record as a class profile
+            INTERFACE_FLAG = 0x40000000, // bit 30 of ILOffset: the probed call is an interface call
+            OFFSET_MASK    = 0x3FFFFFFF  // low 30 bits of ILOffset, i.e. everything but the two flag bits
+        };
+
+        UINT32 ILOffset;                       // call site IL offset, ORed with the flag bits above
+        UINT32 Count;                          // number of calls made at the call site
+        CORINFO_CLASS_HANDLE ClassTable[SIZE]; // presumably the class handles sampled at the call site -- TODO confirm
+    };
+
     // allocate a basic block profile buffer where execution counts will be stored
     // for jitted basic blocks.
     virtual HRESULT allocMethodBlockCounts (
@@ -267,6 +295,24 @@ public:
             UINT32 *              pNumRuns         // pointer to the total number of profile scenarios run
             ) = 0;
 
+    // Get the likely implementing class for a virtual call or interface call made by ftnHnd
+    // at the indicated IL offset. baseHnd is the interface class or base class for the method
+    // being called. May return NULL.
+    //
+    // pLikelihood is the estimated percent chance that the class at runtime is the class
+    // returned by this method. A well-estimated monomorphic call site will return a likelihood
+    // of 100.
+    //
+    // pNumberOfClasses is the estimated number of different classes seen at the site.
+    // A well-estimated monomorphic call site will return 1.
+    virtual CORINFO_CLASS_HANDLE getLikelyClass(
+            CORINFO_METHOD_HANDLE ftnHnd,
+            CORINFO_CLASS_HANDLE  baseHnd,
+            UINT32                ilOffset,
+            UINT32 *              pLikelihood,      // OUT, estimated likelihood of the class (0...100)
+            UINT32 *              pNumberOfClasses  // OUT, estimated number of possible classes
+            ) = 0;
+
     // Associates a native call site, identified by its offset in the native code stream, with
     // the signature information and method handle the JIT used to lay out the call site. If
     // the call site has no signature information (e.g. a helper call) or has no method handle
index a1f6878..38f5182 100644 (file)
 #endif
 
     JITHELPER(CORINFO_HELP_PATCHPOINT, JIT_Patchpoint, CORINFO_HELP_SIG_REG_ONLY)
+    JITHELPER(CORINFO_HELP_CLASSPROFILE, JIT_ClassProfile, CORINFO_HELP_SIG_REG_ONLY)
 
 #undef JITHELPER
 #undef DYNAMICJITHELPER
index 246b321..f996893 100644 (file)
@@ -448,6 +448,7 @@ struct BasicBlock : private LIR::Range
 
 #define BBF_BACKWARD_JUMP_TARGET           MAKE_BBFLAG(36) // Block is a target of a backward jump
 #define BBF_PATCHPOINT                     MAKE_BBFLAG(37) // Block is a patchpoint
+#define BBF_HAS_CLASS_PROFILE              MAKE_BBFLAG(38) // BB contains a call needing a class profile
 
 // clang-format on
 
@@ -492,7 +493,7 @@ struct BasicBlock : private LIR::Range
 #define BBF_SPLIT_GAINED                                                                                               \
     (BBF_DONT_REMOVE | BBF_HAS_LABEL | BBF_HAS_JMP | BBF_BACKWARD_JUMP | BBF_HAS_IDX_LEN | BBF_HAS_NEWARRAY |          \
      BBF_PROF_WEIGHT | BBF_HAS_NEWOBJ | BBF_KEEP_BBJ_ALWAYS | BBF_CLONED_FINALLY_END | BBF_HAS_NULLCHECK |             \
-     BBF_HAS_VTABREF)
+     BBF_HAS_VTABREF | BBF_HAS_CLASS_PROFILE)
 
 #ifndef __GNUC__ // GCC doesn't like C_ASSERT at global scope
     static_assert_no_msg((BBF_SPLIT_NONEXIST & BBF_SPLIT_LOST) == 0);
index bc3be90..3a6ea50 100644 (file)
@@ -5982,6 +5982,7 @@ int Compiler::compCompileHelper(CORINFO_MODULE_HANDLE classPtr,
     info.compNativeCodeSize    = 0;
     info.compTotalHotCodeSize  = 0;
     info.compTotalColdCodeSize = 0;
+    info.compClassProbeCount   = 0;
 
     compHasBackwardJump = false;
 
index 1fa48c9..15e7db0 100644 (file)
@@ -3779,7 +3779,8 @@ public:
                              CORINFO_CONTEXT_HANDLE* contextHandle,
                              CORINFO_CONTEXT_HANDLE* exactContextHandle,
                              bool                    isLateDevirtualization,
-                             bool                    isExplicitTailCall);
+                             bool                    isExplicitTailCall,
+                             IL_OFFSETX              ilOffset = BAD_IL_OFFSET);
 
     //=========================================================================
     //                          PROTECTED
@@ -6746,7 +6747,8 @@ public:
                                              CORINFO_METHOD_HANDLE methodHandle,
                                              CORINFO_CLASS_HANDLE  classHandle,
                                              unsigned              methodAttr,
-                                             unsigned              classAttr);
+                                             unsigned              classAttr,
+                                             unsigned              likelihood);
 
     bool doesMethodHaveExpRuntimeLookup()
     {
@@ -9311,6 +9313,10 @@ public:
 #define CPU_ARM64 0x0400 // The generic ARM64 CPU
 
         unsigned genCPU; // What CPU are we running on
+
+        // Number of class profile probes in this method
+        unsigned compClassProbeCount;
+
     } info;
 
     // Returns true if the method being compiled returns a non-void and non-struct value.
index db70d1e..7dc9865 100644 (file)
@@ -381,12 +381,34 @@ bool Compiler::fgGetProfileWeightForBasicBlock(IL_OFFSET offset, BasicBlock::wei
     return true;
 }
 
+//------------------------------------------------------------------------
+// fgInstrumentMethod: add instrumentation probes to the method
+//
+// Note:
+//
+//   By default this instruments each non-internal block with
+//   a counter probe.
+//
+//   Probe data is held in a runtime-allocated slab of Entries, with
+//   each Entry an (IL offset, count) pair. This method determines
+//   the number of Entries needed and initializes each Entry's IL offset.
+//
+//   Options (many not yet implemented):
+//   * suppress count instrumentation for methods with
+//     a single block, or
+//   * instrument internal blocks (requires same internal expansions
+//     for BBOPT and BBINSTR, not yet guaranteed)
+//   * use spanning tree for minimal count probing
+//   * add class profile probes for virtual and interface call sites
+//   * record indirection cells for VSD calls
+//
 void Compiler::fgInstrumentMethod()
 {
     noway_assert(!compIsForInlining());
 
     // Count the number of basic blocks in the method
-
+    // that will get block count probes.
+    //
     int         countOfBlocks = 0;
     BasicBlock* block;
     for (block = fgFirstBB; (block != nullptr); block = block->bbNext)
@@ -398,45 +420,255 @@ void Compiler::fgInstrumentMethod()
         countOfBlocks++;
     }
 
+    // We've already counted the number of class probes
+    // when importing.
+    //
+    int countOfCalls = info.compClassProbeCount;
+
+    // Optionally bail out, if there are less than three blocks and no call sites to profile.
+    // One block is common. We don't expect to see zero or two blocks here.
+    //
+    // Note we have to at least visit all the profile call sites to properly restore their
+    // stub addresses. So we can't bail out early if there are any of these.
+    //
+    if ((JitConfig.JitMinimalProfiling() > 0) && (countOfBlocks < 3) && (countOfCalls == 0))
+    {
+        JITDUMP("Not instrumenting method: %d blocks and %d calls\n", countOfBlocks, countOfCalls);
+        assert(countOfBlocks == 1);
+        return;
+    }
+
+    JITDUMP("Instrumenting method, %d blocks and %d calls\n", countOfBlocks, countOfCalls);
+
     // Allocate the profile buffer
+    //
+    // Allocation is in multiples of ICorJitInfo::BlockCounts. For each profile table we need
+    // some multiple of these.
+    //
+    const unsigned entriesPerCall = sizeof(ICorJitInfo::ClassProfile) / sizeof(ICorJitInfo::BlockCounts);
+    assert(entriesPerCall * sizeof(ICorJitInfo::BlockCounts) == sizeof(ICorJitInfo::ClassProfile));
+
+    const unsigned            totalEntries            = countOfBlocks + entriesPerCall * countOfCalls;
+    ICorJitInfo::BlockCounts* profileBlockCountsStart = nullptr;
 
-    ICorJitInfo::BlockCounts* profileBlockCountsStart;
+    HRESULT res = info.compCompHnd->allocMethodBlockCounts(totalEntries, &profileBlockCountsStart);
 
-    HRESULT res = info.compCompHnd->allocMethodBlockCounts(countOfBlocks, &profileBlockCountsStart);
+    // We may not be able to instrument, if so we'll set this false.
+    // We can't just early exit, because we have to clean up calls that we might have profiled.
+    //
+    bool instrument = true;
 
     if (!SUCCEEDED(res))
     {
+        JITDUMP("Unable to instrument -- block counter allocation failed: 0x%x\n", res);
+        instrument = false;
         // The E_NOTIMPL status is returned when we are profiling a generic method from a different assembly
-        if (res == E_NOTIMPL)
-        {
-            // expected failure...
-        }
-        else
+        if (res != E_NOTIMPL)
         {
             noway_assert(!"Error: failed to allocate profileBlockCounts");
             return;
         }
     }
-    else
-    {
-        // For each BasicBlock (non-Internal)
-        //  1. Assign the blocks bbCodeOffs to the ILOffset field of this blocks profile data.
-        //  2. Add an operation that increments the ExecutionCount field at the beginning of the block.
 
-        // Each (non-Internal) block has it own BlockCounts tuple [ILOffset, ExecutionCount]
-        // To start we initialize our current one with the first one that we allocated
+    ICorJitInfo::BlockCounts* profileBlockCountsEnd = &profileBlockCountsStart[countOfBlocks];
+    ICorJitInfo::BlockCounts* profileEnd            = &profileBlockCountsStart[totalEntries];
+
+    // For each BasicBlock (non-Internal)
+    //  1. Assign the blocks bbCodeOffs to the ILOffset field of this blocks profile data.
+    //  2. Add an operation that increments the ExecutionCount field at the beginning of the block.
+    //
+    // Each (non-Internal) block has it own BlockCounts tuple [ILOffset, ExecutionCount]
+    // To start we initialize our current one with the first one that we allocated
+    //
+    ICorJitInfo::BlockCounts* currentBlockCounts = profileBlockCountsStart;
+
+    for (block = fgFirstBB; (block != nullptr); block = block->bbNext)
+    {
+        // We don't want to profile any un-imported blocks
         //
-        ICorJitInfo::BlockCounts* currentBlockCounts = profileBlockCountsStart;
+        if ((block->bbFlags & BBF_IMPORTED) == 0)
+        {
+            continue;
+        }
 
-        for (block = fgFirstBB; (block != nullptr); block = block->bbNext)
+        // We may see class probes in internal blocks, thanks to the
+        // block splitting done by the indirect call transformer.
+        //
+        if (JitConfig.JitClassProfiling() > 0)
         {
-            if (!(block->bbFlags & BBF_IMPORTED) || (block->bbFlags & BBF_INTERNAL))
+            // Only works when jitting.
+            assert(!opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT));
+
+            if ((block->bbFlags & BBF_HAS_CLASS_PROFILE) != 0)
             {
-                continue;
+                // Would be nice to avoid having to search here by tracking
+                // candidates more directly.
+                //
+                JITDUMP("Scanning for calls to profile in " FMT_BB "\n", block->bbNum);
+
+                class ClassProbeVisitor final : public GenTreeVisitor<ClassProbeVisitor>
+                {
+                public:
+                    enum
+                    {
+                        DoPreOrder = true
+                    };
+
+                    int                        m_count;
+                    ICorJitInfo::ClassProfile* m_tableBase;
+                    bool                       m_instrument;
+
+                    ClassProbeVisitor(Compiler* compiler, ICorJitInfo::ClassProfile* tableBase, bool instrument)
+                        : GenTreeVisitor<ClassProbeVisitor>(compiler)
+                        , m_count(0)
+                        , m_tableBase(tableBase)
+                        , m_instrument(instrument)
+                    {
+                    }
+                    Compiler::fgWalkResult PreOrderVisit(GenTree** use, GenTree* user)
+                    {
+                        GenTree* const node = *use;
+                        if (node->IsCall())
+                        {
+                            GenTreeCall* const call = node->AsCall();
+                            if (call->IsVirtual() && (call->gtCallType != CT_INDIRECT))
+                            {
+                                JITDUMP("Found call [%06u] with probe index %d and ilOffset 0x%X\n",
+                                        m_compiler->dspTreeID(call), call->gtClassProfileCandidateInfo->probeIndex,
+                                        call->gtClassProfileCandidateInfo->ilOffset);
+
+                                m_count++;
+
+                                if (m_instrument)
+                                {
+                                    // We transform the call from (CALLVIRT obj, ... args ...) to
+                                    // to
+                                    //      (CALLVIRT
+                                    //        (COMMA
+                                    //          (ASG tmp, obj)
+                                    //          (COMMA
+                                    //            (CALL probe_fn tmp, &probeEntry)
+                                    //            tmp)))
+                                    //         ... args ...)
+                                    //
+
+                                    assert(call->gtCallThisArg->GetNode()->TypeGet() == TYP_REF);
+
+                                    // Figure out where the table is located.
+                                    //
+                                    ICorJitInfo::ClassProfile* classProfile =
+                                        &m_tableBase[call->gtClassProfileCandidateInfo->probeIndex];
+
+                                    // Grab a temp to hold the 'this' object as it will be used three times
+                                    //
+                                    unsigned const tmpNum = m_compiler->lvaGrabTemp(true DEBUGARG("class profile tmp"));
+                                    m_compiler->lvaTable[tmpNum].lvType = TYP_REF;
+
+                                    // Generate the IR...
+                                    //
+                                    GenTree* const classProfileNode =
+                                        m_compiler->gtNewIconNode((ssize_t)classProfile, TYP_I_IMPL);
+                                    GenTree* const          tmpNode = m_compiler->gtNewLclvNode(tmpNum, TYP_REF);
+                                    GenTreeCall::Use* const args = m_compiler->gtNewCallArgs(tmpNode, classProfileNode);
+                                    GenTree* const          helperCallNode =
+                                        m_compiler->gtNewHelperCallNode(CORINFO_HELP_CLASSPROFILE, TYP_VOID, args);
+                                    GenTree* const tmpNode2 = m_compiler->gtNewLclvNode(tmpNum, TYP_REF);
+                                    GenTree* const callCommaNode =
+                                        m_compiler->gtNewOperNode(GT_COMMA, TYP_REF, helperCallNode, tmpNode2);
+                                    GenTree* const tmpNode3 = m_compiler->gtNewLclvNode(tmpNum, TYP_REF);
+                                    GenTree* const asgNode  = m_compiler->gtNewOperNode(GT_ASG, TYP_REF, tmpNode3,
+                                                                                       call->gtCallThisArg->GetNode());
+                                    GenTree* const asgCommaNode =
+                                        m_compiler->gtNewOperNode(GT_COMMA, TYP_REF, asgNode, callCommaNode);
+
+                                    // Update the call
+                                    //
+                                    call->gtCallThisArg->SetNode(asgCommaNode);
+
+                                    JITDUMP("Modified call is now\n");
+                                    DISPTREE(call);
+
+                                    // Initialize the class table
+                                    //
+                                    // Hack: we use two high bits of the offset to indicate that this record
+                                    // is the start of a class profile, and what kind of call is being profiled.
+                                    //
+                                    IL_OFFSET offset = jitGetILoffs(call->gtClassProfileCandidateInfo->ilOffset);
+                                    assert((offset & (ICorJitInfo::ClassProfile::CLASS_FLAG |
+                                                      ICorJitInfo::ClassProfile::INTERFACE_FLAG)) == 0);
+
+                                    offset |= ICorJitInfo::ClassProfile::CLASS_FLAG;
+
+                                    if (call->IsVirtualStub())
+                                    {
+                                        offset |= ICorJitInfo::ClassProfile::INTERFACE_FLAG;
+                                    }
+                                    else
+                                    {
+                                        assert(call->IsVirtualVtable());
+                                    }
+
+                                    classProfile->ILOffset = offset;
+                                    classProfile->Count    = 0;
+
+                                    for (int i = 0; i < ICorJitInfo::ClassProfile::SIZE; i++)
+                                    {
+                                        classProfile->ClassTable[i] = NO_CLASS_HANDLE;
+                                    }
+                                }
+
+                                // Restore the stub address on call, whether instrumenting or not.
+                                //
+                                call->gtStubCallStubAddr = call->gtClassProfileCandidateInfo->stubAddr;
+                            }
+                        }
+
+                        return Compiler::WALK_CONTINUE;
+                    }
+                };
+
+                // Scan the statements and add class probes
+                //
+                ClassProbeVisitor visitor(this, (ICorJitInfo::ClassProfile*)profileBlockCountsEnd, instrument);
+                for (Statement* stmt : block->Statements())
+                {
+                    visitor.WalkTree(stmt->GetRootNodePointer(), nullptr);
+                }
+
+                // Bookkeeping
+                //
+                assert(visitor.m_count <= countOfCalls);
+                countOfCalls -= visitor.m_count;
+                JITDUMP("\n%d calls remain to be visited\n", countOfCalls);
             }
+            else
+            {
+                JITDUMP("No calls to profile in " FMT_BB "\n", block->bbNum);
+            }
+        }
+
+        // We won't need count probes in internal blocks.
+        //
+        // TODO, perhaps: profile the flow early expansion ... we would need
+        // some non-il based keying scheme.
+        //
+        if ((block->bbFlags & BBF_INTERNAL) != 0)
+        {
+            continue;
+        }
+
+        // One less block
+        countOfBlocks--;
 
+        if (instrument)
+        {
             // Assign the current block's IL offset into the profile data
-            currentBlockCounts->ILOffset       = block->bbCodeOffs;
+            // (make sure IL offset is sane)
+            //
+            IL_OFFSET offset = block->bbCodeOffs;
+            assert((int)offset >= 0);
+
+            currentBlockCounts->ILOffset       = offset;
             currentBlockCounts->ExecutionCount = 0;
 
             size_t addrOfCurrentExecutionCount = (size_t)&currentBlockCounts->ExecutionCount;
@@ -456,57 +688,63 @@ void Compiler::fgInstrumentMethod()
 
             // Advance to the next BlockCounts tuple [ILOffset, ExecutionCount]
             currentBlockCounts++;
-
-            // One less block
-            countOfBlocks--;
         }
-        // Check that we allocated and initialized the same number of BlockCounts tuples
-        noway_assert(countOfBlocks == 0);
+    }
 
-        // When prejitting, add the method entry callback node
-        if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT))
-        {
-            GenTree* arg;
+    if (!instrument)
+    {
+        return;
+    }
+
+    // Check that we initialized as many BlockCounts tuples as we allocated, and that no calls remain unvisited
+    //
+    noway_assert(countOfBlocks == 0);
+    noway_assert(countOfCalls == 0);
+    assert(currentBlockCounts == profileBlockCountsEnd);
+
+    // When prejitting, add the method entry callback node
+    if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT))
+    {
+        GenTree* arg;
 
 #ifdef FEATURE_READYTORUN_COMPILER
-            if (opts.IsReadyToRun())
-            {
-                mdMethodDef currentMethodToken = info.compCompHnd->getMethodDefFromMethod(info.compMethodHnd);
+        if (opts.IsReadyToRun())
+        {
+            mdMethodDef currentMethodToken = info.compCompHnd->getMethodDefFromMethod(info.compMethodHnd);
 
-                CORINFO_RESOLVED_TOKEN resolvedToken;
-                resolvedToken.tokenContext = MAKE_METHODCONTEXT(info.compMethodHnd);
-                resolvedToken.tokenScope   = info.compScopeHnd;
-                resolvedToken.token        = currentMethodToken;
-                resolvedToken.tokenType    = CORINFO_TOKENKIND_Method;
+            CORINFO_RESOLVED_TOKEN resolvedToken;
+            resolvedToken.tokenContext = MAKE_METHODCONTEXT(info.compMethodHnd);
+            resolvedToken.tokenScope   = info.compScopeHnd;
+            resolvedToken.token        = currentMethodToken;
+            resolvedToken.tokenType    = CORINFO_TOKENKIND_Method;
 
-                info.compCompHnd->resolveToken(&resolvedToken);
+            info.compCompHnd->resolveToken(&resolvedToken);
 
-                arg = impTokenToHandle(&resolvedToken);
-            }
-            else
+            arg = impTokenToHandle(&resolvedToken);
+        }
+        else
 #endif
-            {
-                arg = gtNewIconEmbMethHndNode(info.compMethodHnd);
-            }
+        {
+            arg = gtNewIconEmbMethHndNode(info.compMethodHnd);
+        }
 
-            GenTreeCall::Use* args = gtNewCallArgs(arg);
-            GenTree*          call = gtNewHelperCallNode(CORINFO_HELP_BBT_FCN_ENTER, TYP_VOID, args);
+        GenTreeCall::Use* args = gtNewCallArgs(arg);
+        GenTree*          call = gtNewHelperCallNode(CORINFO_HELP_BBT_FCN_ENTER, TYP_VOID, args);
 
-            // Get the address of the first blocks ExecutionCount
-            size_t addrOfFirstExecutionCount = (size_t)&profileBlockCountsStart->ExecutionCount;
+        // Get the address of the first blocks ExecutionCount
+        size_t addrOfFirstExecutionCount = (size_t)&profileBlockCountsStart->ExecutionCount;
 
-            // Read Basic-Block count value
-            GenTree* valueNode = gtNewIndOfIconHandleNode(TYP_INT, addrOfFirstExecutionCount, GTF_ICON_BBC_PTR, false);
+        // Read Basic-Block count value
+        GenTree* valueNode = gtNewIndOfIconHandleNode(TYP_INT, addrOfFirstExecutionCount, GTF_ICON_BBC_PTR, false);
 
-            // Compare Basic-Block count value against zero
-            GenTree*   relop = gtNewOperNode(GT_NE, TYP_INT, valueNode, gtNewIconNode(0, TYP_INT));
-            GenTree*   colon = new (this, GT_COLON) GenTreeColon(TYP_VOID, gtNewNothingNode(), call);
-            GenTree*   cond  = gtNewQmarkNode(TYP_VOID, relop, colon);
-            Statement* stmt  = gtNewStmt(cond);
+        // Compare Basic-Block count value against zero
+        GenTree*   relop = gtNewOperNode(GT_NE, TYP_INT, valueNode, gtNewIconNode(0, TYP_INT));
+        GenTree*   colon = new (this, GT_COLON) GenTreeColon(TYP_VOID, gtNewNothingNode(), call);
+        GenTree*   cond  = gtNewQmarkNode(TYP_VOID, relop, colon);
+        Statement* stmt  = gtNewStmt(cond);
 
-            fgEnsureFirstBBisScratch();
-            fgInsertStmtAtEnd(fgFirstBB, stmt);
-        }
+        fgEnsureFirstBBisScratch();
+        fgInsertStmtAtEnd(fgFirstBB, stmt);
     }
 }
 
index 9fe2bca..7391a8b 100644 (file)
@@ -155,6 +155,7 @@ enum TargetHandleType : BYTE
 struct BasicBlock;
 struct InlineCandidateInfo;
 struct GuardedDevirtualizationCandidateInfo;
+struct ClassProfileCandidateInfo;
 
 typedef unsigned short AssertionIndex;
 
@@ -4516,6 +4517,7 @@ struct GenTreeCall final : public GenTree
         // gtInlineCandidateInfo is only used when inlining methods
         InlineCandidateInfo*                  gtInlineCandidateInfo;
         GuardedDevirtualizationCandidateInfo* gtGuardedDevirtualizationCandidateInfo;
+        ClassProfileCandidateInfo*            gtClassProfileCandidateInfo;
         void*                                 gtStubCallStubAddr; // GTF_CALL_VIRT_STUB - these are never inlined
         CORINFO_GENERIC_HANDLE compileTimeHelperArgumentHandle; // Used to track type handle argument of dynamic helpers
         void*                  gtDirectCallAddress; // Used to pass direct call address between lower and codegen
index 1536db7..fb9e10c 100644 (file)
@@ -8737,7 +8737,7 @@ var_types Compiler::impImportCall(OPCODE                  opcode,
             const bool isExplicitTailCall     = (tailCallFlags & PREFIX_TAILCALL_EXPLICIT) != 0;
             const bool isLateDevirtualization = false;
             impDevirtualizeCall(call->AsCall(), &callInfo->hMethod, &callInfo->methodFlags, &callInfo->contextHandle,
-                                &exactContextHnd, isLateDevirtualization, isExplicitTailCall);
+                                &exactContextHnd, isLateDevirtualization, isExplicitTailCall, rawILOffset);
         }
 
         if (impIsThis(obj))
@@ -20554,9 +20554,10 @@ bool Compiler::IsMathIntrinsic(GenTree* tree)
 //     method   -- [IN/OUT] the method handle for call. Updated iff call devirtualized.
 //     methodFlags -- [IN/OUT] flags for the method to call. Updated iff call devirtualized.
 //     contextHandle -- [IN/OUT] context handle for the call. Updated iff call devirtualized.
-//     exactContextHnd -- [OUT] updated context handle iff call devirtualized
+//     exactContextHandle -- [OUT] updated context handle iff call devirtualized
 //     isLateDevirtualization -- if devirtualization is happening after importation
 //     isExplicitTailCalll -- [IN] true if we plan on using an explicit tail call
+//     ilOffset -- IL offset of the call
 //
 // Notes:
 //     Virtual calls in IL will always "invoke" the base class method.
@@ -20591,7 +20592,8 @@ void Compiler::impDevirtualizeCall(GenTreeCall*            call,
                                    CORINFO_CONTEXT_HANDLE* contextHandle,
                                    CORINFO_CONTEXT_HANDLE* exactContextHandle,
                                    bool                    isLateDevirtualization,
-                                   bool                    isExplicitTailCall)
+                                   bool                    isExplicitTailCall,
+                                   IL_OFFSETX              ilOffset)
 {
     assert(call != nullptr);
     assert(method != nullptr);
@@ -20601,9 +20603,39 @@ void Compiler::impDevirtualizeCall(GenTreeCall*            call,
     // This should be a virtual vtable or virtual stub call.
     assert(call->IsVirtual());
 
-    // Bail if not optimizing
-    if (opts.OptimizationDisabled())
+    // Possibly instrument, if not optimizing.
+    //
+    if (opts.OptimizationDisabled() && (call->gtCallType != CT_INDIRECT))
     {
+        // During importation, optionally flag this block as one that
+        // contains calls requiring class profiling. Ideally perhaps
+        // we'd just keep track of the calls themselves, so we don't
+        // have to search for them later.
+        //
+        if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_BBINSTR) && !opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT) &&
+            (JitConfig.JitClassProfiling() > 0) && !isLateDevirtualization)
+        {
+            JITDUMP("\n ... marking [%06u] in " FMT_BB " for class profile instrumentation\n", dspTreeID(call),
+                    compCurBB->bbNum);
+            ClassProfileCandidateInfo* pInfo = new (this, CMK_Inlining) ClassProfileCandidateInfo;
+
+            // Record some info needed for the class profiling probe.
+            //
+            pInfo->ilOffset   = ilOffset;
+            pInfo->probeIndex = info.compClassProbeCount++;
+            pInfo->stubAddr   = call->gtStubCallStubAddr;
+
+            // Note this overwrites gtStubCallStubAddr, so it needs to be undone
+            // during the instrumentation phase, or we won't generate proper
+            // code for vsd calls.
+            //
+            call->gtClassProfileCandidateInfo = pInfo;
+
+            // Flag block as needing scrutiny
+            //
+            compCurBB->bbFlags |= BBF_HAS_CLASS_PROFILE;
+        }
+
         return;
     }
 
@@ -20750,56 +20782,74 @@ void Compiler::impDevirtualizeCall(GenTreeCall*            call,
     //   IL_021e:  callvirt   instance int32 System.Object::GetHashCode()
     //
     // If so, we can't devirtualize, but we may be able to do guarded devirtualization.
+    //
     if ((objClassAttribs & CORINFO_FLG_INTERFACE) != 0)
     {
-        // If we're called during early devirtualiztion, attempt guarded devirtualization
-        // if there's currently just one implementing class.
-        if (exactContextHandle == nullptr)
+        // Don't try guarded devirtualization when we're doing late devirtualization.
+        //
+        if (isLateDevirtualization)
         {
-            JITDUMP("--- obj class is interface...unable to dervirtualize, sorry\n");
+            JITDUMP("No guarded devirt during late devirtualization\n");
             return;
         }
 
-        CORINFO_CLASS_HANDLE uniqueImplementingClass = NO_CLASS_HANDLE;
+        JITDUMP("Considering guarded devirt...\n");
 
-        // info.compCompHnd->getUniqueImplementingClass(objClass);
+        // See if the runtime can provide a class to guess for.
+        //
+        const unsigned       interfaceLikelihoodThreshold = 25;
+        unsigned             likelihood                   = 0;
+        unsigned             numberOfClasses              = 0;
+        CORINFO_CLASS_HANDLE likelyClass =
+            info.compCompHnd->getLikelyClass(info.compMethodHnd, baseClass, ilOffset, &likelihood, &numberOfClasses);
 
-        if (uniqueImplementingClass == NO_CLASS_HANDLE)
+        if (likelyClass == NO_CLASS_HANDLE)
         {
-            JITDUMP("No unique implementor of interface %p (%s), sorry\n", dspPtr(objClass), objClassName);
+            JITDUMP("No likely implementor of interface %p (%s), sorry\n", dspPtr(objClass), objClassName);
             return;
         }
+        else
+        {
+            JITDUMP("Likely implementor of interface %p (%s) is %p (%s) [likelihood:%u classes seen:%u]\n",
+                    dspPtr(objClass), objClassName, likelyClass, eeGetClassName(likelyClass), likelihood,
+                    numberOfClasses);
+        }
 
-        JITDUMP("Only known implementor of interface %p (%s) is %p (%s)!\n", dspPtr(objClass), objClassName,
-                uniqueImplementingClass, eeGetClassName(uniqueImplementingClass));
-
-        bool guessUniqueInterface = true;
-
-        INDEBUG(guessUniqueInterface = (JitConfig.JitGuardedDevirtualizationGuessUniqueInterface() > 0););
-
-        if (!guessUniqueInterface)
+        // Todo: a more advanced heuristic using likelihood, number of
+        // classes, and the profile count for this block.
+        //
+        // For now we will guess if the likelihood is 25% or more, as studies
+        // have shown this should pay off for interface calls.
+        //
+        if (likelihood < interfaceLikelihoodThreshold)
         {
-            JITDUMP("Guarded devirt for unique interface implementor is not enabled, sorry\n");
+            JITDUMP("Not guessing for class; likelihood is below interface call threshold %u\n",
+                    interfaceLikelihoodThreshold);
             return;
         }
 
-        // Ask the runtime to determine the method that would be called based on the guessed-for type.
-        CORINFO_CONTEXT_HANDLE ownerType = *contextHandle;
-        CORINFO_METHOD_HANDLE  uniqueImplementingMethod =
-            info.compCompHnd->resolveVirtualMethod(baseMethod, uniqueImplementingClass, ownerType);
+        // Ask the runtime to determine the method that would be called based on the likely type.
+        //
+        CORINFO_CONTEXT_HANDLE ownerType   = *contextHandle;
+        CORINFO_METHOD_HANDLE likelyMethod = info.compCompHnd->resolveVirtualMethod(baseMethod, likelyClass, ownerType);
 
-        if (uniqueImplementingMethod == nullptr)
+        if (likelyMethod == nullptr)
         {
             JITDUMP("Can't figure out which method would be invoked, sorry\n");
             return;
         }
 
-        JITDUMP("Interface call would invoke method %s\n", eeGetMethodName(uniqueImplementingMethod, nullptr));
-        DWORD uniqueMethodAttribs = info.compCompHnd->getMethodAttribs(uniqueImplementingMethod);
-        DWORD uniqueClassAttribs  = info.compCompHnd->getClassAttribs(uniqueImplementingClass);
+        JITDUMP("%s call would invoke method %s\n", callKind, eeGetMethodName(likelyMethod, nullptr));
+
+        // Some of these may be redundant
+        //
+        DWORD likelyMethodAttribs = info.compCompHnd->getMethodAttribs(likelyMethod);
+        DWORD likelyClassAttribs  = info.compCompHnd->getClassAttribs(likelyClass);
 
-        addGuardedDevirtualizationCandidate(call, uniqueImplementingMethod, uniqueImplementingClass,
-                                            uniqueMethodAttribs, uniqueClassAttribs);
+        // Try guarded devirtualization.
+        //
+        addGuardedDevirtualizationCandidate(call, likelyMethod, likelyClass, likelyMethodAttribs, likelyClassAttribs,
+                                            likelihood);
         return;
     }
 
@@ -20811,84 +20861,156 @@ void Compiler::impDevirtualizeCall(GenTreeCall*            call,
         JITDUMP("--- base class is interface\n");
     }
 
-    // Fetch the method that would be called based on the declared type of 'this'
+    // Fetch the method that would be called based on the declared type of 'this',
+    // and prepare to fetch the method attributes.
+    //
     CORINFO_CONTEXT_HANDLE ownerType     = *contextHandle;
     CORINFO_METHOD_HANDLE  derivedMethod = info.compCompHnd->resolveVirtualMethod(baseMethod, objClass, ownerType);
 
-    // If we failed to get a handle, we can't devirtualize.  This can
-    // happen when prejitting, if the devirtualization crosses
-    // servicing bubble boundaries.
-    //
-    // Note if we have some way of guessing a better and more likely type we can do something similar to the code
-    // above for the case where the best jit type is an interface type.
-    if (derivedMethod == nullptr)
-    {
-        JITDUMP("--- no derived method, sorry\n");
-        return;
-    }
-
-    // Fetch method attributes to see if method is marked final.
-    DWORD      derivedMethodAttribs = info.compCompHnd->getMethodAttribs(derivedMethod);
-    const bool derivedMethodIsFinal = ((derivedMethodAttribs & CORINFO_FLG_FINAL) != 0);
+    DWORD derivedMethodAttribs = 0;
+    bool  derivedMethodIsFinal = false;
+    bool  canDevirtualize      = false;
 
 #if defined(DEBUG)
     const char* derivedClassName  = "?derivedClass";
     const char* derivedMethodName = "?derivedMethod";
+    const char* note              = "inexact or not final";
+#endif
 
-    const char* note = "inexact or not final";
-    if (isExact)
-    {
-        note = "exact";
-    }
-    else if (objClassIsFinal)
+    // If we failed to get a method handle, we can't directly devirtualize.
+    //
+    // This can happen when prejitting, if the devirtualization crosses
+    // servicing bubble boundaries, or if objClass is a shared class.
+    //
+    if (derivedMethod == nullptr)
     {
-        note = "final class";
+        JITDUMP("--- no derived method\n");
     }
-    else if (derivedMethodIsFinal)
+    else
     {
-        note = "final method";
-    }
+        // Fetch method attributes to see if method is marked final.
+        derivedMethodAttribs = info.compCompHnd->getMethodAttribs(derivedMethod);
+        derivedMethodIsFinal = ((derivedMethodAttribs & CORINFO_FLG_FINAL) != 0);
 
-    if (verbose || doPrint)
-    {
-        derivedMethodName = eeGetMethodName(derivedMethod, &derivedClassName);
-        if (verbose)
+#if defined(DEBUG)
+        if (isExact)
         {
-            printf("    devirt to %s::%s -- %s\n", derivedClassName, derivedMethodName, note);
-            gtDispTree(call);
+            note = "exact";
+        }
+        else if (objClassIsFinal)
+        {
+            note = "final class";
+        }
+        else if (derivedMethodIsFinal)
+        {
+            note = "final method";
+        }
+
+        if (verbose || doPrint)
+        {
+            derivedMethodName = eeGetMethodName(derivedMethod, &derivedClassName);
+            if (verbose)
+            {
+                printf("    devirt to %s::%s -- %s\n", derivedClassName, derivedMethodName, note);
+                gtDispTree(call);
+            }
         }
-    }
 #endif // defined(DEBUG)
 
-    const bool canDevirtualize = isExact || objClassIsFinal || (!isInterface && derivedMethodIsFinal);
+        canDevirtualize = isExact || objClassIsFinal || (!isInterface && derivedMethodIsFinal);
+    }
 
+    // We still might be able to do a guarded devirtualization.
+    // Note the call might be an interface call or a virtual call.
+    //
     if (!canDevirtualize)
     {
         JITDUMP("    Class not final or exact%s\n", isInterface ? "" : ", and method not final");
 
-        // Have we enabled guarded devirtualization by guessing the jit's best class?
-        bool guessJitBestClass = true;
-        INDEBUG(guessJitBestClass = (JitConfig.JitGuardedDevirtualizationGuessBestClass() > 0););
+        // Don't try guarded devirtualization if we're doing late devirtualization.
+        //
+        if (isLateDevirtualization)
+        {
+            JITDUMP("No guarded devirt during late devirtualization\n");
+            return;
+        }
+
+        JITDUMP("Consdering guarded devirt...\n");
+
+        // See if there's a likely guess for the class.
+        //
+        const unsigned       likelihoodThreshold = isInterface ? 25 : 30;
+        unsigned             likelihood          = 0;
+        unsigned             numberOfClasses     = 0;
+        CORINFO_CLASS_HANDLE likelyClass =
+            info.compCompHnd->getLikelyClass(info.compMethodHnd, baseClass, ilOffset, &likelihood, &numberOfClasses);
 
-        if (!guessJitBestClass)
+        if (likelyClass != NO_CLASS_HANDLE)
         {
-            JITDUMP("No guarded devirt: guessing for jit best class disabled\n");
+            JITDUMP("Likely class for %p (%s) is %p (%s) [likelihood:%u classes seen:%u]\n", dspPtr(objClass),
+                    objClassName, likelyClass, eeGetClassName(likelyClass), likelihood, numberOfClasses);
+        }
+        else if (derivedMethod != nullptr)
+        {
+            // If we have a derived method we can optionally guess for
+            // the class that introduces the method.
+            //
+            bool guessJitBestClass = true;
+            INDEBUG(guessJitBestClass = (JitConfig.JitGuardedDevirtualizationGuessBestClass() > 0););
+
+            if (!guessJitBestClass)
+            {
+                JITDUMP("No guarded devirt: no likely class and guessing for jit best class disabled\n");
+                return;
+            }
+
+            // We will use the class that introduced the method as our guess
+            // for the runtime class of the object.
+            //
+            // We don't know how likely this is; just choose a value that gets
+            // us past the threshold.
+            likelyClass = info.compCompHnd->getMethodClass(derivedMethod);
+            likelihood  = likelihoodThreshold;
+
+            JITDUMP("Will guess implementing class for class %p (%s) is %p (%s)!\n", dspPtr(objClass), objClassName,
+                    likelyClass, eeGetClassName(likelyClass));
+        }
+
+        // Todo: a more advanced heuristic using likelihood, number of
+        // classes, and the profile count for this block.
+        //
+        // For now we will guess if the likelihood is at least 25%/30% (intfc/virt), as studies
+        // have shown this transformation should pay off even if we guess wrong sometimes.
+        //
+        if (likelihood < likelihoodThreshold)
+        {
+            JITDUMP("Not guessing for class; likelihood is below %s call threshold %u\n", callKind,
+                    likelihoodThreshold);
             return;
         }
 
-        // Don't try guarded devirtualiztion when we're doing late devirtualization.
-        if (isLateDevirtualization)
+        // Figure out which method will be called.
+        //
+        CORINFO_CONTEXT_HANDLE ownerType   = *contextHandle;
+        CORINFO_METHOD_HANDLE likelyMethod = info.compCompHnd->resolveVirtualMethod(baseMethod, likelyClass, ownerType);
+
+        if (likelyMethod == nullptr)
         {
-            JITDUMP("No guarded devirt during late devirtualization\n");
+            JITDUMP("Can't figure out which method would be invoked, sorry\n");
             return;
         }
 
-        // We will use the class that introduced the method as our guess
-        // for the runtime class of othe object.
-        CORINFO_CLASS_HANDLE derivedClass = info.compCompHnd->getMethodClass(derivedMethod);
+        JITDUMP("%s call would invoke method %s\n", callKind, eeGetMethodName(likelyMethod, nullptr));
+
+        // Some of these may be redundant
+        //
+        DWORD likelyMethodAttribs = info.compCompHnd->getMethodAttribs(likelyMethod);
+        DWORD likelyClassAttribs  = info.compCompHnd->getClassAttribs(likelyClass);
 
         // Try guarded devirtualization.
-        addGuardedDevirtualizationCandidate(call, derivedMethod, derivedClass, derivedMethodAttribs, objClassAttribs);
+        //
+        addGuardedDevirtualizationCandidate(call, likelyMethod, likelyClass, likelyMethodAttribs, likelyClassAttribs,
+                                            likelihood);
         return;
     }
 
@@ -20897,6 +21019,14 @@ void Compiler::impDevirtualizeCall(GenTreeCall*            call,
 
     JITDUMP("    %s; can devirtualize\n", note);
 
+    // See if the method we're devirtualizing to is an intrinsic.
+    //
+    if (derivedMethodAttribs & (CORINFO_FLG_JIT_INTRINSIC | CORINFO_FLG_INTRINSIC))
+    {
+        JITDUMP("!!! Devirt to intrinsic in %s, calling %s::%s\n", impInlineRoot()->info.compFullName, derivedClassName,
+                derivedMethodName);
+    }
+
     // Make the updates.
     call->gtFlags &= ~GTF_CALL_VIRT_VTABLE;
     call->gtFlags &= ~GTF_CALL_VIRT_STUB;
@@ -21289,12 +21419,14 @@ void Compiler::addFatPointerCandidate(GenTreeCall* call)
 //    classHandle - class that will be tested for at runtime
 //    methodAttr - attributes of the method
 //    classAttr - attributes of the class
+//    likelihood - likelihood, as a percentage (0-100), that this class is the class seen at runtime
 //
 void Compiler::addGuardedDevirtualizationCandidate(GenTreeCall*          call,
                                                    CORINFO_METHOD_HANDLE methodHandle,
                                                    CORINFO_CLASS_HANDLE  classHandle,
                                                    unsigned              methodAttr,
-                                                   unsigned              classAttr)
+                                                   unsigned              classAttr,
+                                                   unsigned              likelihood)
 {
     // This transformation only makes sense for virtual calls
     assert(call->IsVirtual());
@@ -21334,24 +21466,46 @@ void Compiler::addGuardedDevirtualizationCandidate(GenTreeCall*          call,
         return;
     }
 
+#ifdef DEBUG
+
+    // See if disabled by range
+    //
+    static ConfigMethodRange JitGuardedDevirtualizationRange;
+    JitGuardedDevirtualizationRange.EnsureInit(JitConfig.JitGuardedDevirtualizationRange());
+    assert(!JitGuardedDevirtualizationRange.Error());
+    if (!JitGuardedDevirtualizationRange.Contains(impInlineRoot()->info.compMethodHash()))
+    {
+        JITDUMP("NOT Marking call [%06u] as guarded devirtualization candidate -- excluded by "
+                "JitGuardedDevirtualizationRange",
+                dspTreeID(call));
+        return;
+    }
+
+#endif
+
     // We're all set, proceed with candidate creation.
+    //
     JITDUMP("Marking call [%06u] as guarded devirtualization candidate; will guess for class %s\n", dspTreeID(call),
             eeGetClassName(classHandle));
     setMethodHasGuardedDevirtualization();
     call->SetGuardedDevirtualizationCandidate();
 
     // Spill off any GT_RET_EXPR subtrees so we can clone the call.
+    //
     SpillRetExprHelper helper(this);
     helper.StoreRetExprResultsInArgs(call);
 
     // Gather some information for later. Note we actually allocate InlineCandidateInfo
     // here, as the devirtualized half of this call will likely become an inline candidate.
+    //
     GuardedDevirtualizationCandidateInfo* pInfo = new (this, CMK_Inlining) InlineCandidateInfo;
 
     pInfo->guardedMethodHandle = methodHandle;
     pInfo->guardedClassHandle  = classHandle;
+    pInfo->likelihood          = likelihood;
 
     // Save off the stub address since it shares a union with the candidate info.
+    //
     if (call->IsVirtualStub())
     {
         JITDUMP("Saving stub addr %p in candidate info\n", dspPtr(call->gtStubCallStubAddr));
index ebbce23..156d7c9 100644 (file)
@@ -192,6 +192,7 @@ private:
             thenBlock      = nullptr;
             elseBlock      = nullptr;
             origCall       = nullptr;
+            likelihood     = HIGH_PROBABILITY;
         }
 
         //------------------------------------------------------------------------
@@ -204,7 +205,7 @@ private:
 
         void Transform()
         {
-            JITDUMP("*** %s: transforming" FMT_STMT "\n", Name(), stmt->GetID());
+            JITDUMP("*** %s: transforming " FMT_STMT "\n", Name(), stmt->GetID());
             FixupRetExpr();
             ClearFlag();
             CreateRemainder();
@@ -228,9 +229,8 @@ private:
         //
         void CreateRemainder()
         {
-            remainderBlock          = compiler->fgSplitBlockAfterStatement(currBlock, stmt);
-            unsigned propagateFlags = currBlock->bbFlags & BBF_GC_SAFE_POINT;
-            remainderBlock->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL | propagateFlags;
+            remainderBlock = compiler->fgSplitBlockAfterStatement(currBlock, stmt);
+            remainderBlock->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL | BBF_INTERNAL;
         }
 
         virtual void CreateCheck() = 0;
@@ -248,11 +248,7 @@ private:
         BasicBlock* CreateAndInsertBasicBlock(BBjumpKinds jumpKind, BasicBlock* insertAfter)
         {
             BasicBlock* block = compiler->fgNewBBafter(jumpKind, insertAfter, true);
-            if ((insertAfter->bbFlags & BBF_INTERNAL) == 0)
-            {
-                block->bbFlags &= ~BBF_INTERNAL;
-                block->bbFlags |= BBF_IMPORTED;
-            }
+            block->bbFlags |= BBF_IMPORTED;
             return block;
         }
 
@@ -274,8 +270,8 @@ private:
         {
             remainderBlock->inheritWeight(currBlock);
             checkBlock->inheritWeight(currBlock);
-            thenBlock->inheritWeightPercentage(currBlock, HIGH_PROBABILITY);
-            elseBlock->inheritWeightPercentage(currBlock, 100 - HIGH_PROBABILITY);
+            thenBlock->inheritWeightPercentage(currBlock, likelihood);
+            elseBlock->inheritWeightPercentage(currBlock, 100 - likelihood);
         }
 
         //------------------------------------------------------------------------
@@ -296,6 +292,7 @@ private:
         BasicBlock*  elseBlock;
         Statement*   stmt;
         GenTreeCall* origCall;
+        unsigned     likelihood;
 
         const int HIGH_PROBABILITY = 80;
     };
@@ -546,6 +543,10 @@ private:
                 return;
             }
 
+            likelihood = origCall->gtGuardedDevirtualizationCandidateInfo->likelihood;
+            assert((likelihood >= 0) && (likelihood <= 100));
+            JITDUMP("Likelihood of correct guess is %u\n", likelihood);
+
             Transform();
         }
 
@@ -688,7 +689,7 @@ private:
             thenBlock->bbFlags |= currBlock->bbFlags & BBF_SPLIT_GAINED;
 
             InlineCandidateInfo* inlineInfo = origCall->gtInlineCandidateInfo;
-            CORINFO_CLASS_HANDLE clsHnd     = inlineInfo->clsHandle;
+            CORINFO_CLASS_HANDLE clsHnd     = inlineInfo->guardedClassHandle;
 
             // copy 'this' to temp with exact type.
             const unsigned thisTemp  = compiler->lvaGrabTemp(false DEBUGARG("guarded devirt this exact temp"));
@@ -721,8 +722,9 @@ private:
             assert(!call->IsVirtual());
 
             // Re-establish this call as an inline candidate.
-            GenTree* oldRetExpr         = inlineInfo->retExpr;
-            inlineInfo->clsHandle       = clsHnd;
+            GenTree* oldRetExpr = inlineInfo->retExpr;
+            // Todo -- pass this back from impdevirt...?
+            inlineInfo->clsHandle       = compiler->info.compCompHnd->getMethodClass(methodHnd);
             inlineInfo->exactContextHnd = context;
             call->gtInlineCandidateInfo = inlineInfo;
 
index 54d4237..82da668 100644 (file)
@@ -512,14 +512,24 @@ private:
     bool                  m_Reported;
 };
 
-// GuardedDevirtualizationCandidateInfo provides information about
-// a potential target of a virtual call.
+// ClassProfileCandidateInfo provides information about
+// profiling an indirect or virtual call.
+//
+struct ClassProfileCandidateInfo
+{
+    // IL offset associated with the call (presumably the offset of the
+    // profiled call instruction -- TODO confirm against the importer).
+    IL_OFFSET ilOffset;
+    // Index of the class probe for this call (assumed to number the
+    // probes within the method being compiled -- TODO confirm).
+    unsigned  probeIndex;
+    // Stub address saved for virtual stub calls; it is kept here because
+    // the call's own stub address shares a union with the candidate info.
+    void*     stubAddr;
+};
 
-struct GuardedDevirtualizationCandidateInfo
+// GuardedDevirtualizationCandidateInfo provides information about
+// a potential target of a virtual or interface call.
+//
+struct GuardedDevirtualizationCandidateInfo : ClassProfileCandidateInfo
 {
     CORINFO_CLASS_HANDLE  guardedClassHandle;
     CORINFO_METHOD_HANDLE guardedMethodHandle;
-    void*                 stubAddr;
+    unsigned              likelihood;
 };
 
 // InlineCandidateInfo provides basic information about a particular
@@ -527,7 +537,7 @@ struct GuardedDevirtualizationCandidateInfo
 //
 // It is a superset of GuardedDevirtualizationCandidateInfo: calls
 // can start out as GDv candidates and turn into inline candidates
-
+//
 struct InlineCandidateInfo : public GuardedDevirtualizationCandidateInfo
 {
     CORINFO_METHOD_INFO    methInfo;
index 4e4b298..7b14eb3 100644 (file)
@@ -416,8 +416,10 @@ CONFIG_INTEGER(JitEnableGuardedDevirtualization, W("JitEnableGuardedDevirtualiza
 
 #if defined(DEBUG)
 // Various policies for GuardedDevirtualization
+CONFIG_STRING(JitGuardedDevirtualizationRange, W("JitGuardedDevirtualizationRange"))
 CONFIG_INTEGER(JitGuardedDevirtualizationGuessUniqueInterface, W("JitGuardedDevirtualizationGuessUniqueInterface"), 1)
 CONFIG_INTEGER(JitGuardedDevirtualizationGuessBestClass, W("JitGuardedDevirtualizationGuessBestClass"), 1)
+CONFIG_INTEGER(JitGuardedDeivrtualizationUseProfile, W("JitGuardedDevirtualizationUseProfile"), 0)
 #endif // DEBUG
 
 // Enable insertion of patchpoints into Tier0 methods with loops.
@@ -425,6 +427,10 @@ CONFIG_INTEGER(TC_OnStackReplacement, W("TC_OnStackReplacement"), 0)
 // Initial patchpoint counter value used by jitted code
 CONFIG_INTEGER(TC_OnStackReplacement_InitialCounter, W("TC_OnStackReplacement_InitialCounter"), 1000)
 
+// Profile instrumentation options
+CONFIG_INTEGER(JitMinimalProfiling, W("JitMinimalProfiling"), 0)
+CONFIG_INTEGER(JitClassProfiling, W("JitClassProfiling"), 0)
+
 #if defined(DEBUG)
 // JitFunctionFile: Name of a file that contains a list of functions. If the currently compiled function is in the
 // file, certain other JIT config variables will be active. If the currently compiled function is not in the file,
index 790ec3e..8ce03ea 100644 (file)
@@ -93,11 +93,7 @@ private:
     BasicBlock* CreateAndInsertBasicBlock(BBjumpKinds jumpKind, BasicBlock* insertAfter)
     {
         BasicBlock* block = compiler->fgNewBBafter(jumpKind, insertAfter, true);
-        if ((insertAfter->bbFlags & BBF_INTERNAL) == 0)
-        {
-            block->bbFlags &= ~BBF_INTERNAL;
-            block->bbFlags |= BBF_IMPORTED;
-        }
+        block->bbFlags |= BBF_IMPORTED;
         return block;
     }
 
@@ -138,6 +134,7 @@ private:
         block->bbJumpKind = BBJ_COND;
         block->bbJumpDest = remainderBlock;
         helperBlock->bbFlags |= BBF_BACKWARD_JUMP;
+        block->bbFlags |= BBF_INTERNAL;
 
         // Update weights
         remainderBlock->inheritWeight(block);
index 112367a..2a50680 100644 (file)
@@ -169,6 +169,11 @@ public:
         }
     }
 
+    // Returns true when m_lastRange is zero.
+    // NOTE(review): presumably this means no ranges were parsed from the
+    // config string -- confirm against the parsing code.
+    bool IsEmpty() const
+    {
+        return m_lastRange == 0;
+    }
+
     // Error checks
     bool Error() const
     {
index 1c8c7ba..511ea87 100644 (file)
@@ -2441,6 +2441,21 @@ namespace Internal.JitInterface
         }
 
         [UnmanagedCallersOnly]
+        // Unmanaged-callable thunk for getLikelyClass. Forwards the call to the object
+        // returned by GetThis(thisHandle), passing the raw out-pointers through as 'ref'
+        // arguments. If the managed call throws, the exception is recorded in
+        // *ppException via AllocException and default (null) is returned.
+        // NOTE(review): this looks like generated thunk code (see ThunkInput.txt), so
+        // hand edits here would presumably be lost on regeneration -- confirm.
+        static CORINFO_CLASS_STRUCT_* _getLikelyClass(IntPtr thisHandle, IntPtr* ppException, CORINFO_METHOD_STRUCT_* ftnHnd, CORINFO_CLASS_STRUCT_* baseHnd, uint ilOffset, uint* pLikelihood, uint* pNumberOfClasses)
+        {
+            var _this = GetThis(thisHandle);
+            try
+            {
+                return _this.getLikelyClass(ftnHnd, baseHnd, ilOffset, ref *pLikelihood, ref *pNumberOfClasses);
+            }
+            catch (Exception ex)
+            {
+                *ppException = _this.AllocException(ex);
+                return default;
+            }
+        }
+
+        [UnmanagedCallersOnly]
         static void _recordCallSite(IntPtr thisHandle, IntPtr* ppException, uint instrOffset, CORINFO_SIG_INFO* callSig, CORINFO_METHOD_STRUCT_* methodHandle)
         {
             var _this = GetThis(thisHandle);
@@ -2516,7 +2531,7 @@ namespace Internal.JitInterface
 
         static IntPtr GetUnmanagedCallbacks()
         {
-            void** callbacks = (void**)Marshal.AllocCoTaskMem(sizeof(IntPtr) * 170);
+            void** callbacks = (void**)Marshal.AllocCoTaskMem(sizeof(IntPtr) * 171);
 
             callbacks[0] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_METHOD_STRUCT_*, uint>)&_getMethodAttribs;
             callbacks[1] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_METHOD_STRUCT_*, CorInfoMethodRuntimeFlags, void>)&_setMethodAttribs;
@@ -2683,11 +2698,12 @@ namespace Internal.JitInterface
             callbacks[162] = (delegate* unmanaged<IntPtr, IntPtr*, CorJitResult, void>)&_reportFatalError;
             callbacks[163] = (delegate* unmanaged<IntPtr, IntPtr*, uint, BlockCounts**, HRESULT>)&_allocMethodBlockCounts;
             callbacks[164] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_METHOD_STRUCT_*, uint*, BlockCounts**, uint*, HRESULT>)&_getMethodBlockCounts;
-            callbacks[165] = (delegate* unmanaged<IntPtr, IntPtr*, uint, CORINFO_SIG_INFO*, CORINFO_METHOD_STRUCT_*, void>)&_recordCallSite;
-            callbacks[166] = (delegate* unmanaged<IntPtr, IntPtr*, void*, void*, ushort, ushort, int, void>)&_recordRelocation;
-            callbacks[167] = (delegate* unmanaged<IntPtr, IntPtr*, void*, ushort>)&_getRelocTypeHint;
-            callbacks[168] = (delegate* unmanaged<IntPtr, IntPtr*, uint>)&_getExpectedTargetArchitecture;
-            callbacks[169] = (delegate* unmanaged<IntPtr, IntPtr*, CORJIT_FLAGS*, uint, uint>)&_getJitFlags;
+            callbacks[165] = (delegate* unmanaged<IntPtr, IntPtr*, CORINFO_METHOD_STRUCT_*, CORINFO_CLASS_STRUCT_*, uint, uint*, uint*, CORINFO_CLASS_STRUCT_*>)&_getLikelyClass;
+            callbacks[166] = (delegate* unmanaged<IntPtr, IntPtr*, uint, CORINFO_SIG_INFO*, CORINFO_METHOD_STRUCT_*, void>)&_recordCallSite;
+            callbacks[167] = (delegate* unmanaged<IntPtr, IntPtr*, void*, void*, ushort, ushort, int, void>)&_recordRelocation;
+            callbacks[168] = (delegate* unmanaged<IntPtr, IntPtr*, void*, ushort>)&_getRelocTypeHint;
+            callbacks[169] = (delegate* unmanaged<IntPtr, IntPtr*, uint>)&_getExpectedTargetArchitecture;
+            callbacks[170] = (delegate* unmanaged<IntPtr, IntPtr*, CORJIT_FLAGS*, uint, uint>)&_getJitFlags;
 
             return (IntPtr)callbacks;
         }
index 348c59e..7576357 100644 (file)
@@ -321,6 +321,7 @@ FUNCTIONS
     void reportFatalError(CorJitResult result)
     HRESULT allocMethodBlockCounts(UINT32 count, ICorJitInfo::BlockCounts** pBlockCounts)
     HRESULT getMethodBlockCounts(CORINFO_METHOD_HANDLE ftnHnd, UINT32* pCount, ICorJitInfo::BlockCounts** pBlockCounts, UINT32* pNumRuns)
+    CORINFO_CLASS_HANDLE getLikelyClass(CORINFO_METHOD_HANDLE ftnHnd, CORINFO_CLASS_HANDLE baseHnd, UINT32 ilOffset, UINT32* pLikelihood, UINT32* pNumberOfClasses)
     void recordCallSite(ULONG instrOffset, CORINFO_SIG_INFO* callSig, CORINFO_METHOD_HANDLE methodHandle)
     void recordRelocation(void* location, void* target, WORD fRelocType, WORD slotNum, INT32 addlDelta)
     WORD getRelocTypeHint(void* target)
index 90f7658..22d2d85 100644 (file)
@@ -2341,6 +2341,11 @@ namespace Internal.JitInterface
         private HRESULT getMethodBlockCounts(CORINFO_METHOD_STRUCT_* ftnHnd, ref uint pCount, ref BlockCounts* pBlockCounts, ref uint pNumRuns)
         { throw new NotImplementedException("getBBProfileData"); }
 
+        // This host has no class profile data, so it always reports "no likely class".
+        // The outputs are zeroed before returning, matching the native implementations,
+        // so the JIT never reads stale likelihood/class-count values.
+        private CORINFO_CLASS_STRUCT_* getLikelyClass(CORINFO_METHOD_STRUCT_* ftnHnd, CORINFO_CLASS_STRUCT_* baseHnd, uint IlOffset, ref uint pLikelihood, ref uint pNumberOfClasses)
+        {
+            pLikelihood = 0;
+            pNumberOfClasses = 0;
+            return null;
+        }
+
         private void getAddressOfPInvokeTarget(CORINFO_METHOD_STRUCT_* method, ref CORINFO_CONST_LOOKUP pLookup)
         {
             MethodDesc methodDesc = HandleToObject(method);
index ecb7e88..3782856 100644 (file)
@@ -171,6 +171,7 @@ struct JitInterfaceCallbacks
     void (* reportFatalError)(void * thisHandle, CorInfoException** ppException, int result);
     int (* allocMethodBlockCounts)(void * thisHandle, CorInfoException** ppException, unsigned int count, void** pBlockCounts);
     int (* getMethodBlockCounts)(void * thisHandle, CorInfoException** ppException, void* ftnHnd, unsigned int* pCount, void** pBlockCounts, unsigned int* pNumRuns);
+    void* (* getLikelyClass)(void * thisHandle, CorInfoException** ppException, void* ftnHnd, void* baseHnd, unsigned int ilOffset, unsigned int* pLikelihood, unsigned int* pNumberOfClasses);
     void (* recordCallSite)(void * thisHandle, CorInfoException** ppException, unsigned int instrOffset, void* callSig, void* methodHandle);
     void (* recordRelocation)(void * thisHandle, CorInfoException** ppException, void* location, void* target, unsigned short fRelocType, unsigned short slotNum, int addlDelta);
     unsigned short (* getRelocTypeHint)(void * thisHandle, CorInfoException** ppException, void* target);
@@ -1608,6 +1609,15 @@ public:
         return _ret;
     }
 
+    // Forwards getLikelyClass to the corresponding entry in the callback table,
+    // passing _thisHandle and a slot for a reported exception. If the callback
+    // fills in that slot, the CorInfoException* is thrown; otherwise the
+    // callback's return value is handed back unchanged.
+    virtual void* getLikelyClass(void* ftnHnd, void* baseHnd, unsigned int ilOffset, unsigned int* pLikelihood, unsigned int* pNumberOfClasses)
+    {
+        CorInfoException* pException = nullptr;
+        void* _ret = _callbacks->getLikelyClass(_thisHandle, &pException, ftnHnd, baseHnd, ilOffset, pLikelihood, pNumberOfClasses);
+        if (pException != nullptr)
+            throw pException;
+        return _ret;
+    }
+
     virtual void recordCallSite(unsigned int instrOffset, void* callSig, void* methodHandle)
     {
         CorInfoException* pException = nullptr;
index cdb4543..f922a50 100644 (file)
@@ -26,11 +26,11 @@ private:
     uint64_t corJitFlags;
 };
 
-static const GUID JITEEVersionIdentifier ={ /* 8031aa05-4568-40fc-a0d2-d971d8edba16 */
-    0x8031aa05,
-    0x4568,
-    0x40fc,
-    {0xa0, 0xd2, 0xd9, 0x71, 0xd8, 0xed, 0xba, 0x16}
+static const GUID JITEEVersionIdentifier = { /* 0d235fe4-65a1-487a-8553-c845496da901 */
+    0x0d235fe4,
+    0x65a1,
+    0x487a,
+    {0x85, 0x53, 0xc8, 0x45, 0x49, 0x6d, 0xa9, 0x01}
 };
 
 class Jit
index c101e43..0ef036d 100644 (file)
@@ -55,6 +55,7 @@
 #include "runtimehandles.h"
 #include "castcache.h"
 #include "onstackreplacement.h"
+#include "pgo.h"
 
 //========================================================================
 //
@@ -5233,6 +5234,75 @@ void JIT_Patchpoint(int* counter, int ilOffset)
 
 #endif // FEATURE_ON_STACK_REPLACEMENT
 
+// JIT_ClassProfile: JIT helper that records the class of an object in a
+// class profile table.
+//
+// Arguments:
+//    obj          - object being observed; may be null
+//    tableAddress - address of the ICorJitInfo::ClassProfile to update
+//
+// Notes:
+//    Every call bumps the profile's Count, including calls with a null
+//    object (those are counted but never stored in the table).
+//
+//    While the pre-increment Count is below the table size S, the class is
+//    stored in the slot indexed by that count. After that, a pseudo-random
+//    slot is overwritten on roughly S out of every N calls.
+//
+//    Updates are not synchronized; concurrent callers may lose counts or
+//    overwrite each other's table entries.
+//
+HCIMPL2(void, JIT_ClassProfile, Object *obj, void* tableAddress)
+{
+    FCALL_CONTRACT;
+    FC_GC_POLL_NOT_NEEDED();
+
+    OBJECTREF objRef = ObjectToOBJECTREF(obj);
+    VALIDATEOBJECTREF(objRef);
+
+    ICorJitInfo::ClassProfile* const classProfile = (ICorJitInfo::ClassProfile*) tableAddress;
+    volatile unsigned* pCount = (volatile unsigned*) &classProfile->Count;
+
+    // Post-increment the counter itself. The parentheses matter: "*pCount++"
+    // binds as "*(pCount++)", which advances the pointer and leaves Count
+    // stuck at its initial value.
+    //
+    const unsigned count = (*pCount)++;
+    const unsigned S = ICorJitInfo::ClassProfile::SIZE;
+    const unsigned N = ICorJitInfo::ClassProfile::SAMPLE_INTERVAL;
+    _ASSERTE(N >= S);
+
+    if (objRef == NULL)
+    {
+        return;
+    }
+
+    CORINFO_CLASS_HANDLE clsHnd = (CORINFO_CLASS_HANDLE)objRef->GetMethodTable();
+
+#ifdef _DEBUG
+    PgoManager::VerifyAddress(classProfile);
+    PgoManager::VerifyAddress(classProfile + 1);
+#endif
+
+    // If table is not yet full, just add entries in.
+    //
+    if (count < S)
+    {
+        classProfile->ClassTable[count] = clsHnd;
+    }
+    else
+    {
+        // generate a random number (xorshift32)
+        //
+        // intentionally simple so we can have multithreaded
+        // access w/o tearing state.
+        //
+        static volatile unsigned s_rng = 100;
+
+        unsigned x = s_rng;
+        x ^= x << 13;
+        x ^= x >> 17;
+        x ^= x << 5;
+        s_rng = x;
+
+        // N is the sampling window size,
+        // it should be larger than the table size.
+        //
+        // If we let N == count then we are building an entire
+        // run sample -- probability of update decreases over time.
+        // Would be a good strategy for an AOT profiler.
+        //
+        // But for TieredPGO we would prefer something that is more
+        // weighted to recent observations.
+        //
+        // For S=4, N=128, we'll sample (on average) every 32nd call.
+        //
+        if ((x % N) < S)
+        {
+            unsigned i = x % S;
+            classProfile->ClassTable[i] = clsHnd;
+        }
+    }
+}
+HCIMPLEND
+
 //========================================================================
 //
 //      INTEROP HELPERS
index 3824a1f..6515a79 100644 (file)
@@ -11924,6 +11924,54 @@ HRESULT CEEJitInfo::getMethodBlockCounts (
     return hr;
 }
 
+// Ask the PGO manager for the most likely class at a call site.
+//
+// Arguments:
+//    ftnHnd           - method containing the call site
+//    baseHnd          - not used by this implementation
+//    ilOffset         - IL offset of the call site within ftnHnd
+//    pLikelihood      - [out] likelihood (0..100) of the returned class; 0 if none
+//    pNumberOfClasses - [out] number of distinct classes seen; 0 if none
+//
+// Returns:
+//    The likely class, or NULL if there is no profile data (always NULL when
+//    FEATURE_PGO is not defined).
+//
+CORINFO_CLASS_HANDLE CEEJitInfo::getLikelyClass(
+                     CORINFO_METHOD_HANDLE ftnHnd,
+                     CORINFO_CLASS_HANDLE  baseHnd,
+                     UINT32                ilOffset,
+                     UINT32 *              pLikelihood,
+                     UINT32 *              pNumberOfClasses
+)
+{
+    CONTRACTL {
+        THROWS;
+        GC_TRIGGERS;
+        MODE_PREEMPTIVE;
+    } CONTRACTL_END;
+
+    CORINFO_CLASS_HANDLE result = NULL;
+    *pLikelihood = 0;
+    *pNumberOfClasses = 0;
+
+    JIT_TO_EE_TRANSITION();
+
+#ifdef FEATURE_PGO
+
+    // Query the PGO manager's per call site class profile.
+    //
+    // The IL size is part of the key used to find the method's profile data,
+    // so it must describe the method being queried (pMD), which is not
+    // necessarily the method being compiled.
+    //
+    MethodDesc* pMD = (MethodDesc*)ftnHnd;
+    unsigned codeSize = 0;
+    if (pMD->IsDynamicMethod())
+    {
+        unsigned stackSize, ehSize;
+        CorInfoOptions options;
+        DynamicResolver * pResolver = pMD->AsDynamicMethodDesc()->GetResolver();
+        pResolver->GetCodeInfo(&codeSize, &stackSize, &options, &ehSize);
+    }
+    else if (pMD->HasILHeader())
+    {
+        COR_ILMETHOD_DECODER decoder(pMD->GetILHeader());
+        codeSize = decoder.GetCodeSize();
+    }
+
+    result = PgoManager::getLikelyClass(pMD, codeSize, ilOffset, pLikelihood, pNumberOfClasses);
+
+#endif
+
+    EE_TO_JIT_TRANSITION();
+
+    return result;
+}
+
 void CEEJitInfo::allocMem (
     ULONG               hotCodeSize,    /* IN */
     ULONG               coldCodeSize,   /* IN */
@@ -12660,7 +12708,13 @@ CORJIT_FLAGS GetCompileFlags(MethodDesc * ftn, CORJIT_FLAGS flags, CORINFO_METHO
 
 #ifdef FEATURE_PGO
 
-    if (CLRConfig::GetConfigValue(CLRConfig::INTERNAL_WritePGOData) > 0)
+    // Instrument, if
+    // 
+    // * We're writing pgo data and we're jitting at Tier0.
+    // * Tiered PGO is enabled and we're jitting at Tier0.
+    //
+    if ((CLRConfig::GetConfigValue(CLRConfig::INTERNAL_WritePGOData) > 0)
+        && flags.IsSet(CORJIT_FLAGS::CORJIT_FLAG_TIER0))
     {
         flags.Set(CORJIT_FLAGS::CORJIT_FLAG_BBINSTR);
     }
@@ -14159,6 +14213,17 @@ HRESULT CEEInfo::getMethodBlockCounts(
     UNREACHABLE_RET();      // only called on derived class.
 }
 
+// Base-class placeholder: class profile queries are only serviced by the
+// derived class, so reaching this implementation is treated as unreachable.
+// Parameter names match the declaration and the derived-class override.
+//
+CORINFO_CLASS_HANDLE CEEInfo::getLikelyClass(
+                     CORINFO_METHOD_HANDLE ftnHnd,
+                     CORINFO_CLASS_HANDLE  baseHnd,
+                     UINT32                ilOffset,
+                     UINT32*               pLikelihood,
+                     UINT32*               pNumberOfClasses
+)
+{
+    LIMITED_METHOD_CONTRACT;
+    UNREACHABLE_RET();      // only called on derived class.
+}
 
 void CEEInfo::recordCallSite(
         ULONG                 instrOffset,  /* IN */
index ccdafd0..7518004 100644 (file)
@@ -1045,6 +1045,14 @@ public:
             UINT32 *              pNumRuns
             );
 
+    CORINFO_CLASS_HANDLE getLikelyClass(
+            CORINFO_METHOD_HANDLE ftnHnd,
+            CORINFO_CLASS_HANDLE  baseHnd,
+            UINT32                ilOffset,            
+            UINT32 *              pLikelihood,
+            UINT32 *              pNumberOfClasses
+            );
+
     void recordCallSite(
             ULONG                 instrOffset,  /* IN */
             CORINFO_SIG_INFO *    callSig,      /* IN */
@@ -1247,6 +1255,14 @@ public:
         UINT32 *                      pNumRuns
     );
 
+    CORINFO_CLASS_HANDLE getLikelyClass(
+            CORINFO_METHOD_HANDLE ftnHnd,
+            CORINFO_CLASS_HANDLE  baseHnd,
+            UINT32                ilOffset,
+            UINT32 *              pLikelihood,
+            UINT32 *              pNumberOfClasses
+            );
+
     void recordCallSite(
             ULONG                     instrOffset,  /* IN */
             CORINFO_SIG_INFO *        callSig,      /* IN */
index 16d57f2..3b7ab17 100644 (file)
@@ -8,11 +8,82 @@
 #ifdef FEATURE_PGO
 
 ICorJitInfo::BlockCounts* PgoManager::s_PgoData;
-unsigned                  PgoManager::s_PgoIndex;
+unsigned volatile         PgoManager::s_PgoIndex;
 const char* const         PgoManager::s_FileHeaderString  = "*** START PGO Data, max index = %u ***\n";
 const char* const         PgoManager::s_FileTrailerString = "*** END PGO Data ***\n";
 const char* const         PgoManager::s_MethodHeaderString = "@@@ token 0x%08X hash 0x%08X ilSize 0x%08X records 0x%08X index %u\n";
 const char* const         PgoManager::s_RecordString = "ilOffs %u count %u\n";
+const char* const         PgoManager::s_ClassProfileHeader = "classProfile iloffs %u samples %u entries %u totalCount %u %s\n";
+const char* const         PgoManager::s_ClassProfileEntry = "class %p (%s) count %u\n";
+
+// Data item in class profile histogram
+//
+struct HistogramEntry
+{
+    // Class that was observed at runtime, as read from a class profile
+    // table slot
+    CORINFO_CLASS_HANDLE m_mt;
+    // Number of class profile table slots that hold this class
+    unsigned             m_count;
+};
+
+// Summarizes a ClassProfile table by forming a Histogram
+//
+struct Histogram
+{
+    // Tallies the non-null slots of classProfile's ClassTable by class.
+    Histogram(const ICorJitInfo::ClassProfile* classProfile);
+
+    // Number of nonzero entries in the histogram
+    // (i.e. number of distinct classes found in the table)
+    unsigned m_count;
+    // Sum of counts from all entries in the histogram
+    // (i.e. number of non-null table slots)
+    unsigned m_totalCount;
+    // Histogram entries, in no particular order.
+    // The first m_count of these will be valid; the rest are zeroed.
+    HistogramEntry m_histogram[ICorJitInfo::ClassProfile::SIZE];
+};
+
+// Build the histogram by walking every slot of the class profile table and
+// tallying each non-null class handle into its bucket.
+//
+Histogram::Histogram(const ICorJitInfo::ClassProfile* classProfile)
+{
+    m_count = 0;
+    m_totalCount = 0;
+
+    for (unsigned slot = 0; slot < ICorJitInfo::ClassProfile::SIZE; slot++)
+    {
+        CORINFO_CLASS_HANDLE observed = classProfile->ClassTable[slot];
+
+        // Empty slots contribute nothing.
+        //
+        if (observed != NULL)
+        {
+            m_totalCount++;
+
+            // Search the buckets filled so far for this class.
+            //
+            unsigned bucket = 0;
+            while ((bucket < m_count) && (m_histogram[bucket].m_mt != observed))
+            {
+                bucket++;
+            }
+
+            if (bucket < m_count)
+            {
+                // Seen before: bump its tally.
+                //
+                m_histogram[bucket].m_count++;
+            }
+            else
+            {
+                // First sighting: claim the next free bucket
+                // (bucket == m_count at this point).
+                //
+                m_histogram[bucket].m_mt = observed;
+                m_histogram[bucket].m_count = 1;
+                m_count++;
+            }
+        }
+    }
+
+    // Clear the buckets that were never claimed
+    //
+    for (unsigned unused = m_count; unused < ICorJitInfo::ClassProfile::SIZE; unused++)
+    {
+        m_histogram[unused].m_mt = 0;
+        m_histogram[unused].m_count = 0;
+    }
+}
 
 void PgoManager::Initialize()
 {
@@ -36,6 +107,12 @@ void PgoManager::Shutdown()
     WritePgoData();
 }
 
+// Asserts that address falls inside the global profile slab: strictly after
+// its start (s_PgoData) and no further than BUFFER_SIZE records past it.
+// The upper bound is inclusive, so a one-past-the-end address is accepted.
+//
+void PgoManager::VerifyAddress(void* address)
+{
+    _ASSERTE(address > s_PgoData);
+    _ASSERTE(address <= s_PgoData + BUFFER_SIZE);
+}
+
 void PgoManager::WritePgoData()
 {
     if (CLRConfig::GetConfigValue(CLRConfig::INTERNAL_WritePGOData) == 0)
@@ -47,7 +124,6 @@ void PgoManager::WritePgoData()
     {
         return;
     }
-
     CLRConfigStringHolder fileName(CLRConfig::GetConfigValue(CLRConfig::INTERNAL_PGODataPath));
 
     if (fileName == NULL)
@@ -80,15 +156,79 @@ void PgoManager::WritePgoData()
 
         index += 2;
 
-        ICorJitInfo::BlockCounts* records     = &s_PgoData[index];
-        unsigned                  recordCount = header->recordCount - 2;
-        unsigned                  lastOffset  = 0;
-        for (unsigned i = 0; i < recordCount; i++)
+        ICorJitInfo::BlockCounts* records         = &s_PgoData[index];
+        unsigned                  recordCount     = header->recordCount - 2;
+        unsigned                  lastOffset      = 0;
+        bool                      hasClassProfile = false;
+        unsigned                  i               = 0;
+
+        while (i < recordCount)
         {
             const unsigned thisOffset = records[i].ILOffset;
-            assert((thisOffset > lastOffset) || (lastOffset == 0));
+
+
+            if ((thisOffset & ICorJitInfo::ClassProfile::CLASS_FLAG) != 0)
+            {
+                // remainder must be class probe data
+                hasClassProfile = true;
+                break;
+            }
+
             lastOffset = thisOffset;
             fprintf(pgoDataFile, s_RecordString, records[i].ILOffset, records[i].ExecutionCount);
+            i++;
+        }
+
+        if (hasClassProfile)
+        {
+            fflush(pgoDataFile);
+
+            // Write out histogram of each probe's data.
+            // We currently don't expect to be able to read this back in.
+            // 
+            while (i < recordCount)
+            {
+                // Should be enough room left for a class profile.
+                _ASSERTE(i + sizeof(ICorJitInfo::ClassProfile) / sizeof(ICorJitInfo::BlockCounts) <= recordCount);
+
+                const ICorJitInfo::ClassProfile* classProfile = (ICorJitInfo::ClassProfile*)&s_PgoData[i + index];
+
+                // Form a histogram...
+                //
+                Histogram h(classProfile);
+
+                // And display...
+                //
+                // Figure out if this is a virtual or interface probe.
+                //
+                const char* profileType = "virtual";
+
+                if ((classProfile->ILOffset & ICorJitInfo::ClassProfile::INTERFACE_FLAG) != 0)
+                {
+                    profileType = "interface";
+                }
+
+                // "classProfile iloffs %u samples %u entries %u totalCount %u %s\n";
+                //
+                fprintf(pgoDataFile, s_ClassProfileHeader, (classProfile->ILOffset & ICorJitInfo::ClassProfile::OFFSET_MASK),
+                    classProfile->Count, h.m_count, h.m_totalCount, profileType);
+
+                for (unsigned j = 0; j < h.m_count; j++)
+                {
+                    CORINFO_CLASS_HANDLE clsHnd = h.m_histogram[j].m_mt;
+                    const char* className = "n/a";
+#ifdef _DEBUG
+                    TypeHandle typeHnd(clsHnd);
+                    MethodTable* pMT = typeHnd.AsMethodTable();
+                    className = pMT->GetDebugClassName();
+#endif
+                    fprintf(pgoDataFile, s_ClassProfileEntry, clsHnd, className, h.m_histogram[j].m_count);
+                }
+
+                // Advance to next entry.
+                //
+                i += sizeof(ICorJitInfo::ClassProfile) / sizeof(ICorJitInfo::BlockCounts);
+            }
         }
 
         index += recordCount;
@@ -179,7 +319,7 @@ void PgoManager::ReadPgoData()
             continue;
         }
 
-        assert(index == rIndex);
+        _ASSERTE(index == rIndex);
         methods++;
 
         // If there's not enough room left, bail
@@ -218,8 +358,13 @@ void PgoManager::ReadPgoData()
 
             if (sscanf_s(buffer, s_RecordString, &s_PgoData[index].ILOffset, &s_PgoData[index].ExecutionCount) != 2)
             {
-                failed = true;
-                break;
+                // This might be class profile data; if so just skip it.
+                //
+                if (strstr(buffer, "class") != buffer)
+                {
+                    failed = true;
+                    break;
+                }
             }
 
             index++;
@@ -342,6 +487,172 @@ HRESULT PgoManager::getMethodBlockCounts(MethodDesc* pMD, unsigned ilSize, UINT3
     return E_NOTIMPL;
 }
 
+// See if there is a class profile for this method at the indicated il Offset.
+// If so, return the most frequently seen class, along with the likelihood that
+// it was the class seen, and the total number of classes seen.
+//
+// Return NULL if there is no profile data to be found.
+//
+CORINFO_CLASS_HANDLE PgoManager::getLikelyClass(MethodDesc* pMD, unsigned ilSize, unsigned ilOffset, UINT32* pLikelihood, UINT32* pNumberOfClasses)
+{
+    // Outputs default to "nothing known".
+    //
+    *pLikelihood = 0;
+    *pNumberOfClasses = 0;
+
+    // Bail if there's no profile data.
+    //
+    if (s_PgoData == NULL)
+    {
+        return NULL;
+    }
+
+    // Read the (volatile) slab extent once, and compute the key that
+    // identifies this method's entry in the slab.
+    //
+    const unsigned slabLimit = s_PgoIndex;
+    const unsigned token     = pMD->IsDynamicMethod() ? 0 : pMD->GetMemberDef();
+    const unsigned hash      = pMD->GetStableHash();
+
+    // Number of BlockCounts-sized records occupied by one class profile.
+    // NOTE(review): this assumes sizeof(ClassProfile) is an exact multiple
+    // of sizeof(BlockCounts) -- confirm.
+    //
+    const unsigned classProfileRecords =
+        (unsigned)(sizeof(ICorJitInfo::ClassProfile) / sizeof(ICorJitInfo::BlockCounts));
+
+    unsigned index = 0;
+
+    while (index < slabLimit)
+    {
+        // The first two "records" of each entry are actually header data
+        // to identify the method.
+        //
+        Header* const header = (Header*)&s_PgoData[index];
+
+        // Sanity check that header data looks reasonable. If not, just
+        // fail the lookup.
+        //
+        if ((header->recordCount < MIN_RECORD_COUNT) || (header->recordCount > MAX_RECORD_COUNT))
+        {
+            break;
+        }
+
+        // Not this method's entry? Move on to the next one.
+        //
+        if ((header->token != token) || (header->hash != hash) || (header->ilSize != ilSize))
+        {
+            index += header->recordCount;
+            continue;
+        }
+
+        // Found the method's data. The block count records come first; class
+        // profiles follow and are recognized by CLASS_FLAG being set in their
+        // IL offset. Skip past all the count records.
+        //
+        unsigned j = 2;
+
+        while ((j < header->recordCount) &&
+               ((s_PgoData[index + j].ILOffset & ICorJitInfo::ClassProfile::CLASS_FLAG) == 0))
+        {
+            j++;
+        }
+
+        // Now we're in the "class profile" portion of the slab for this method.
+        // Look for the one that has the right IL offset.
+        //
+        while (j < header->recordCount)
+        {
+            const ICorJitInfo::ClassProfile* const classProfile = (ICorJitInfo::ClassProfile*)&s_PgoData[index + j];
+
+            if ((classProfile->ILOffset & ICorJitInfo::ClassProfile::OFFSET_MASK) != ilOffset)
+            {
+                j += classProfileRecords;
+                continue;
+            }
+
+            // Form a histogram, and use its entry count as the estimate of
+            // the number of classes.
+            //
+            Histogram h(classProfile);
+
+            *pNumberOfClasses = h.m_count;
+
+            // Pick the most frequently seen class; on ties the earliest
+            // histogram entry wins.
+            // (perhaps, use count to augment likelihood?)
+            //
+            unsigned bestEntry = 0;
+            unsigned bestCount = 0;
+
+            for (unsigned m = 0; m < h.m_count; m++)
+            {
+                if (h.m_histogram[m].m_count > bestCount)
+                {
+                    bestEntry = m;
+                    bestCount = h.m_histogram[m].m_count;
+                }
+            }
+
+            // An empty histogram means the probe never recorded a class.
+            //
+            if (bestCount == 0)
+            {
+                return NULL;
+            }
+
+            // m_totalCount is nonzero whenever any entry has a nonzero count.
+            //
+            *pLikelihood = (100 * bestCount) / h.m_totalCount;
+            return h.m_histogram[bestEntry].m_mt;
+        }
+
+        // Failed to find a class profile entry
+        //
+        return NULL;
+    }
+
+    // Failed to find any sort of profile data for this method
+    //
+    return NULL;
+}
+
 #else
 
 // Stub version for !FEATURE_PGO builds
@@ -364,4 +675,11 @@ HRESULT PgoManager::getMethodBlockCounts(MethodDesc* pMD, unsigned ilSize, UINT3
     return E_NOTIMPL;
 }
 
+// Stub version for !FEATURE_PGO builds: always returns NULL (no likely class) and leaves
+// pLikelihood / pNumberOfClasses unwritten. Signature must match the PgoManager declaration.
+CORINFO_CLASS_HANDLE PgoManager::getLikelyClass(MethodDesc* pMD, unsigned ilSize, unsigned ilOffset, UINT32* pLikelihood, UINT32* pNumberOfClasses)
+{
+    return NULL;
+}
+
 #endif // FEATURE_PGO
index c5fc527..3ba4ab6 100644 (file)
@@ -22,18 +22,26 @@ public:
     static HRESULT allocMethodBlockCounts(MethodDesc* pMD, UINT32 count,
         ICorJitInfo::BlockCounts** pBlockCounts, unsigned ilSize);
 
-    // Retreive the profile block count buffer for a method
+    // Retrieve the profile block count buffer for a method
     static HRESULT getMethodBlockCounts(MethodDesc* pMD, unsigned ilSize, UINT32* pCount,
         ICorJitInfo::BlockCounts** pBlockCounts, UINT32* pNumRuns);
 
+    // Retrieve the most likely class for a particular call
+    static CORINFO_CLASS_HANDLE getLikelyClass(MethodDesc* pMD, unsigned ilSize, unsigned ilOffset, UINT32* pLikelihood, UINT32* pNumberOfClasses);
+
+    // Verify address in bounds
+    static void VerifyAddress(void* address);
+
 #ifdef FEATURE_PGO
 
 private:
 
     enum
     {
-        // Number of ICorJitInfo::BlockCount records in the global slab
-        BUFFER_SIZE      = 64 * 1024,
+        // Number of ICorJitInfo::BlockCount records in the global slab.
+        // Currently 4MB for a 64 bit system.
+        //
+        BUFFER_SIZE      = 8 * 64 * 1024,
         MIN_RECORD_COUNT = 3,
         MAX_RECORD_COUNT = BUFFER_SIZE
     };
@@ -57,13 +65,15 @@ private:
     static ICorJitInfo::BlockCounts* s_PgoData;
 
     // Index of next free entry in the global slab
-    static unsigned s_PgoIndex;
+    static unsigned volatile s_PgoIndex;
 
     // Formatting strings for file input/output
     static const char* const s_FileHeaderString;
     static const char* const s_FileTrailerString;
     static const char* const s_MethodHeaderString;
     static const char* const s_RecordString;
+    static const char* const s_ClassProfileHeader;
+    static const char* const s_ClassProfileEntry;
 
 #endif // FEATURE_PGO
 };
index a72c379..036457a 100644 (file)
@@ -1073,6 +1073,16 @@ HRESULT ZapInfo::getMethodBlockCounts (
     return S_OK;
 }
 
+CORINFO_CLASS_HANDLE ZapInfo::getLikelyClass(
+    CORINFO_METHOD_HANDLE ftnHnd,
+    CORINFO_CLASS_HANDLE  baseHnd,
+    UINT32                ilOffset,
+    UINT32*               pLikelihood,
+    UINT32*               pNumberOfClasses)
+{
+    return NULL; // Always reports no likely class; pLikelihood and pNumberOfClasses are not written.
+}
+
 void ZapInfo::allocMem(
     ULONG               hotCodeSize,    /* IN */
     ULONG               coldCodeSize,   /* IN */
index 3a44d2e..74a8c0e 100644 (file)
@@ -313,6 +313,13 @@ public:
             ICorJitInfo::BlockCounts ** pBlockCounts,
             UINT32 * pNumRuns);
 
+    CORINFO_CLASS_HANDLE getLikelyClass(
+            CORINFO_METHOD_HANDLE ftnHnd,
+            CORINFO_CLASS_HANDLE  baseHnd,
+            UINT32                ilOffset,
+            UINT32 *              pLikelihood,
+            UINT32 *              pNumberOfClasses);
+
     DWORD getJitFlags(CORJIT_FLAGS* jitFlags, DWORD sizeInBytes);
 
     bool runWithErrorTrap(void (*function)(void*), void* param);
index 98ca7ab..4371f64 100644 (file)
@@ -55,6 +55,7 @@
       COMPlus_EnableEHWriteThru;
       COMPlus_JitObjectStackAllocation;
       COMPlus_JitInlinePolicyProfile;
+      COMPlus_JitClassProfiling;
       RunningIlasmRoundTrip
     </COMPlusVariables>
   </PropertyGroup>
     <TestEnvironment Include="jitosr_stress" TC_OnStackReplacement="1" TC_QuickJitForLoops="1" TC_OnStackReplacement_InitialCounter="1" OSR_HitLimit="1" TieredCompilation="1" />
     <TestEnvironment Include="jitpgo" TieredPGO="1" TieredCompilation="1" />
     <TestEnvironment Include="jitpgo_inline" TieredPGO="1" TieredCompilation="1" JitInlinePolicyProfile="1"/>
+    <TestEnvironment Include="jitpgo_classes" TieredPGO="1" TieredCompilation="1" JitEnableGuardedDevirtualization="1" JitClassProfiling="1"/>
     <TestEnvironment Include="jitguardeddevirtualization" JitEnableGuardedDevirtualization="1" TieredCompilation="0" />
     <TestEnvironment Include="jitehwritethru" EnableEhWriteThru="1" TieredCompilation="0" />
     <TestEnvironment Include="jitobjectstackallocation" JitObjectStackAllocation="1" TieredCompilation="0" />