Add support for delegate GDV and method-based vtable GDV (#68703)
author: Jakob Botsch Nielsen <Jakob.botsch.nielsen@gmail.com>
Fri, 17 Jun 2022 23:58:01 +0000 (01:58 +0200)
committer: GitHub <noreply@github.com>
Fri, 17 Jun 2022 23:58:01 +0000 (01:58 +0200)
Add support for instrumenting delegate calls and vtable calls into method handle histograms. Use these histograms to do GDV for delegate calls and also support method-based GDV for vtable calls.

For instrumentation we now support class probes at interface call sites, method probes at delegate call sites and both class probes and method probes at vtable call sites. For vtable calls, when turned on, instrumentation produces both histograms as PGO data so that the JIT can later make the choice about what is the best form of guard to use at that site.

For guarding, there are some things to take into account. Delegate calls currently (practically) always point to precode, so this PR just guards on the result of getFunctionFixedEntryPoint, which returns the precode address; this is generally quite cheap (same cost as class-based GDV). That is the case for delegates pointing to instance methods, anyway; this PR does not support static methods yet -- those will be more expensive.

For vtable calls the runtime will backpatch the slots when tiering, so the JIT guards the address retrieved from the vtable against an indirection of the slot, which is slightly more expensive than a class-based guard.

The instrumentation is enabled conditionally with COMPlus_JitDelegateProfiling=1 (for delegates) and COMPlus_JitVTableProfiling=1 (for vtable calls). Currently, delegate profiling is turned on by default while vtable profiling is off by default.

37 files changed:
eng/pipelines/common/templates/runtimes/run-test-job.yml
eng/pipelines/coreclr/libraries-pgo.yml
eng/pipelines/libraries/run-test-job.yml
src/coreclr/inc/corinfo.h
src/coreclr/inc/corjit.h
src/coreclr/inc/jiteeversionguid.h
src/coreclr/inc/jithelpers.h
src/coreclr/inc/readytorun.h
src/coreclr/jit/ClrJit.PAL.exports
src/coreclr/jit/ClrJit.exports
src/coreclr/jit/block.h
src/coreclr/jit/compiler.cpp
src/coreclr/jit/compiler.h
src/coreclr/jit/fgbasic.cpp
src/coreclr/jit/fgprofile.cpp
src/coreclr/jit/gentree.h
src/coreclr/jit/importer.cpp
src/coreclr/jit/indirectcalltransformer.cpp
src/coreclr/jit/inline.h
src/coreclr/jit/jit.h
src/coreclr/jit/jitconfigvalues.h
src/coreclr/jit/likelyclass.cpp
src/coreclr/jit/morph.cpp
src/coreclr/jit/patchpoint.cpp
src/coreclr/tools/Common/Internal/Runtime/ModuleHeaders.cs
src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs
src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs
src/coreclr/tools/Common/Pgo/PgoFormat.cs
src/coreclr/tools/superpmi/mcs/verbdumpmap.cpp
src/coreclr/tools/superpmi/mcs/verbjitflags.cpp
src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp
src/coreclr/tools/superpmi/superpmi-shared/methodcontext.h
src/coreclr/tools/superpmi/superpmi-shared/spmidumphelper.cpp
src/coreclr/vm/jithelpers.cpp
src/coreclr/vm/method.cpp
src/coreclr/vm/pgo.cpp
src/tests/Common/testenvironment.proj

index 9e75af0..d977f6c 100644 (file)
@@ -227,13 +227,13 @@ jobs:
         timeoutInMinutes: 300
       ${{ else }}:
         timeoutInMinutes: 200
-    ${{ if in(parameters.testGroup, 'outerloop', 'jit-experimental', 'pgo', 'jit-cfg') }}:
+    ${{ if in(parameters.testGroup, 'outerloop', 'jit-experimental', 'jit-cfg') }}:
       timeoutInMinutes: 270
     ${{ if in(parameters.testGroup, 'gc-longrunning', 'gc-simulator') }}:
       timeoutInMinutes: 480
     ${{ if in(parameters.testGroup, 'jitstress', 'jitstress-isas-arm', 'jitstressregs-x86', 'jitstressregs', 'jitstress2-jitstressregs', 'gcstress0x3-gcstress0xc', 'ilasm') }}:
       timeoutInMinutes: 390
-    ${{ if in(parameters.testGroup, 'gcstress-extra', 'r2r-extra', 'clrinterpreter') }}:
+    ${{ if in(parameters.testGroup, 'gcstress-extra', 'r2r-extra', 'clrinterpreter', 'pgo') }}:
       timeoutInMinutes: 510
     ${{ if eq(parameters.testGroup, 'jitstress-isas-x86') }}:
       timeoutInMinutes: 960
@@ -397,7 +397,7 @@ jobs:
           ${{ if eq(parameters.runtimeFlavor, 'mono') }}:
             # tiered compilation isn't done on mono yet
             scenarios:
-            - normal 
+            - normal
           ${{ elseif eq(variables['Build.Reason'], 'PullRequest') }}:
             scenarios:
             - no_tiered_compilation
@@ -545,7 +545,9 @@ jobs:
           - defaultpgo
           - dynamicpgo
           - fullpgo
+          - fullpgo_methodprofiling
           - fullpgo_random_gdv
+          - fullpgo_random_gdv_methodprofiling_only
           - fullpgo_random_edge
           - fullpgo_random_gdv_edge
         ${{ if in(parameters.testGroup, 'gc-longrunning') }}:
@@ -568,7 +570,6 @@ jobs:
           - jitelthookenabled_tiered
         ${{ if in(parameters.testGroup, 'jit-experimental') }}:
           scenarios:
-          - jitosr
           - jitosr_stress
           - jitosr_pgo
           - jitosr_stress_random
index 0914451..0a33461 100644 (file)
@@ -47,7 +47,7 @@ jobs:
     helixQueueGroup: libraries
     helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml
     jobParameters:
-      timeoutInMinutes: 150
+      timeoutInMinutes: 600
       testScope: innerloop
       liveRuntimeBuildConfig: checked
       dependsOnTestBuildConfiguration: Release
index 17e3a63..c6fbe83 100644 (file)
@@ -173,10 +173,11 @@ jobs:
               - defaultpgo
               - dynamicpgo
               - fullpgo
+              - fullpgo_methodprofiling
               - fullpgo_random_gdv
+              - fullpgo_random_gdv_methodprofiling_only
               - fullpgo_random_edge
               - fullpgo_random_gdv_edge
-              - jitosr
               - jitosr_stress
               - jitosr_stress_random
               - jitosr_pgo
index 8cb56a6..91c5734 100644 (file)
@@ -640,9 +640,14 @@ enum CorInfoHelpFunc
     CORINFO_HELP_STACK_PROBE,               // Probes each page of the allocated stack frame
 
     CORINFO_HELP_PATCHPOINT,                // Notify runtime that code has reached a patchpoint
+    CORINFO_HELP_PARTIAL_COMPILATION_PATCHPOINT,  // Notify runtime that code has reached a part of the method that wasn't originally jitted.
+
     CORINFO_HELP_CLASSPROFILE32,            // Update 32-bit class profile for a call site
     CORINFO_HELP_CLASSPROFILE64,            // Update 64-bit class profile for a call site
-    CORINFO_HELP_PARTIAL_COMPILATION_PATCHPOINT,  // Notify runtime that code has reached a part of the method that wasn't originally jitted.
+    CORINFO_HELP_DELEGATEPROFILE32,         // Update 32-bit method profile for a delegate call site
+    CORINFO_HELP_DELEGATEPROFILE64,         // Update 64-bit method profile for a delegate call site
+    CORINFO_HELP_VTABLEPROFILE32,           // Update 32-bit method profile for a vtable call site
+    CORINFO_HELP_VTABLEPROFILE64,           // Update 64-bit method profile for a vtable call site
 
     CORINFO_HELP_VALIDATE_INDIRECT_CALL,    // CFG: Validate function pointer
     CORINFO_HELP_DISPATCH_INDIRECT_CALL,    // CFG: Validate and dispatch to pointer
index 54aaded..380db27 100644 (file)
@@ -330,7 +330,8 @@ public:
 
     // Data structure for a single class probe using 32-bit count.
     //
-    // CLASS_FLAG and INTERFACE_FLAG are placed into the Other field in the schema
+    // CLASS_FLAG, INTERFACE_FLAG and DELEGATE_FLAG are placed into the Other field in the schema.
+    // If CLASS_FLAG is set the handle table consists of type handles, and otherwise method handles.
     //
     // Count is the number of times a call was made at that call site.
     //
@@ -338,8 +339,8 @@ public:
     //
     // SAMPLE_INTERVAL must be >= SIZE. SAMPLE_INTERVAL / SIZE
     // gives the average number of calls between table updates.
-    //
-    struct ClassProfile32
+    // 
+    struct HandleHistogram32
     {
         enum
         {
@@ -347,17 +348,18 @@ public:
             SAMPLE_INTERVAL = 32,
             CLASS_FLAG     = 0x80000000,
             INTERFACE_FLAG = 0x40000000,
-            OFFSET_MASK    = 0x3FFFFFFF
+            DELEGATE_FLAG  = 0x20000000,
+            OFFSET_MASK    = 0x0FFFFFFF
         };
 
         uint32_t Count;
-        CORINFO_CLASS_HANDLE ClassTable[SIZE];
+        void* HandleTable[SIZE];
     };
 
-    struct ClassProfile64
+    struct HandleHistogram64
     {
         uint64_t Count;
-        CORINFO_CLASS_HANDLE ClassTable[ClassProfile32::SIZE];
+        void* HandleTable[HandleHistogram32::SIZE];
     };
 
     enum class PgoInstrumentationKind
@@ -387,7 +389,7 @@ public:
         Done = None, // All instrumentation schemas must end with a record which is "Done"
         BasicBlockIntCount = (DescriptorMin * 1) | FourByte, // basic block counter using unsigned 4 byte int
         BasicBlockLongCount = (DescriptorMin * 1) | EightByte, // basic block counter using unsigned 8 byte int
-        HandleHistogramIntCount = (DescriptorMin * 2) | FourByte | AlignPointer, // 4 byte counter that is part of a type histogram. Aligned to match ClassProfile32's alignment.
+        HandleHistogramIntCount = (DescriptorMin * 2) | FourByte | AlignPointer, // 4 byte counter that is part of a type histogram. Aligned to match HandleHistogram32's alignment.
         HandleHistogramLongCount = (DescriptorMin * 2) | EightByte, // 8 byte counter that is part of a type histogram
         HandleHistogramTypes = (DescriptorMin * 3) | TypeHandle, // Histogram of type handles
         HandleHistogramMethods = (DescriptorMin * 3) | MethodHandle, // Histogram of method handles
@@ -396,6 +398,7 @@ public:
         EdgeIntCount = (DescriptorMin * 6) | FourByte, // edge counter using unsigned 4 byte int
         EdgeLongCount = (DescriptorMin * 6) | EightByte, // edge counter using unsigned 8 byte int
         GetLikelyClass = (DescriptorMin * 7) | TypeHandle, // Compressed get likely class data
+        GetLikelyMethod = (DescriptorMin * 7) | MethodHandle, // Compressed get likely method data
     };
 
     struct PgoInstrumentationSchema
@@ -418,7 +421,7 @@ public:
         Sampling= 6,    // PGO data derived from sampling
     };
 
-#define DEFAULT_UNKNOWN_TYPEHANDLE 1
+#define DEFAULT_UNKNOWN_HANDLE 1
 #define UNKNOWN_HANDLE_MIN 1
 #define UNKNOWN_HANDLE_MAX 33
 
index 27f75a4..9a6cbc0 100644 (file)
@@ -43,11 +43,11 @@ typedef const GUID *LPCGUID;
 #define GUID_DEFINED
 #endif // !GUID_DEFINED
 
-constexpr GUID JITEEVersionIdentifier = { /* af5b6632-6fbe-4a2e-82d6-24487a138e4a */
-    0xaf5b6632,
-    0x6fbe,
-    0x4a2e,
-    {0x82, 0xd6, 0x24, 0x48, 0x7a, 0x13, 0x8e, 0x4a}
+constexpr GUID JITEEVersionIdentifier = { /* f2faa5fc-a1ec-4244-aebb-5597bfd7153a */
+    0xf2faa5fc,
+    0xa1ec,
+    0x4244,
+    {0xae, 0xbb, 0x55, 0x97, 0xbf, 0xd7, 0x15, 0x3a}
   };
 
 //////////////////////////////////////////////////////////////////////////////////////////////////////////
index e40eb41..a500c29 100644 (file)
 #endif
 
     JITHELPER(CORINFO_HELP_PATCHPOINT, JIT_Patchpoint, CORINFO_HELP_SIG_REG_ONLY)
+    JITHELPER(CORINFO_HELP_PARTIAL_COMPILATION_PATCHPOINT, JIT_PartialCompilationPatchpoint, CORINFO_HELP_SIG_REG_ONLY)
+
     JITHELPER(CORINFO_HELP_CLASSPROFILE32, JIT_ClassProfile32, CORINFO_HELP_SIG_REG_ONLY)
     JITHELPER(CORINFO_HELP_CLASSPROFILE64, JIT_ClassProfile64, CORINFO_HELP_SIG_REG_ONLY)
-    JITHELPER(CORINFO_HELP_PARTIAL_COMPILATION_PATCHPOINT, JIT_PartialCompilationPatchpoint, CORINFO_HELP_SIG_REG_ONLY)
+    JITHELPER(CORINFO_HELP_DELEGATEPROFILE32, JIT_DelegateProfile32, CORINFO_HELP_SIG_REG_ONLY)
+    JITHELPER(CORINFO_HELP_DELEGATEPROFILE64, JIT_DelegateProfile64, CORINFO_HELP_SIG_REG_ONLY)
+    JITHELPER(CORINFO_HELP_VTABLEPROFILE32, JIT_VTableProfile32, CORINFO_HELP_SIG_4_STACK)
+    JITHELPER(CORINFO_HELP_VTABLEPROFILE64, JIT_VTableProfile64, CORINFO_HELP_SIG_4_STACK)
 
 #if defined(TARGET_AMD64) || defined(TARGET_ARM64)
     JITHELPER(CORINFO_HELP_VALIDATE_INDIRECT_CALL, JIT_ValidateIndirectCall, CORINFO_HELP_SIG_REG_ONLY)
index 7635436..20db292 100644 (file)
@@ -16,7 +16,7 @@
 
 // Keep these in sync with src/coreclr/tools/Common/Internal/Runtime/ModuleHeaders.cs
 #define READYTORUN_MAJOR_VERSION 0x0006
-#define READYTORUN_MINOR_VERSION 0x0001
+#define READYTORUN_MINOR_VERSION 0x0002
 
 #define MINIMUM_READYTORUN_MAJOR_VERSION 0x006
 
index 2625e98..e4e6064 100644 (file)
@@ -1,4 +1,5 @@
 getJit
 jitStartup
 getLikelyClasses
+getLikelyMethods
 jitBuildString
index c6a22db..5430f7b 100644 (file)
@@ -5,4 +5,5 @@ EXPORTS
     getJit
     jitStartup
     getLikelyClasses
+    getLikelyMethods
     jitBuildString
index 88dcb79..e9a539a 100644 (file)
@@ -526,32 +526,32 @@ enum BasicBlockFlags : unsigned __int64
 
 #endif // defined(FEATURE_EH_FUNCLETS) && defined(TARGET_ARM)
 
-    BBF_BACKWARD_JUMP        = MAKE_BBFLAG(24), // BB is surrounded by a backward jump/switch arc
-    BBF_RETLESS_CALL         = MAKE_BBFLAG(25), // BBJ_CALLFINALLY that will never return (and therefore, won't need a paired
-                                                // BBJ_ALWAYS); see isBBCallAlwaysPair().
-    BBF_LOOP_PREHEADER       = MAKE_BBFLAG(26), // BB is a loop preheader block
-    BBF_COLD                 = MAKE_BBFLAG(27), // BB is cold
-
-    BBF_PROF_WEIGHT          = MAKE_BBFLAG(28), // BB weight is computed from profile data
-    BBF_IS_LIR               = MAKE_BBFLAG(29), // Set if the basic block contains LIR (as opposed to HIR)
-    BBF_KEEP_BBJ_ALWAYS      = MAKE_BBFLAG(30), // A special BBJ_ALWAYS block, used by EH code generation. Keep the jump kind
-                                                // as BBJ_ALWAYS. Used for the paired BBJ_ALWAYS block following the
-                                                // BBJ_CALLFINALLY block, as well as, on x86, the final step block out of a
-                                                // finally.
-    BBF_CLONED_FINALLY_BEGIN = MAKE_BBFLAG(31), // First block of a cloned finally region
-
-    BBF_CLONED_FINALLY_END   = MAKE_BBFLAG(32), // Last block of a cloned finally region
-    BBF_HAS_CALL             = MAKE_BBFLAG(33), // BB contains a call
+    BBF_BACKWARD_JUMP                  = MAKE_BBFLAG(24), // BB is surrounded by a backward jump/switch arc
+    BBF_RETLESS_CALL                   = MAKE_BBFLAG(25), // BBJ_CALLFINALLY that will never return (and therefore, won't need a paired
+                                                          // BBJ_ALWAYS); see isBBCallAlwaysPair().
+    BBF_LOOP_PREHEADER                 = MAKE_BBFLAG(26), // BB is a loop preheader block
+    BBF_COLD                           = MAKE_BBFLAG(27), // BB is cold
+
+    BBF_PROF_WEIGHT                    = MAKE_BBFLAG(28), // BB weight is computed from profile data
+    BBF_IS_LIR                         = MAKE_BBFLAG(29), // Set if the basic block contains LIR (as opposed to HIR)
+    BBF_KEEP_BBJ_ALWAYS                = MAKE_BBFLAG(30), // A special BBJ_ALWAYS block, used by EH code generation. Keep the jump kind
+                                                          // as BBJ_ALWAYS. Used for the paired BBJ_ALWAYS block following the
+                                                          // BBJ_CALLFINALLY block, as well as, on x86, the final step block out of a
+                                                          // finally.
+    BBF_CLONED_FINALLY_BEGIN           = MAKE_BBFLAG(31), // First block of a cloned finally region
+
+    BBF_CLONED_FINALLY_END             = MAKE_BBFLAG(32), // Last block of a cloned finally region
+    BBF_HAS_CALL                       = MAKE_BBFLAG(33), // BB contains a call
     BBF_DOMINATED_BY_EXCEPTIONAL_ENTRY = MAKE_BBFLAG(34), // Block is dominated by exceptional entry.
-    BBF_BACKWARD_JUMP_TARGET = MAKE_BBFLAG(35), // Block is a target of a backward jump
+    BBF_BACKWARD_JUMP_TARGET           = MAKE_BBFLAG(35), // Block is a target of a backward jump
 
-    BBF_PATCHPOINT           = MAKE_BBFLAG(36), // Block is a patchpoint
-    BBF_HAS_CLASS_PROFILE    = MAKE_BBFLAG(37), // BB contains a call needing a class profile
-    BBF_PARTIAL_COMPILATION_PATCHPOINT  = MAKE_BBFLAG(38), // Block is a partial compilation patchpoint
-    BBF_HAS_ALIGN            = MAKE_BBFLAG(39), // BB ends with 'align' instruction
-    BBF_TAILCALL_SUCCESSOR   = MAKE_BBFLAG(40), // BB has pred that has potential tail call
+    BBF_PATCHPOINT                     = MAKE_BBFLAG(36), // Block is a patchpoint
+    BBF_HAS_HISTOGRAM_PROFILE          = MAKE_BBFLAG(37), // BB contains a call needing a histogram profile
+    BBF_PARTIAL_COMPILATION_PATCHPOINT = MAKE_BBFLAG(38), // Block is a partial compilation patchpoint
+    BBF_HAS_ALIGN                      = MAKE_BBFLAG(39), // BB ends with 'align' instruction
+    BBF_TAILCALL_SUCCESSOR             = MAKE_BBFLAG(40), // BB has pred that has potential tail call
 
-    BBF_BACKWARD_JUMP_SOURCE = MAKE_BBFLAG(41), // Block is a source of a backward jump
+    BBF_BACKWARD_JUMP_SOURCE           = MAKE_BBFLAG(41), // Block is a source of a backward jump
 
     // The following are sets of flags.
 
@@ -582,7 +582,7 @@ enum BasicBlockFlags : unsigned __int64
     // TODO: Should BBF_RUN_RARELY be added to BBF_SPLIT_GAINED ?
 
     BBF_SPLIT_GAINED = BBF_DONT_REMOVE | BBF_HAS_JMP | BBF_BACKWARD_JUMP | BBF_HAS_IDX_LEN | BBF_HAS_NEWARRAY | BBF_PROF_WEIGHT | \
-                       BBF_HAS_NEWOBJ | BBF_KEEP_BBJ_ALWAYS | BBF_CLONED_FINALLY_END | BBF_HAS_NULLCHECK | BBF_HAS_CLASS_PROFILE,
+                       BBF_HAS_NEWOBJ | BBF_KEEP_BBJ_ALWAYS | BBF_CLONED_FINALLY_END | BBF_HAS_NULLCHECK | BBF_HAS_HISTOGRAM_PROFILE,
 };
 
 inline constexpr BasicBlockFlags operator ~(BasicBlockFlags a)
@@ -918,8 +918,8 @@ struct BasicBlock : private LIR::Range
     };
 
     union {
-        unsigned bbStkTempsOut;      // base# for output stack temps
-        int      bbClassSchemaIndex; // schema index for class instrumentation
+        unsigned bbStkTempsOut;          // base# for output stack temps
+        int      bbHistogramSchemaIndex; // schema index for histogram instrumentation
     };
 
 #define MAX_XCPTN_INDEX (USHRT_MAX - 1)
index 652d560..3051591 100644 (file)
@@ -6384,10 +6384,10 @@ int Compiler::compCompileHelper(CORINFO_MODULE_HANDLE classPtr,
     compHndBBtabCount      = 0;
     compHndBBtabAllocCount = 0;
 
-    info.compNativeCodeSize    = 0;
-    info.compTotalHotCodeSize  = 0;
-    info.compTotalColdCodeSize = 0;
-    info.compClassProbeCount   = 0;
+    info.compNativeCodeSize            = 0;
+    info.compTotalHotCodeSize          = 0;
+    info.compTotalColdCodeSize         = 0;
+    info.compHandleHistogramProbeCount = 0;
 
     compHasBackwardJump          = false;
     compHasBackwardJumpInHandler = false;
index b864a9a..d7178fb 100644 (file)
@@ -1821,6 +1821,7 @@ class Compiler
     friend class MorphInitBlockHelper;
     friend class MorphCopyBlockHelper;
     friend class CallArgs;
+    friend class IndirectCallTransformer;
 
 #ifdef FEATURE_HW_INTRINSICS
     friend struct HWIntrinsicInfo;
@@ -3539,6 +3540,18 @@ public:
                              bool                    isExplicitTailCall,
                              IL_OFFSET               ilOffset = BAD_IL_OFFSET);
 
+    bool impConsiderCallProbe(GenTreeCall* call, IL_OFFSET ilOffset);
+
+    enum class GDVProbeType
+    {
+        None,
+        ClassProfile,
+        MethodProfile,
+        MethodAndClassProfile,
+    };
+
+    GDVProbeType compClassifyGDVProbeType(GenTreeCall* call);
+
     //=========================================================================
     //                          PROTECTED
     //=========================================================================
@@ -5427,7 +5440,7 @@ protected:
     bool fgGetProfileWeightForBasicBlock(IL_OFFSET offset, weight_t* weight);
 
     Instrumentor* fgCountInstrumentor;
-    Instrumentor* fgClassInstrumentor;
+    Instrumentor* fgHistogramInstrumentor;
 
     PhaseStatus fgPrepareToInstrumentMethod();
     PhaseStatus fgInstrumentMethod();
@@ -5435,11 +5448,13 @@ protected:
     void        fgIncorporateBlockCounts();
     void        fgIncorporateEdgeCounts();
 
-    CORINFO_CLASS_HANDLE getRandomClass(ICorJitInfo::PgoInstrumentationSchema* schema,
-                                        UINT32                                 countSchemaItems,
-                                        BYTE*                                  pInstrumentationData,
-                                        int32_t                                ilOffset,
-                                        CLRRandom*                             random);
+    void getRandomGDV(ICorJitInfo::PgoInstrumentationSchema* schema,
+                      UINT32                                 countSchemaItems,
+                      BYTE*                                  pInstrumentationData,
+                      int32_t                                ilOffset,
+                      CLRRandom*                             random,
+                      CORINFO_CLASS_HANDLE*                  classGuess,
+                      CORINFO_METHOD_HANDLE*                 methodGuess);
 
 public:
     const char*                            fgPgoFailReason;
@@ -5679,6 +5694,7 @@ private:
                                                      Statement*       paramAssignmentInsertionPoint);
     GenTree* fgMorphCall(GenTreeCall* call);
     GenTree* fgExpandVirtualVtableCallTarget(GenTreeCall* call);
+
     void fgMorphCallInline(GenTreeCall* call, InlineResult* result);
     void fgMorphCallInlineHelper(GenTreeCall* call, InlineResult* result, InlineContext** createdContext);
 #if DEBUG
@@ -6806,13 +6822,21 @@ public:
         optMethodFlags |= OMF_HAS_GUARDEDDEVIRT;
     }
 
+    void pickGDV(GenTreeCall*           call,
+                 IL_OFFSET              ilOffset,
+                 bool                   isInterface,
+                 CORINFO_CLASS_HANDLE*  classGuess,
+                 CORINFO_METHOD_HANDLE* methodGuess,
+                 unsigned*              likelihood);
+
     void considerGuardedDevirtualization(GenTreeCall*            call,
                                          IL_OFFSET               ilOffset,
                                          bool                    isInterface,
                                          CORINFO_METHOD_HANDLE   baseMethod,
                                          CORINFO_CLASS_HANDLE    baseClass,
-                                         CORINFO_CONTEXT_HANDLE* pContextHandle DEBUGARG(CORINFO_CLASS_HANDLE objClass)
-                                             DEBUGARG(const char* objClassName));
+                                         CORINFO_CONTEXT_HANDLE* pContextHandle);
+
+    bool isCompatibleMethodGDV(GenTreeCall* call, CORINFO_METHOD_HANDLE gdvTarget);
 
     void addGuardedDevirtualizationCandidate(GenTreeCall*          call,
                                              CORINFO_METHOD_HANDLE methodHandle,
@@ -9548,7 +9572,7 @@ public:
         unsigned genCPU; // What CPU are we running on
 
         // Number of class profile probes in this method
-        unsigned compClassProbeCount;
+        unsigned compHandleHistogramProbeCount;
 
     } info;
 
index 148dc02..541bdc2 100644 (file)
@@ -186,7 +186,7 @@ void Compiler::fgInit()
     fgPgoInlineeNoPgo            = 0;
     fgPgoInlineeNoPgoSingleBlock = 0;
     fgCountInstrumentor          = nullptr;
-    fgClassInstrumentor          = nullptr;
+    fgHistogramInstrumentor      = nullptr;
     fgPredListSortVector         = nullptr;
 }
 
index bfd7000..4eb555a 100644 (file)
@@ -1425,11 +1425,11 @@ void EfficientEdgeCountInstrumentor::Instrument(BasicBlock* block, Schema& schem
 }
 
 //------------------------------------------------------------------------
-// ClassProbeVisitor: invoke functor on each virtual call or cast-related
+// HandleHistogramProbeVisitor: invoke functor on each virtual call or cast-related
 //     helper calls in a tree
 //
 template <class TFunctor>
-class ClassProbeVisitor final : public GenTreeVisitor<ClassProbeVisitor<TFunctor>>
+class HandleHistogramProbeVisitor final : public GenTreeVisitor<HandleHistogramProbeVisitor<TFunctor>>
 {
 public:
     enum
@@ -1440,26 +1440,17 @@ public:
     TFunctor& m_functor;
     Compiler* m_compiler;
 
-    ClassProbeVisitor(Compiler* compiler, TFunctor& functor)
-        : GenTreeVisitor<ClassProbeVisitor>(compiler), m_functor(functor), m_compiler(compiler)
+    HandleHistogramProbeVisitor(Compiler* compiler, TFunctor& functor)
+        : GenTreeVisitor<HandleHistogramProbeVisitor>(compiler), m_functor(functor), m_compiler(compiler)
     {
     }
     Compiler::fgWalkResult PreOrderVisit(GenTree** use, GenTree* user)
     {
         GenTree* const node = *use;
-        if (node->IsCall() && (node->AsCall()->gtClassProfileCandidateInfo != nullptr))
+        if (node->IsCall() && (m_compiler->compClassifyGDVProbeType(node->AsCall()) != Compiler::GDVProbeType::None))
         {
-            GenTreeCall* const call = node->AsCall();
-            if (call->IsVirtual() && (call->gtCallType != CT_INDIRECT))
-            {
-                // virtual call
-                m_functor(m_compiler, call);
-            }
-            else if (m_compiler->impIsCastHelperEligibleForClassProbe(call))
-            {
-                // isinst/cast helper
-                m_functor(m_compiler, call);
-            }
+            assert(node->AsCall()->gtHandleHistogramProfileCandidateInfo != nullptr);
+            m_functor(m_compiler, node->AsCall());
         }
 
         return Compiler::WALK_CONTINUE;
@@ -1467,44 +1458,65 @@ public:
 };
 
 //------------------------------------------------------------------------
-// BuildClassProbeSchemaGen: functor that creates class probe schema elements
+// BuildHandleHistogramProbeSchemaGen: functor that creates class probe schema elements
 //
-class BuildClassProbeSchemaGen
+class BuildHandleHistogramProbeSchemaGen
 {
 private:
     Schema&   m_schema;
     unsigned& m_schemaCount;
 
 public:
-    BuildClassProbeSchemaGen(Schema& schema, unsigned& schemaCount) : m_schema(schema), m_schemaCount(schemaCount)
+    BuildHandleHistogramProbeSchemaGen(Schema& schema, unsigned& schemaCount)
+        : m_schema(schema), m_schemaCount(schemaCount)
     {
     }
 
     void operator()(Compiler* compiler, GenTreeCall* call)
     {
-        ICorJitInfo::PgoInstrumentationSchema schemaElem;
-        schemaElem.Count = 1;
-        schemaElem.Other = ICorJitInfo::ClassProfile32::CLASS_FLAG;
+        Compiler::GDVProbeType probeType = compiler->compClassifyGDVProbeType(call);
+
+        if ((probeType == Compiler::GDVProbeType::ClassProfile) ||
+            (probeType == Compiler::GDVProbeType::MethodAndClassProfile))
+        {
+            CreateHistogramSchemaEntries(compiler, call, true /* isTypeHistogram */);
+        }
+
+        if ((probeType == Compiler::GDVProbeType::MethodProfile) ||
+            (probeType == Compiler::GDVProbeType::MethodAndClassProfile))
+        {
+            CreateHistogramSchemaEntries(compiler, call, false /* isTypeHistogram */);
+        }
+    }
+
+    void CreateHistogramSchemaEntries(Compiler* compiler, GenTreeCall* call, bool isTypeHistogram)
+    {
+        ICorJitInfo::PgoInstrumentationSchema schemaElem = {};
+        schemaElem.Count                                 = 1;
+        schemaElem.Other = isTypeHistogram ? ICorJitInfo::HandleHistogram32::CLASS_FLAG : 0;
         if (call->IsVirtualStub())
         {
-            schemaElem.Other |= ICorJitInfo::ClassProfile32::INTERFACE_FLAG;
+            schemaElem.Other |= ICorJitInfo::HandleHistogram32::INTERFACE_FLAG;
         }
-        else
+        else if (call->IsDelegateInvoke())
         {
-            assert(call->IsVirtualVtable() || compiler->impIsCastHelperEligibleForClassProbe(call));
+            schemaElem.Other |= ICorJitInfo::HandleHistogram32::DELEGATE_FLAG;
         }
 
         schemaElem.InstrumentationKind = JitConfig.JitCollect64BitCounts()
                                              ? ICorJitInfo::PgoInstrumentationKind::HandleHistogramLongCount
                                              : ICorJitInfo::PgoInstrumentationKind::HandleHistogramIntCount;
-        schemaElem.ILOffset = (int32_t)call->gtClassProfileCandidateInfo->ilOffset;
+        schemaElem.ILOffset = (int32_t)call->gtHandleHistogramProfileCandidateInfo->ilOffset;
         schemaElem.Offset   = 0;
 
         m_schema.push_back(schemaElem);
 
+        m_schemaCount++;
+
         // Re-using ILOffset and Other fields from schema item for TypeHandleHistogramCount
-        schemaElem.InstrumentationKind = ICorJitInfo::PgoInstrumentationKind::HandleHistogramTypes;
-        schemaElem.Count               = ICorJitInfo::ClassProfile32::SIZE;
+        schemaElem.InstrumentationKind = isTypeHistogram ? ICorJitInfo::PgoInstrumentationKind::HandleHistogramTypes
+                                                         : ICorJitInfo::PgoInstrumentationKind::HandleHistogramMethods;
+        schemaElem.Count = ICorJitInfo::HandleHistogram32::SIZE;
         m_schema.push_back(schemaElem);
 
         m_schemaCount++;
@@ -1512,9 +1524,9 @@ public:
 };
 
 //------------------------------------------------------------------------
-// ClassProbeInserter: functor that adds class probe instrumentation
+// HandleHistogramProbeInserter: functor that adds class/method probe instrumentation
 //
-class ClassProbeInserter
+class HandleHistogramProbeInserter
 {
     Schema&   m_schema;
     uint8_t*  m_profileMemory;
@@ -1522,7 +1534,7 @@ class ClassProbeInserter
     unsigned& m_instrCount;
 
 public:
-    ClassProbeInserter(Schema& schema, uint8_t* profileMemory, int* pCurrentSchemaIndex, unsigned& instrCount)
+    HandleHistogramProbeInserter(Schema& schema, uint8_t* profileMemory, int* pCurrentSchemaIndex, unsigned& instrCount)
         : m_schema(schema)
         , m_profileMemory(profileMemory)
         , m_currentSchemaIndex(pCurrentSchemaIndex)
@@ -1533,10 +1545,11 @@ public:
     void operator()(Compiler* compiler, GenTreeCall* call)
     {
         JITDUMP("Found call [%06u] with probe index %d and ilOffset 0x%X\n", compiler->dspTreeID(call),
-                call->gtClassProfileCandidateInfo->probeIndex, call->gtClassProfileCandidateInfo->ilOffset);
+                call->gtHandleHistogramProfileCandidateInfo->probeIndex,
+                call->gtHandleHistogramProfileCandidateInfo->ilOffset);
 
         // We transform the call from (CALLVIRT obj, ... args ...) to
-        // to
+        //
         //      (CALLVIRT
         //        (COMMA
         //          (ASG tmp, obj)
@@ -1546,19 +1559,25 @@ public:
         //         ... args ...)
         //
 
-        // Sanity check that we're looking at the right schema entry
-        //
-        assert(m_schema[*m_currentSchemaIndex].ILOffset == (int32_t)call->gtClassProfileCandidateInfo->ilOffset);
-        bool is32 = m_schema[*m_currentSchemaIndex].InstrumentationKind ==
-                    ICorJitInfo::PgoInstrumentationKind::HandleHistogramIntCount;
-        bool is64 = m_schema[*m_currentSchemaIndex].InstrumentationKind ==
-                    ICorJitInfo::PgoInstrumentationKind::HandleHistogramLongCount;
-        assert(is32 || is64);
-
-        // Figure out where the table is located.
-        //
-        uint8_t* classProfile = m_schema[*m_currentSchemaIndex].Offset + m_profileMemory;
-        *m_currentSchemaIndex += 2; // There are 2 schema entries per class probe
+        // Read histograms
+        void* typeHistogram   = nullptr;
+        void* methodHistogram = nullptr;
+
+        bool is32;
+        ReadHistogramAndAdvance(call->gtHandleHistogramProfileCandidateInfo->ilOffset, &typeHistogram, &methodHistogram,
+                                &is32);
+        bool secondIs32;
+        ReadHistogramAndAdvance(call->gtHandleHistogramProfileCandidateInfo->ilOffset, &typeHistogram, &methodHistogram,
+                                &secondIs32);
+
+        assert(((typeHistogram != nullptr) || (methodHistogram != nullptr)) &&
+               "Expected at least one handle histogram when inserting probes");
+
+        if ((typeHistogram != nullptr) && (methodHistogram != nullptr))
+        {
+            // We expect both histograms to be 32-bit or 64-bit, not a mix.
+            assert(is32 == secondIs32);
+        }
 
         assert(!call->gtArgs.AreArgsComplete());
         CallArg* objUse = nullptr;
@@ -1576,20 +1595,57 @@ public:
 
         // Grab a temp to hold the 'this' object as it will be used three times
         //
-        unsigned const tmpNum             = compiler->lvaGrabTemp(true DEBUGARG("class profile tmp"));
+        unsigned const tmpNum             = compiler->lvaGrabTemp(true DEBUGARG("handle histogram profile tmp"));
         compiler->lvaTable[tmpNum].lvType = TYP_REF;
 
+        GenTree* helperCallNode = nullptr;
+
+        if (typeHistogram != nullptr)
+        {
+            GenTree* const tmpNode          = compiler->gtNewLclvNode(tmpNum, TYP_REF);
+            GenTree* const classProfileNode = compiler->gtNewIconNode((ssize_t)typeHistogram, TYP_I_IMPL);
+            helperCallNode =
+                compiler->gtNewHelperCallNode(is32 ? CORINFO_HELP_CLASSPROFILE32 : CORINFO_HELP_CLASSPROFILE64,
+                                              TYP_VOID, tmpNode, classProfileNode);
+        }
+
+        if (methodHistogram != nullptr)
+        {
+            GenTree* const tmpNode           = compiler->gtNewLclvNode(tmpNum, TYP_REF);
+            GenTree* const methodProfileNode = compiler->gtNewIconNode((ssize_t)methodHistogram, TYP_I_IMPL);
+
+            GenTree* methodProfileCallNode;
+            if (call->IsDelegateInvoke())
+            {
+                methodProfileCallNode = compiler->gtNewHelperCallNode(is32 ? CORINFO_HELP_DELEGATEPROFILE32
+                                                                           : CORINFO_HELP_DELEGATEPROFILE64,
+                                                                      TYP_VOID, tmpNode, methodProfileNode);
+            }
+            else
+            {
+                assert(call->IsVirtualVtable());
+                GenTree* const baseMethodNode = compiler->gtNewIconEmbMethHndNode(call->gtCallMethHnd);
+                methodProfileCallNode =
+                    compiler->gtNewHelperCallNode(is32 ? CORINFO_HELP_VTABLEPROFILE32 : CORINFO_HELP_VTABLEPROFILE64,
+                                                  TYP_VOID, tmpNode, baseMethodNode, methodProfileNode);
+            }
+
+            if (helperCallNode == nullptr)
+            {
+                helperCallNode = methodProfileCallNode;
+            }
+            else
+            {
+                helperCallNode = compiler->gtNewOperNode(GT_COMMA, TYP_REF, helperCallNode, methodProfileCallNode);
+            }
+        }
+
         // Generate the IR...
         //
-        GenTree* const     classProfileNode = compiler->gtNewIconNode((ssize_t)classProfile, TYP_I_IMPL);
-        GenTree* const     tmpNode          = compiler->gtNewLclvNode(tmpNum, TYP_REF);
-        GenTreeCall* const helperCallNode =
-            compiler->gtNewHelperCallNode(is32 ? CORINFO_HELP_CLASSPROFILE32 : CORINFO_HELP_CLASSPROFILE64, TYP_VOID,
-                                          tmpNode, classProfileNode);
         GenTree* const tmpNode2      = compiler->gtNewLclvNode(tmpNum, TYP_REF);
         GenTree* const callCommaNode = compiler->gtNewOperNode(GT_COMMA, TYP_REF, helperCallNode, tmpNode2);
         GenTree* const tmpNode3      = compiler->gtNewLclvNode(tmpNum, TYP_REF);
-        GenTree* const asgNode       = compiler->gtNewOperNode(GT_ASG, TYP_REF, tmpNode3, objUse->GetEarlyNode());
+        GenTree* const asgNode       = compiler->gtNewOperNode(GT_ASG, TYP_REF, tmpNode3, objUse->GetNode());
         GenTree* const asgCommaNode  = compiler->gtNewOperNode(GT_COMMA, TYP_REF, asgNode, callCommaNode);
 
         // Update the call
@@ -1601,16 +1657,78 @@ public:
 
         m_instrCount++;
     }
+
+private:
+    void ReadHistogramAndAdvance(IL_OFFSET ilOffset, void** typeHistogram, void** methodHistogram, bool* histogramIs32)
+    {
+        if (*m_currentSchemaIndex >= (int)m_schema.size())
+        {
+            return;
+        }
+
+        ICorJitInfo::PgoInstrumentationSchema& countEntry = m_schema[*m_currentSchemaIndex];
+
+        bool is32 = countEntry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::HandleHistogramIntCount;
+        bool is64 = countEntry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::HandleHistogramLongCount;
+        if (!is32 && !is64)
+        {
+            return;
+        }
+
+        if (countEntry.ILOffset != static_cast<int32_t>(ilOffset))
+        {
+            return;
+        }
+
+        assert(*m_currentSchemaIndex + 2 <= (int)m_schema.size());
+        ICorJitInfo::PgoInstrumentationSchema& tableEntry = m_schema[*m_currentSchemaIndex + 1];
+        assert((tableEntry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::HandleHistogramTypes) ||
+               (tableEntry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::HandleHistogramMethods));
+
+        void** outHistogram;
+        if (tableEntry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::HandleHistogramTypes)
+        {
+            assert(*typeHistogram == nullptr);
+            outHistogram = typeHistogram;
+        }
+        else
+        {
+            assert(*methodHistogram == nullptr);
+            outHistogram = methodHistogram;
+        }
+
+        *outHistogram  = &m_profileMemory[countEntry.Offset];
+        *histogramIs32 = is32;
+
+#ifdef DEBUG
+        if (is32)
+        {
+            ICorJitInfo::HandleHistogram32* h32 =
+                reinterpret_cast<ICorJitInfo::HandleHistogram32*>(&m_profileMemory[countEntry.Offset]);
+            assert(reinterpret_cast<uint8_t*>(&h32->Count) == &m_profileMemory[countEntry.Offset]);
+            assert(reinterpret_cast<uint8_t*>(h32->HandleTable) == &m_profileMemory[tableEntry.Offset]);
+        }
+        else
+        {
+            ICorJitInfo::HandleHistogram64* h64 =
+                reinterpret_cast<ICorJitInfo::HandleHistogram64*>(&m_profileMemory[countEntry.Offset]);
+            assert(reinterpret_cast<uint8_t*>(&h64->Count) == &m_profileMemory[countEntry.Offset]);
+            assert(reinterpret_cast<uint8_t*>(h64->HandleTable) == &m_profileMemory[tableEntry.Offset]);
+        }
+#endif
+
+        *m_currentSchemaIndex += 2;
+    }
 };
 
 //------------------------------------------------------------------------
-// ClassProbeInstrumentor: instrumentor that adds a class probe to each
+// HandleHistogramProbeInstrumentor: instrumentor that adds a class and/or method probe to each
 //   virtual call in the basic block
 //
-class ClassProbeInstrumentor : public Instrumentor
+class HandleHistogramProbeInstrumentor : public Instrumentor
 {
 public:
-    ClassProbeInstrumentor(Compiler* comp) : Instrumentor(comp)
+    HandleHistogramProbeInstrumentor(Compiler* comp) : Instrumentor(comp)
     {
     }
     bool ShouldProcess(BasicBlock* block) override
@@ -1623,13 +1741,13 @@ public:
 };
 
 //------------------------------------------------------------------------
-// ClassProbeInstrumentor::Prepare: prepare for class instrumentation
+// HandleHistogramProbeInstrumentor::Prepare: prepare for class/method histogram instrumentation
 //
 // Arguments:
 //   preImport - true if this is the prepare call that happens before
 //      importation
 //
-void ClassProbeInstrumentor::Prepare(bool isPreImport)
+void HandleHistogramProbeInstrumentor::Prepare(bool isPreImport)
 {
     if (isPreImport)
     {
@@ -1641,33 +1759,33 @@ void ClassProbeInstrumentor::Prepare(bool isPreImport)
     //
     for (BasicBlock* const block : m_comp->Blocks())
     {
-        block->bbClassSchemaIndex = -1;
+        block->bbHistogramSchemaIndex = -1;
     }
 #endif
 }
 
 //------------------------------------------------------------------------
-// ClassProbeInstrumentor::BuildSchemaElements: create schema elements for a class probe
+// HandleHistogramProbeInstrumentor::BuildSchemaElements: create schema elements for class/method probes
 //
 // Arguments:
 //   block -- block to instrument
 //   schema -- schema that we're building
 //
-void ClassProbeInstrumentor::BuildSchemaElements(BasicBlock* block, Schema& schema)
+void HandleHistogramProbeInstrumentor::BuildSchemaElements(BasicBlock* block, Schema& schema)
 {
-    if ((block->bbFlags & BBF_HAS_CLASS_PROFILE) == 0)
+    if ((block->bbFlags & BBF_HAS_HISTOGRAM_PROFILE) == 0)
     {
         return;
     }
 
     // Remember the schema index for this block.
     //
-    block->bbClassSchemaIndex = (int)schema.size();
+    block->bbHistogramSchemaIndex = (int)schema.size();
 
     // Scan the statements and identify the class probes
     //
-    BuildClassProbeSchemaGen                    schemaGen(schema, m_schemaCount);
-    ClassProbeVisitor<BuildClassProbeSchemaGen> visitor(m_comp, schemaGen);
+    BuildHandleHistogramProbeSchemaGen                              schemaGen(schema, m_schemaCount);
+    HandleHistogramProbeVisitor<BuildHandleHistogramProbeSchemaGen> visitor(m_comp, schemaGen);
     for (Statement* const stmt : block->Statements())
     {
         visitor.WalkTree(stmt->GetRootNodePointer(), nullptr);
@@ -1675,16 +1793,16 @@ void ClassProbeInstrumentor::BuildSchemaElements(BasicBlock* block, Schema& sche
 }
 
 //------------------------------------------------------------------------
-// ClassProbeInstrumentor::Instrument: add class probes to block
+// HandleHistogramProbeInstrumentor::Instrument: add class/method probes to block
 //
 // Arguments:
 //   block -- block of interest
 //   schema -- instrumentation schema
 //   profileMemory -- profile data slab
 //
-void ClassProbeInstrumentor::Instrument(BasicBlock* block, Schema& schema, uint8_t* profileMemory)
+void HandleHistogramProbeInstrumentor::Instrument(BasicBlock* block, Schema& schema, uint8_t* profileMemory)
 {
-    if ((block->bbFlags & BBF_HAS_CLASS_PROFILE) == 0)
+    if ((block->bbFlags & BBF_HAS_HISTOGRAM_PROFILE) == 0)
     {
         return;
     }
@@ -1696,11 +1814,11 @@ void ClassProbeInstrumentor::Instrument(BasicBlock* block, Schema& schema, uint8
 
     // Scan the statements and add class probes
     //
-    int classSchemaIndex = block->bbClassSchemaIndex;
-    assert((classSchemaIndex >= 0) && (classSchemaIndex < (int)schema.size()));
+    int histogramSchemaIndex = block->bbHistogramSchemaIndex;
+    assert((histogramSchemaIndex >= 0) && (histogramSchemaIndex < (int)schema.size()));
 
-    ClassProbeInserter                    insertProbes(schema, profileMemory, &classSchemaIndex, m_instrCount);
-    ClassProbeVisitor<ClassProbeInserter> visitor(m_comp, insertProbes);
+    HandleHistogramProbeInserter insertProbes(schema, profileMemory, &histogramSchemaIndex, m_instrCount);
+    HandleHistogramProbeVisitor<HandleHistogramProbeInserter> visitor(m_comp, insertProbes);
     for (Statement* const stmt : block->Statements())
     {
         visitor.WalkTree(stmt->GetRootNodePointer(), nullptr);
@@ -1789,24 +1907,25 @@ PhaseStatus Compiler::fgPrepareToInstrumentMethod()
     // Enable class profiling by default, when jitting.
     // Todo: we may also want this on by default for prejitting.
     //
-    const bool useClassProfiles = (JitConfig.JitClassProfiling() > 0) && !prejit;
-    if (useClassProfiles)
+    const bool useClassProfiles    = (JitConfig.JitClassProfiling() > 0);
+    const bool useDelegateProfiles = (JitConfig.JitDelegateProfiling() > 0);
+    const bool useVTableProfiles   = (JitConfig.JitVTableProfiling() > 0);
+    if (!prejit && (useClassProfiles || useDelegateProfiles || useVTableProfiles))
     {
-        fgClassInstrumentor = new (this, CMK_Pgo) ClassProbeInstrumentor(this);
+        fgHistogramInstrumentor = new (this, CMK_Pgo) HandleHistogramProbeInstrumentor(this);
     }
     else
     {
-        JITDUMP("Not doing class profiling, because %s\n",
-                (JitConfig.JitClassProfiling() > 0) ? "class profiles disabled" : "prejit");
+        JITDUMP("Not doing class/method profiling, because %s\n", prejit ? "prejit" : "class/method profiles disabled");
 
-        fgClassInstrumentor = new (this, CMK_Pgo) NonInstrumentor(this);
+        fgHistogramInstrumentor = new (this, CMK_Pgo) NonInstrumentor(this);
     }
 
     // Make pre-import preparations.
     //
     const bool isPreImport = true;
     fgCountInstrumentor->Prepare(isPreImport);
-    fgClassInstrumentor->Prepare(isPreImport);
+    fgHistogramInstrumentor->Prepare(isPreImport);
 
     return PhaseStatus::MODIFIED_NOTHING;
 }
@@ -1835,7 +1954,7 @@ PhaseStatus Compiler::fgInstrumentMethod()
     //
     const bool isPreImport = false;
     fgCountInstrumentor->Prepare(isPreImport);
-    fgClassInstrumentor->Prepare(isPreImport);
+    fgHistogramInstrumentor->Prepare(isPreImport);
 
     // Walk the flow graph to build up the instrumentation schema.
     //
@@ -1847,27 +1966,12 @@ PhaseStatus Compiler::fgInstrumentMethod()
             fgCountInstrumentor->BuildSchemaElements(block, schema);
         }
 
-        if (fgClassInstrumentor->ShouldProcess(block))
+        if (fgHistogramInstrumentor->ShouldProcess(block))
         {
-            fgClassInstrumentor->BuildSchemaElements(block, schema);
+            fgHistogramInstrumentor->BuildSchemaElements(block, schema);
         }
     }
 
-    // Verify we created schema for the calls needing class probes.
-    // (we counted those when importing)
-    //
-    // This is not true when we do partial compilation; it can/will erase class probes,
-    // and there's no easy way to figure out how many should be left.
-    //
-    if (doesMethodHavePartialCompilationPatchpoints())
-    {
-        assert(fgClassInstrumentor->SchemaCount() <= info.compClassProbeCount);
-    }
-    else
-    {
-        assert(fgClassInstrumentor->SchemaCount() == info.compClassProbeCount);
-    }
-
     // Optionally, when jitting, if there were no class probes and only one count probe,
     // suppress instrumentation.
     //
@@ -1887,7 +1991,7 @@ PhaseStatus Compiler::fgInstrumentMethod()
         minimalProbeMode = (JitConfig.JitMinimalJitProfiling() > 0);
     }
 
-    if (minimalProbeMode && (fgCountInstrumentor->SchemaCount() == 1) && (fgClassInstrumentor->SchemaCount() == 0))
+    if (minimalProbeMode && (fgCountInstrumentor->SchemaCount() == 1) && (fgHistogramInstrumentor->SchemaCount() == 0))
     {
         JITDUMP(
             "Not instrumenting method: minimal probing enabled, and method has only one counter and no class probes\n");
@@ -1895,7 +1999,7 @@ PhaseStatus Compiler::fgInstrumentMethod()
     }
 
     JITDUMP("Instrumenting method: %d count probes and %d class probes\n", fgCountInstrumentor->SchemaCount(),
-            fgClassInstrumentor->SchemaCount());
+            fgHistogramInstrumentor->SchemaCount());
 
     assert(schema.size() > 0);
 
@@ -1928,7 +2032,7 @@ PhaseStatus Compiler::fgInstrumentMethod()
         // Do any cleanup we might need to do...
         //
         fgCountInstrumentor->SuppressProbes();
-        fgClassInstrumentor->SuppressProbes();
+        fgHistogramInstrumentor->SuppressProbes();
 
         // If we needed to create cheap preds, we're done with them now.
         //
@@ -1939,7 +2043,7 @@ PhaseStatus Compiler::fgInstrumentMethod()
 
         // We may have modified control flow preparing for instrumentation.
         //
-        const bool modifiedFlow = fgCountInstrumentor->ModifiedFlow() || fgClassInstrumentor->ModifiedFlow();
+        const bool modifiedFlow = fgCountInstrumentor->ModifiedFlow() || fgHistogramInstrumentor->ModifiedFlow();
         return modifiedFlow ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING;
     }
 
@@ -1954,22 +2058,25 @@ PhaseStatus Compiler::fgInstrumentMethod()
             fgCountInstrumentor->Instrument(block, schema, profileMemory);
         }
 
-        if (fgClassInstrumentor->ShouldProcess(block))
+        if (fgHistogramInstrumentor->ShouldProcess(block))
         {
-            fgClassInstrumentor->Instrument(block, schema, profileMemory);
+            fgHistogramInstrumentor->Instrument(block, schema, profileMemory);
         }
     }
 
     // Verify we instrumented everthing we created schemas for.
     //
     assert(fgCountInstrumentor->InstrCount() == fgCountInstrumentor->SchemaCount());
-    assert(fgClassInstrumentor->InstrCount() == fgClassInstrumentor->SchemaCount());
+
+    // Verify we instrumented for each probe
+    //
+    assert(fgHistogramInstrumentor->InstrCount() == info.compHandleHistogramProbeCount);
 
     // Add any special entry instrumentation. This does not
     // use the schema mechanism.
     //
     fgCountInstrumentor->InstrumentMethodEntry(schema, profileMemory);
-    fgClassInstrumentor->InstrumentMethodEntry(schema, profileMemory);
+    fgHistogramInstrumentor->InstrumentMethodEntry(schema, profileMemory);
 
     // If we needed to create cheap preds, we're done with them now.
     //
@@ -2052,6 +2159,10 @@ PhaseStatus Compiler::fgIncorporateProfileData()
                 fgPgoClassProfiles++;
                 break;
 
+            case ICorJitInfo::PgoInstrumentationKind::GetLikelyMethod:
+                fgPgoMethodProfiles++;
+                break;
+
             case ICorJitInfo::PgoInstrumentationKind::HandleHistogramIntCount:
             case ICorJitInfo::PgoInstrumentationKind::HandleHistogramLongCount:
                 if (iSchema + 1 < fgPgoSchemaCount)
index 8ef60e8..5b88e32 100644 (file)
@@ -150,7 +150,7 @@ struct BasicBlock;
 enum BasicBlockFlags : unsigned __int64;
 struct InlineCandidateInfo;
 struct GuardedDevirtualizationCandidateInfo;
-struct ClassProfileCandidateInfo;
+struct HandleHistogramProfileCandidateInfo;
 struct LateDevirtualizationInfo;
 
 typedef unsigned short AssertionIndex;
@@ -5391,7 +5391,7 @@ struct GenTreeCall final : public GenTree
         // gtInlineCandidateInfo is only used when inlining methods
         InlineCandidateInfo*                  gtInlineCandidateInfo;
         GuardedDevirtualizationCandidateInfo* gtGuardedDevirtualizationCandidateInfo;
-        ClassProfileCandidateInfo*            gtClassProfileCandidateInfo;
+        HandleHistogramProfileCandidateInfo*  gtHandleHistogramProfileCandidateInfo;
         LateDevirtualizationInfo*             gtLateDevirtualizationInfo;
         CORINFO_GENERIC_HANDLE compileTimeHelperArgumentHandle; // Used to track type handle argument of dynamic helpers
         void*                  gtDirectCallAddress; // Used to pass direct call address between lower and codegen
index 64be510..e101511 100644 (file)
@@ -10070,11 +10070,23 @@ var_types Compiler::impImportCall(OPCODE                  opcode,
         call->gtFlags |= obj->gtFlags & GTF_GLOB_EFFECT;
         call->AsCall()->gtArgs.PushFront(this, NewCallArg::Primitive(obj).WellKnown(WellKnownArg::ThisPointer));
 
-        // Is this a virtual or interface call?
+        if (impIsThis(obj))
+        {
+            call->AsCall()->gtCallMoreFlags |= GTF_CALL_M_NONVIRT_SAME_THIS;
+        }
+    }
+
+    bool probing;
+    probing = impConsiderCallProbe(call->AsCall(), rawILOffset);
+
+    // See if we can devirt if we aren't probing.
+    if (!probing && opts.OptimizationEnabled())
+    {
         if (call->AsCall()->IsVirtual())
         {
             // only true object pointers can be virtual
-            assert(obj->gtType == TYP_REF);
+            assert(call->AsCall()->gtArgs.HasThisPointer() &&
+                   call->AsCall()->gtArgs.GetThisArg()->GetNode()->TypeIs(TYP_REF));
 
             // See if we can devirtualize.
 
@@ -10090,10 +10102,10 @@ var_types Compiler::impImportCall(OPCODE                  opcode,
             //
             methHnd = callInfo->hMethod;
         }
-
-        if (impIsThis(obj))
+        else if (call->AsCall()->IsDelegateInvoke())
         {
-            call->AsCall()->gtCallMoreFlags |= GTF_CALL_M_NONVIRT_SAME_THIS;
+            considerGuardedDevirtualization(call->AsCall(), rawILOffset, false, NO_METHOD_HANDLE, NO_CLASS_HANDLE,
+                                            nullptr);
         }
     }
 
@@ -10530,7 +10542,7 @@ DONE_CALL:
                 // important devirtualizations, we'll want to allow both a class probe and a captured context.
                 //
                 if (origCall->IsVirtual() && (origCall->gtCallType != CT_INDIRECT) && (exactContextHnd != nullptr) &&
-                    (origCall->gtClassProfileCandidateInfo == nullptr))
+                    (origCall->gtHandleHistogramProfileCandidateInfo == nullptr))
                 {
                     JITDUMP("\nSaving context %p for call [%06u]\n", exactContextHnd, dspTreeID(origCall));
                     origCall->gtCallMoreFlags |= GTF_CALL_M_HAS_LATE_DEVIRT_INFO;
@@ -12140,10 +12152,10 @@ GenTree* Compiler::impCastClassOrIsInstToTree(
         // Check if this cast helper have some profile data
         if (impIsCastHelperMayHaveProfileData(helper))
         {
-            bool              doRandomDevirt   = false;
-            const int         maxLikelyClasses = 32;
-            int               likelyClassCount = 0;
-            LikelyClassRecord likelyClasses[maxLikelyClasses];
+            bool                    doRandomDevirt   = false;
+            const int               maxLikelyClasses = 32;
+            int                     likelyClassCount = 0;
+            LikelyClassMethodRecord likelyClasses[maxLikelyClasses];
 #ifdef DEBUG
             // Optional stress mode to pick a random known class, rather than
             // the most likely known class.
@@ -12154,11 +12166,14 @@ GenTree* Compiler::impCastClassOrIsInstToTree(
                 // Reuse the random inliner's random state.
                 CLRRandom* const random =
                     impInlineRoot()->m_inlineStrategy->GetRandom(JitConfig.JitRandomGuardedDevirtualization());
-                likelyClasses[0].clsHandle = getRandomClass(fgPgoSchema, fgPgoSchemaCount, fgPgoData, ilOffset, random);
-                likelyClasses[0].likelihood = 100;
-                if (likelyClasses[0].clsHandle != NO_CLASS_HANDLE)
+                CORINFO_CLASS_HANDLE  clsGuess;
+                CORINFO_METHOD_HANDLE methGuess;
+                getRandomGDV(fgPgoSchema, fgPgoSchemaCount, fgPgoData, ilOffset, random, &clsGuess, &methGuess);
+                if (clsGuess != NO_CLASS_HANDLE)
                 {
-                    likelyClassCount = 1;
+                    likelyClasses[0].likelihood = 100;
+                    likelyClasses[0].handle     = (intptr_t)clsGuess;
+                    likelyClassCount            = 1;
                 }
             }
             else
@@ -12170,8 +12185,8 @@ GenTree* Compiler::impCastClassOrIsInstToTree(
 
             if (likelyClassCount > 0)
             {
-                LikelyClassRecord    likelyClass = likelyClasses[0];
-                CORINFO_CLASS_HANDLE likelyCls   = likelyClass.clsHandle;
+                LikelyClassMethodRecord likelyClass = likelyClasses[0];
+                CORINFO_CLASS_HANDLE    likelyCls   = (CORINFO_CLASS_HANDLE)likelyClass.handle;
 
                 if ((likelyCls != NO_CLASS_HANDLE) &&
                     (likelyClass.likelihood > (UINT32)JitConfig.JitGuardedDevirtualizationChainLikelihood()))
@@ -12206,13 +12221,14 @@ GenTree* Compiler::impCastClassOrIsInstToTree(
         op2->gtFlags |= GTF_DONT_CSE;
 
         GenTreeCall* call = gtNewHelperCallNode(helper, TYP_REF, op2, op1);
-        if (impIsCastHelperEligibleForClassProbe(call) && !impIsClassExact(pResolvedToken->hClass))
+        if ((JitConfig.JitClassProfiling() > 0) && impIsCastHelperEligibleForClassProbe(call) &&
+            !impIsClassExact(pResolvedToken->hClass))
         {
-            ClassProfileCandidateInfo* pInfo  = new (this, CMK_Inlining) ClassProfileCandidateInfo;
-            pInfo->ilOffset                   = ilOffset;
-            pInfo->probeIndex                 = info.compClassProbeCount++;
-            call->gtClassProfileCandidateInfo = pInfo;
-            compCurBB->bbFlags |= BBF_HAS_CLASS_PROFILE;
+            HandleHistogramProfileCandidateInfo* pInfo  = new (this, CMK_Inlining) HandleHistogramProfileCandidateInfo;
+            pInfo->ilOffset                             = ilOffset;
+            pInfo->probeIndex                           = info.compHandleHistogramProbeCount++;
+            call->gtHandleHistogramProfileCandidateInfo = pInfo;
+            compCurBB->bbFlags |= BBF_HAS_HISTOGRAM_PROFILE;
         }
         return call;
     }
@@ -21004,7 +21020,7 @@ void Compiler::impMarkInlineCandidateHelper(GenTreeCall*           call,
 
     // Delegate Invoke method doesn't have a body and gets special cased instead.
     // Don't even bother trying to inline it.
-    if (call->IsDelegateInvoke())
+    if (call->IsDelegateInvoke() && !call->IsGuardedDevirtualizationCandidate())
     {
         inlineResult.NoteFatal(InlineObservation::CALLEE_HAS_NO_BODY);
         return;
@@ -21389,51 +21405,7 @@ void Compiler::impDevirtualizeCall(GenTreeCall*            call,
     // This should be a virtual vtable or virtual stub call.
     //
     assert(call->IsVirtual());
-
-    // Possibly instrument. Note for OSR+PGO we will instrument when
-    // optimizing and (currently) won't devirtualize. We may want
-    // to revisit -- if we can devirtualize we should be able to
-    // suppress the probe.
-    //
-    // We strip BBINSTR from inlinees currently, so we'll only
-    // do this for the root method calls.
-    //
-    if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_BBINSTR))
-    {
-        assert(opts.OptimizationDisabled() || opts.IsOSR());
-        assert(!compIsForInlining());
-
-        // During importation, optionally flag this block as one that
-        // contains calls requiring class profiling. Ideally perhaps
-        // we'd just keep track of the calls themselves, so we don't
-        // have to search for them later.
-        //
-        if ((call->gtCallType != CT_INDIRECT) && opts.jitFlags->IsSet(JitFlags::JIT_FLAG_BBINSTR) &&
-            !opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT) && (JitConfig.JitClassProfiling() > 0) &&
-            !isLateDevirtualization)
-        {
-            JITDUMP("\n ... marking [%06u] in " FMT_BB " for class profile instrumentation\n", dspTreeID(call),
-                    compCurBB->bbNum);
-            ClassProfileCandidateInfo* pInfo = new (this, CMK_Inlining) ClassProfileCandidateInfo;
-
-            // Record some info needed for the class profiling probe.
-            //
-            pInfo->ilOffset                   = ilOffset;
-            pInfo->probeIndex                 = info.compClassProbeCount++;
-            call->gtClassProfileCandidateInfo = pInfo;
-
-            // Flag block as needing scrutiny
-            //
-            compCurBB->bbFlags |= BBF_HAS_CLASS_PROFILE;
-        }
-        return;
-    }
-
-    // Bail if optimizations are disabled.
-    if (opts.OptimizationDisabled())
-    {
-        return;
-    }
+    assert(opts.OptimizationEnabled());
 
 #if defined(DEBUG)
     // Bail if devirt is disabled.
@@ -21525,8 +21497,7 @@ void Compiler::impDevirtualizeCall(GenTreeCall*            call,
             return;
         }
 
-        considerGuardedDevirtualization(call, ilOffset, isInterface, baseMethod, baseClass,
-                                        pContextHandle DEBUGARG(objClass) DEBUGARG("unknown"));
+        considerGuardedDevirtualization(call, ilOffset, isInterface, baseMethod, baseClass, pContextHandle);
 
         return;
     }
@@ -21576,8 +21547,7 @@ void Compiler::impDevirtualizeCall(GenTreeCall*            call,
             return;
         }
 
-        considerGuardedDevirtualization(call, ilOffset, isInterface, baseMethod, baseClass,
-                                        pContextHandle DEBUGARG(objClass) DEBUGARG(objClassName));
+        considerGuardedDevirtualization(call, ilOffset, isInterface, baseMethod, baseClass, pContextHandle);
         return;
     }
 
@@ -21693,8 +21663,7 @@ void Compiler::impDevirtualizeCall(GenTreeCall*            call,
             return;
         }
 
-        considerGuardedDevirtualization(call, ilOffset, isInterface, baseMethod, baseClass,
-                                        pContextHandle DEBUGARG(objClass) DEBUGARG(objClassName));
+        considerGuardedDevirtualization(call, ilOffset, isInterface, baseMethod, baseClass, pContextHandle);
         return;
     }
 
@@ -21714,6 +21683,7 @@ void Compiler::impDevirtualizeCall(GenTreeCall*            call,
     call->gtFlags &= ~GTF_CALL_VIRT_STUB;
     call->gtCallMethHnd = derivedMethod;
     call->gtCallType    = CT_USER_FUNC;
+    call->gtControlExpr = nullptr;
     call->gtCallMoreFlags |= GTF_CALL_M_DEVIRTUALIZED;
 
     // Virtual calls include an implicit null check, which we may
@@ -21755,14 +21725,14 @@ void Compiler::impDevirtualizeCall(GenTreeCall*            call,
     if (JitConfig.JitCrossCheckDevirtualizationAndPGO() && canSensiblyCheck)
     {
         // We only can handle a single likely class for now
-        const int         maxLikelyClasses = 1;
-        LikelyClassRecord likelyClasses[maxLikelyClasses];
+        const int               maxLikelyClasses = 1;
+        LikelyClassMethodRecord likelyClasses[maxLikelyClasses];
 
         UINT32 numberOfClasses =
             getLikelyClasses(likelyClasses, maxLikelyClasses, fgPgoSchema, fgPgoSchemaCount, fgPgoData, ilOffset);
         UINT32 likelihood = likelyClasses[0].likelihood;
 
-        CORINFO_CLASS_HANDLE likelyClass = likelyClasses[0].clsHandle;
+        CORINFO_CLASS_HANDLE likelyClass = (CORINFO_CLASS_HANDLE)likelyClasses[0].handle;
 
         if (numberOfClasses > 0)
         {
@@ -22054,6 +22024,117 @@ void Compiler::impDevirtualizeCall(GenTreeCall*            call,
 }
 
 //------------------------------------------------------------------------
+// impConsiderCallProbe: Consider whether a call should get a histogram probe
+// and mark it if so.
+//
+// Arguments:
+//     call - The call
+//     ilOffset - The precise IL offset of the call
+//
+// Returns:
+//     True if the call was marked such that we will add a class or method probe for it.
+//
+bool Compiler::impConsiderCallProbe(GenTreeCall* call, IL_OFFSET ilOffset)
+{
+    // Possibly instrument. Note for OSR+PGO we will instrument when
+    // optimizing and (currently) won't devirtualize. We may want
+    // to revisit -- if we can devirtualize we should be able to
+    // suppress the probe.
+    //
+    // We strip BBINSTR from inlinees currently, so we'll only
+    // do this for the root method calls.
+    //
+    if (!opts.jitFlags->IsSet(JitFlags::JIT_FLAG_BBINSTR))
+    {
+        return false;
+    }
+
+    assert(opts.OptimizationDisabled() || opts.IsOSR());
+    assert(!compIsForInlining());
+
+    // During importation, optionally flag this block as one that
+    // contains calls requiring class profiling. Ideally perhaps
+    // we'd just keep track of the calls themselves, so we don't
+    // have to search for them later.
+    //
+    if (compClassifyGDVProbeType(call) == GDVProbeType::None)
+    {
+        return false;
+    }
+
+    JITDUMP("\n ... marking [%06u] in " FMT_BB " for method/class profile instrumentation\n", dspTreeID(call),
+            compCurBB->bbNum);
+    HandleHistogramProfileCandidateInfo* pInfo = new (this, CMK_Inlining) HandleHistogramProfileCandidateInfo;
+
+    // Record some info needed for the class profiling probe.
+    //
+    pInfo->ilOffset                             = ilOffset;
+    pInfo->probeIndex                           = info.compHandleHistogramProbeCount++;
+    call->gtHandleHistogramProfileCandidateInfo = pInfo;
+
+    // Flag block as needing scrutiny
+    //
+    compCurBB->bbFlags |= BBF_HAS_HISTOGRAM_PROFILE;
+    return true;
+}
+
+//------------------------------------------------------------------------
+// compClassifyGDVProbeType:
+//   Classify the type of GDV probe to use for a call site.
+//
+// Arguments:
+//     call - The call
+//
+// Returns:
+//     The type of probe to use.
+//
+Compiler::GDVProbeType Compiler::compClassifyGDVProbeType(GenTreeCall* call)
+{
+    if (call->gtCallType == CT_INDIRECT)
+    {
+        return GDVProbeType::None;
+    }
+
+    if (!opts.jitFlags->IsSet(JitFlags::JIT_FLAG_BBINSTR) || opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT))
+    {
+        return GDVProbeType::None;
+    }
+
+    bool createTypeHistogram = false;
+    if (JitConfig.JitClassProfiling() > 0)
+    {
+        createTypeHistogram = call->IsVirtualStub() || call->IsVirtualVtable();
+
+        // Cast helpers may conditionally (depending on whether the class is
+        // exact or not) have probes. For those helpers we do not use this
+        // function to classify the probe type until after we have decided on
+        // whether we probe them or not.
+        createTypeHistogram = createTypeHistogram || (impIsCastHelperEligibleForClassProbe(call) &&
+                                                      (call->gtHandleHistogramProfileCandidateInfo != nullptr));
+    }
+
+    bool createMethodHistogram = ((JitConfig.JitDelegateProfiling() > 0) && call->IsDelegateInvoke()) ||
+                                 ((JitConfig.JitVTableProfiling() > 0) && call->IsVirtualVtable());
+
+    if (createTypeHistogram && createMethodHistogram)
+    {
+        return GDVProbeType::MethodAndClassProfile;
+    }
+
+    if (createTypeHistogram)
+    {
+        return GDVProbeType::ClassProfile;
+    }
+
+    if (createMethodHistogram)
+    {
+        return GDVProbeType::MethodProfile;
+    }
+
+    return GDVProbeType::None;
+}
+
+//------------------------------------------------------------------------
 // impGetSpecialIntrinsicExactReturnType: Look for special cases where a call
 //   to an intrinsic returns an exact type
 //
@@ -22063,7 +22144,6 @@ void Compiler::impDevirtualizeCall(GenTreeCall*            call,
 // Returns:
 //     Exact class handle returned by the intrinsic call, if known.
 //     Nullptr if not known, or not likely to lead to beneficial optimization.
-
 CORINFO_CLASS_HANDLE Compiler::impGetSpecialIntrinsicExactReturnType(CORINFO_METHOD_HANDLE methodHnd)
 {
     JITDUMP("Special intrinsic: looking for exact type returned by %s\n", eeGetMethodFullName(methodHnd));
@@ -22225,153 +22305,378 @@ void Compiler::addFatPointerCandidate(GenTreeCall* call)
 }
 
 //------------------------------------------------------------------------
-// considerGuardedDevirtualization: see if we can profitably guess at the
-//    class involved in an interface or virtual call.
+// pickGDV: Use profile information to pick a GDV candidate for a call site.
 //
 // Arguments:
+//    call        - the call
+//    ilOffset    - exact IL offset of the call
+//    isInterface - whether or not the call target is defined on an interface
+//    classGuess  - [out] the class to guess for (mutually exclusive with methodGuess)
+//    methodGuess - [out] the method to guess for (mutually exclusive with classGuess)
+//    likelihood  - [out] an estimate of the likelihood that the guess will succeed
 //
-//    call - potential guarded devirtualization candidate
-//    ilOffset - IL ofset of the call instruction
-//    isInterface - true if this is an interface call
-//    baseMethod - target method of the call
-//    baseClass - class that introduced the target method
-//    pContextHandle - context handle for the call
-//    objClass - class of 'this' in the call
-//    objClassName - name of the obj Class
-//
-// Notes:
-//    Consults with VM to see if there's a likely class at runtime,
-//    if so, adds a candidate for guarded devirtualization.
-//
-void Compiler::considerGuardedDevirtualization(
-    GenTreeCall*            call,
-    IL_OFFSET               ilOffset,
-    bool                    isInterface,
-    CORINFO_METHOD_HANDLE   baseMethod,
-    CORINFO_CLASS_HANDLE    baseClass,
-    CORINFO_CONTEXT_HANDLE* pContextHandle DEBUGARG(CORINFO_CLASS_HANDLE objClass) DEBUGARG(const char* objClassName))
+void Compiler::pickGDV(GenTreeCall*           call,
+                       IL_OFFSET              ilOffset,
+                       bool                   isInterface,
+                       CORINFO_CLASS_HANDLE*  classGuess,
+                       CORINFO_METHOD_HANDLE* methodGuess,
+                       unsigned*              likelihood)
 {
-#if defined(DEBUG)
-    const char* callKind = isInterface ? "interface" : "virtual";
-#endif
+    *classGuess  = NO_CLASS_HANDLE;
+    *methodGuess = NO_METHOD_HANDLE;
+    *likelihood  = 0;
 
-    JITDUMP("Considering guarded devirtualization at IL offset %u (0x%x)\n", ilOffset, ilOffset);
+    const int               maxLikelyClasses = 32;
+    LikelyClassMethodRecord likelyClasses[maxLikelyClasses];
+    unsigned                numberOfClasses = 0;
+    if (call->IsVirtualStub() || call->IsVirtualVtable())
+    {
+        numberOfClasses =
+            getLikelyClasses(likelyClasses, maxLikelyClasses, fgPgoSchema, fgPgoSchemaCount, fgPgoData, ilOffset);
+    }
 
-    // We currently only get likely class guesses when there is PGO data
-    // with class profiles.
+    const int               maxLikelyMethods = 32;
+    LikelyClassMethodRecord likelyMethods[maxLikelyMethods];
+    unsigned                numberOfMethods = 0;
+
+    // TODO-GDV: R2R support requires additional work to reacquire the
+    // entrypoint, similar to what happens at the end of impDevirtualizeCall.
+    // As part of supporting this we should merge the tail of
+    // impDevirtualizeCall and what happens in
+    // GuardedDevirtualizationTransformer::CreateThen for method GDV.
     //
-    if (fgPgoClassProfiles == 0)
+    if (!opts.IsReadyToRun() && (call->IsVirtualVtable() || call->IsDelegateInvoke()))
+    {
+        numberOfMethods =
+            getLikelyMethods(likelyMethods, maxLikelyMethods, fgPgoSchema, fgPgoSchemaCount, fgPgoData, ilOffset);
+    }
+
+    if ((numberOfClasses < 1) && (numberOfMethods < 1))
     {
-        JITDUMP("Not guessing for class: no class profile pgo data, or pgo disabled\n");
+        JITDUMP("No likely class or method, sorry\n");
         return;
     }
 
-    // See if there's a likely guess for the class.
-    //
-    const unsigned likelihoodThreshold = isInterface ? 25 : 30;
-    unsigned       likelihood          = 0;
-    unsigned       numberOfClasses     = 0;
+#ifdef DEBUG
+    if ((verbose || JitConfig.EnableExtraSuperPmiQueries()) && (numberOfClasses > 0))
+    {
+        bool                 isExact;
+        bool                 isNonNull;
+        CallArg*             thisArg            = call->gtArgs.GetThisArg();
+        CORINFO_CLASS_HANDLE declaredThisClsHnd = gtGetClassHandle(thisArg->GetNode(), &isExact, &isNonNull);
+        JITDUMP("Likely classes for call [%06u]", dspTreeID(call));
+        if (declaredThisClsHnd != NO_CLASS_HANDLE)
+        {
+            const char* baseClassName = eeGetClassName(declaredThisClsHnd);
+            JITDUMP(" on class %p (%s)", declaredThisClsHnd, baseClassName);
+        }
+        JITDUMP("\n");
+
+        for (UINT32 i = 0; i < numberOfClasses; i++)
+        {
+            const char* className = eeGetClassName((CORINFO_CLASS_HANDLE)likelyClasses[i].handle);
+            JITDUMP("  %u) %p (%s) [likelihood:%u%%]\n", i + 1, likelyClasses[i].handle, className,
+                    likelyClasses[i].likelihood);
+        }
+    }
 
-    CORINFO_CLASS_HANDLE likelyClass = NO_CLASS_HANDLE;
+    if ((verbose || JitConfig.EnableExtraSuperPmiQueries()) && (numberOfMethods > 0))
+    {
+        assert(call->gtCallType == CT_USER_FUNC);
+        const char* baseMethName = eeGetMethodFullName(call->gtCallMethHnd);
+        JITDUMP("Likely methods for call [%06u] to method %s\n", dspTreeID(call), baseMethName);
 
-    bool doRandomDevirt = false;
+        for (UINT32 i = 0; i < numberOfMethods; i++)
+        {
+            CORINFO_CONST_LOOKUP lookup = {};
+            info.compCompHnd->getFunctionFixedEntryPoint((CORINFO_METHOD_HANDLE)likelyMethods[i].handle, false,
+                                                         &lookup);
 
-    const int         maxLikelyClasses = 32;
-    LikelyClassRecord likelyClasses[maxLikelyClasses];
+            const char* methName = eeGetMethodFullName((CORINFO_METHOD_HANDLE)likelyMethods[i].handle);
+            switch (lookup.accessType)
+            {
+                case IAT_VALUE:
+                    JITDUMP("  %u) %p (%s) [likelihood:%u%%]\n", i + 1, lookup.addr, methName,
+                            likelyMethods[i].likelihood);
+                    break;
+                case IAT_PVALUE:
+                    JITDUMP("  %u) [%p] (%s) [likelihood:%u%%]\n", i + 1, lookup.addr, methName,
+                            likelyMethods[i].likelihood);
+                    break;
+                case IAT_PPVALUE:
+                    JITDUMP("  %u) [[%p]] (%s) [likelihood:%u%%]\n", i + 1, lookup.addr, methName,
+                            likelyMethods[i].likelihood);
+                    break;
+                default:
+                    JITDUMP("  %u) %s [likelihood:%u%%]\n", i + 1, methName, likelyMethods[i].likelihood);
+                    break;
+            }
+        }
+    }
 
-#ifdef DEBUG
     // Optional stress mode to pick a random known class, rather than
     // the most likely known class.
     //
-    doRandomDevirt = JitConfig.JitRandomGuardedDevirtualization() != 0;
-
-    if (doRandomDevirt)
+    if (JitConfig.JitRandomGuardedDevirtualization() != 0)
     {
         // Reuse the random inliner's random state.
         //
         CLRRandom* const random =
             impInlineRoot()->m_inlineStrategy->GetRandom(JitConfig.JitRandomGuardedDevirtualization());
-        likelyClasses[0].clsHandle  = getRandomClass(fgPgoSchema, fgPgoSchemaCount, fgPgoData, ilOffset, random);
-        likelyClasses[0].likelihood = 100;
-        if (likelyClasses[0].clsHandle != NO_CLASS_HANDLE)
+        // TODO-GDV: This can be simplified to just use likelyClasses and
+        // likelyMethods now that we have multiple candidates here.
+        getRandomGDV(fgPgoSchema, fgPgoSchemaCount, fgPgoData, ilOffset, random, classGuess, methodGuess);
+        if (*classGuess != NO_CLASS_HANDLE)
+        {
+            JITDUMP("Picked random class for GDV: %p (%s)\n", *classGuess, eeGetClassName(*classGuess));
+            return;
+        }
+        if (*methodGuess != NO_METHOD_HANDLE)
         {
-            numberOfClasses = 1;
+            JITDUMP("Picked random method for GDV: %p (%s)\n", *methodGuess, eeGetMethodFullName(*methodGuess));
+            return;
         }
     }
-    else
 #endif
+
+    // Prefer class guess as it is cheaper
+    if (numberOfClasses > 0)
     {
-        numberOfClasses =
-            getLikelyClasses(likelyClasses, maxLikelyClasses, fgPgoSchema, fgPgoSchemaCount, fgPgoData, ilOffset);
+        unsigned likelihoodThreshold = isInterface ? 25 : 30;
+        if (likelyClasses[0].likelihood >= likelihoodThreshold)
+        {
+            *classGuess = (CORINFO_CLASS_HANDLE)likelyClasses[0].handle;
+            *likelihood = likelyClasses[0].likelihood;
+            return;
+        }
+
+        JITDUMP("Not guessing for class; likelihood is below %s call threshold %u\n",
+                isInterface ? "interface" : "virtual", likelihoodThreshold);
     }
 
-    // For now we only use the most popular type
+    if (numberOfMethods > 0)
+    {
+        unsigned likelihoodThreshold = 30;
+        if (likelyMethods[0].likelihood >= likelihoodThreshold)
+        {
+            *methodGuess = (CORINFO_METHOD_HANDLE)likelyMethods[0].handle;
+            *likelihood  = likelyMethods[0].likelihood;
+            return;
+        }
 
-    likelihood  = likelyClasses[0].likelihood;
-    likelyClass = likelyClasses[0].clsHandle;
+        JITDUMP("Not guessing for method; likelihood is below %s call threshold %u\n",
+                call->IsDelegateInvoke() ? "delegate" : "virtual", likelihoodThreshold);
+    }
+}
 
-    if (numberOfClasses < 1)
+//------------------------------------------------------------------------
+// isCompatibleMethodGDV:
+//    Check if devirtualizing a call node as a specified target method call is
+//    reasonable.
+//
+// Arguments:
+//    call - the call
+//    gdvTarget - the target method that we want to guess for and devirtualize to
+//
+// Returns:
+//    true if we can proceed with GDV.
+//
+// Notes:
+//    This implements a small simplified signature-compatibility check to
+//    verify that a guess is reasonable. The main goal here is to avoid blowing
+//    up the JIT on PGO data with stale GDV candidates; if they are not
+//    compatible in the ECMA sense then we do not expect the guard to ever pass
+//    at runtime, so we can get by with simplified rules here.
+//
+bool Compiler::isCompatibleMethodGDV(GenTreeCall* call, CORINFO_METHOD_HANDLE gdvTarget)
+{
+    CORINFO_SIG_INFO sig;
+    info.compCompHnd->getMethodSig(gdvTarget, &sig);
+
+    CORINFO_ARG_LIST_HANDLE sigParam  = sig.args;
+    unsigned                numParams = sig.numArgs;
+    unsigned                numArgs   = 0;
+    for (CallArg& arg : call->gtArgs.Args())
     {
-        JITDUMP("No likely class, sorry\n");
-        return;
-    }
+        switch (arg.GetWellKnownArg())
+        {
+            case WellKnownArg::RetBuffer:
+            case WellKnownArg::ThisPointer:
+                // Not part of signature but we still expect to see it here
+                continue;
+            case WellKnownArg::None:
+                break;
+            default:
+                assert(!"Unexpected well known arg to method GDV candidate");
+                continue;
+        }
+
+        numArgs++;
+        if (numArgs > numParams)
+        {
+            JITDUMP("Incompatible method GDV: call [%06u] has more arguments than signature (sig has %d parameters)\n",
+                    dspTreeID(call), numParams);
+            return false;
+        }
+
+        CORINFO_CLASS_HANDLE classHnd = NO_CLASS_HANDLE;
+        CorInfoType          corType  = strip(info.compCompHnd->getArgType(&sig, sigParam, &classHnd));
+        var_types            sigType  = JITtype2varType(corType);
+
+        if (!impCheckImplicitArgumentCoercion(sigType, arg.GetNode()->TypeGet()))
+        {
+            JITDUMP("Incompatible method GDV: arg [%06u] is type-incompatible with signature of target\n",
+                    dspTreeID(arg.GetNode()));
+            return false;
+        }
 
-    assert(likelyClass != NO_CLASS_HANDLE);
+        // Best-effort check for struct compatibility here.
+        if (varTypeIsStruct(sigType) && (arg.GetSignatureClassHandle() != classHnd))
+        {
+            ClassLayout* callLayout = typGetObjLayout(arg.GetSignatureClassHandle());
+            ClassLayout* tarLayout  = typGetObjLayout(classHnd);
 
-    // Print all likely classes
-    JITDUMP("%s classes for %p (%s):\n", doRandomDevirt ? "Random" : "Likely", dspPtr(objClass), objClassName)
-    for (UINT32 i = 0; i < numberOfClasses; i++)
+            if (!ClassLayout::AreCompatible(callLayout, tarLayout))
+            {
+                JITDUMP("Incompatible method GDV: struct arg [%06u] is layout-incompatible with signature of target\n",
+                        dspTreeID(arg.GetNode()));
+                return false;
+            }
+        }
+
+        sigParam = info.compCompHnd->getArgNext(sigParam);
+    }
+
+    if (numArgs < numParams)
     {
-        JITDUMP("  %u) %p (%s) [likelihood:%u%%]\n", i + 1, likelyClasses[i].clsHandle,
-                eeGetClassName(likelyClasses[i].clsHandle), likelyClasses[i].likelihood);
+        JITDUMP("Incompatible method GDV: call [%06u] has fewer arguments (%d) than signature (%d)\n", dspTreeID(call),
+                numArgs, numParams);
+        return false;
     }
 
-    // Todo: a more advanced heuristic using likelihood, number of
-    // classes, and the profile count for this block.
-    //
-    // For now we will guess if the likelihood is at least 25%/30% (intfc/virt), as studies
-    // have shown this transformation should pay off even if we guess wrong sometimes.
+    return true;
+}
+
+//------------------------------------------------------------------------
+// considerGuardedDevirtualization: see if we can profitably guess at the
+//    class or method involved in an interface, virtual, or delegate call.
+//
+// Arguments:
+//
+//    call - potential guarded devirtualization candidate
+//    ilOffset - IL offset of the call instruction
+//    baseMethod - target method of the call
+//    baseClass - class that introduced the target method
+//    pContextHandle - context handle for the call
+//
+// Notes:
+//    Consults with VM to see if there's a likely class or method at runtime,
+//    if so, adds a candidate for guarded devirtualization.
+//
+void Compiler::considerGuardedDevirtualization(GenTreeCall*            call,
+                                               IL_OFFSET               ilOffset,
+                                               bool                    isInterface,
+                                               CORINFO_METHOD_HANDLE   baseMethod,
+                                               CORINFO_CLASS_HANDLE    baseClass,
+                                               CORINFO_CONTEXT_HANDLE* pContextHandle)
+{
+    JITDUMP("Considering guarded devirtualization at IL offset %u (0x%x)\n", ilOffset, ilOffset);
+
+    // We currently only get likely class or method guesses when there is PGO
+    // data with class or method profiles.
     //
-    if (likelihood < likelihoodThreshold)
+    if ((fgPgoClassProfiles == 0) && (fgPgoMethodProfiles == 0))
     {
-        JITDUMP("Not guessing for class; likelihood is below %s call threshold %u\n", callKind, likelihoodThreshold);
+        JITDUMP("Not guessing for class or method: no GDV profile pgo data, or pgo disabled\n");
         return;
     }
 
-    uint32_t const likelyClassAttribs = info.compCompHnd->getClassAttribs(likelyClass);
+    CORINFO_CLASS_HANDLE  likelyClass;
+    CORINFO_METHOD_HANDLE likelyMethod;
+    unsigned              likelihood;
+    pickGDV(call, ilOffset, isInterface, &likelyClass, &likelyMethod, &likelihood);
 
-    if ((likelyClassAttribs & CORINFO_FLG_ABSTRACT) != 0)
+    if ((likelyClass == NO_CLASS_HANDLE) && (likelyMethod == NO_METHOD_HANDLE))
     {
-        // We may see an abstract likely class, if we have a stale profile.
-        // No point guessing for this.
-        //
-        JITDUMP("Not guessing for class; abstract (stale profile)\n");
         return;
     }
 
-    // Figure out which method will be called.
-    //
-    CORINFO_DEVIRTUALIZATION_INFO dvInfo;
-    dvInfo.virtualMethod               = baseMethod;
-    dvInfo.objClass                    = likelyClass;
-    dvInfo.context                     = *pContextHandle;
-    dvInfo.exactContext                = *pContextHandle;
-    dvInfo.pResolvedTokenVirtualMethod = nullptr;
+    uint32_t likelyClassAttribs = 0;
+    if (likelyClass != NO_CLASS_HANDLE)
+    {
+        likelyClassAttribs = info.compCompHnd->getClassAttribs(likelyClass);
 
-    const bool canResolve = info.compCompHnd->resolveVirtualMethod(&dvInfo);
+        if ((likelyClassAttribs & CORINFO_FLG_ABSTRACT) != 0)
+        {
+            // We may see an abstract likely class, if we have a stale profile.
+            // No point guessing for this.
+            //
+            JITDUMP("Not guessing for class; abstract (stale profile)\n");
+            return;
+        }
+
+        // Figure out which method will be called.
+        //
+        CORINFO_DEVIRTUALIZATION_INFO dvInfo;
+        dvInfo.virtualMethod               = baseMethod;
+        dvInfo.objClass                    = likelyClass;
+        dvInfo.context                     = *pContextHandle;
+        dvInfo.exactContext                = *pContextHandle;
+        dvInfo.pResolvedTokenVirtualMethod = nullptr;
+
+        const bool canResolve = info.compCompHnd->resolveVirtualMethod(&dvInfo);
+
+        if (!canResolve)
+        {
+            JITDUMP("Can't figure out which method would be invoked, sorry\n");
+            return;
+        }
 
-    if (!canResolve)
+        likelyMethod = dvInfo.devirtualizedMethod;
+    }
+
+    uint32_t likelyMethodAttribs = info.compCompHnd->getMethodAttribs(likelyMethod);
+
+    if (likelyClass == NO_CLASS_HANDLE)
     {
-        JITDUMP("Can't figure out which method would be invoked, sorry\n");
-        return;
+        // For method GDV do a few more checks that we get for free in the
+        // resolve call above for class-based GDV.
+        if ((likelyMethodAttribs & CORINFO_FLG_STATIC) != 0)
+        {
+            assert(call->IsDelegateInvoke());
+            JITDUMP("Cannot currently handle devirtualizing static delegate calls, sorry\n");
+            return;
+        }
+
+        // Verify that the call target and args look reasonable so that the JIT
+        // does not blow up during inlining/call morphing.
+        //
+        // NOTE: Once we want to support devirtualization of delegate calls to
+        // static methods and remove the check above we will start failing here
+        // for delegates pointing to static methods that have the first arg
+        // bound. For example:
+        //
+        // public static void E(this C c) ...
+        // Action a = new C().E;
+        //
+        // The delegate instance looks exactly like one pointing to an instance
+        // method in this case and the call will have zero args while the
+        // signature has 1 arg.
+        //
+        if (!isCompatibleMethodGDV(call, likelyMethod))
+        {
+            JITDUMP("Target for method-based GDV is incompatible (stale profile?)\n");
+            assert((fgPgoSource != ICorJitInfo::PgoSource::Dynamic) && "Unexpected stale profile in dynamic PGO data");
+            return;
+        }
     }
 
-    CORINFO_METHOD_HANDLE likelyMethod = dvInfo.devirtualizedMethod;
-    JITDUMP("%s call would invoke method %s\n", callKind, eeGetMethodName(likelyMethod, nullptr));
+    JITDUMP("%s call would invoke method %s\n",
+            isInterface ? "interface" : call->IsDelegateInvoke() ? "delegate" : "virtual",
+            eeGetMethodName(likelyMethod, nullptr));
 
     // Add this as a potential candidate.
     //
-    uint32_t const likelyMethodAttribs = info.compCompHnd->getMethodAttribs(likelyMethod);
     addGuardedDevirtualizationCandidate(call, likelyMethod, likelyClass, likelyMethodAttribs, likelyClassAttribs,
                                         likelihood);
 }
@@ -22404,8 +22709,8 @@ void Compiler::addGuardedDevirtualizationCandidate(GenTreeCall*          call,
                                                    unsigned              classAttr,
                                                    unsigned              likelihood)
 {
-    // This transformation only makes sense for virtual calls
-    assert(call->IsVirtual());
+    // This transformation only makes sense for delegate and virtual calls
+    assert(call->IsDelegateInvoke() || call->IsVirtual());
 
     // Only mark calls if the feature is enabled.
     const bool isEnabled = JitConfig.JitEnableGuardedDevirtualization() > 0;
@@ -22455,8 +22760,9 @@ void Compiler::addGuardedDevirtualizationCandidate(GenTreeCall*          call,
 
     // We're all set, proceed with candidate creation.
     //
-    JITDUMP("Marking call [%06u] as guarded devirtualization candidate; will guess for class %s\n", dspTreeID(call),
-            eeGetClassName(classHandle));
+    JITDUMP("Marking call [%06u] as guarded devirtualization candidate; will guess for %s %s\n", dspTreeID(call),
+            classHandle != NO_CLASS_HANDLE ? "class" : "method",
+            classHandle != NO_CLASS_HANDLE ? eeGetClassName(classHandle) : eeGetMethodFullName(methodHandle));
     setMethodHasGuardedDevirtualization();
     call->SetGuardedDevirtualizationCandidate();
 
index 14242b8..e3d799f 100644 (file)
@@ -450,9 +450,11 @@ private:
 
     class GuardedDevirtualizationTransformer final : public Transformer
     {
+        unsigned m_targetLclNum;
+
     public:
         GuardedDevirtualizationTransformer(Compiler* compiler, BasicBlock* block, Statement* stmt)
-            : Transformer(compiler, block, stmt), returnTemp(BAD_VAR_NUM)
+            : Transformer(compiler, block, stmt), m_targetLclNum(BAD_VAR_NUM), returnTemp(BAD_VAR_NUM)
         {
         }
 
@@ -538,23 +540,26 @@ private:
             checkBlock             = currBlock;
             checkBlock->bbJumpKind = BBJ_COND;
 
-            // Fetch method table from object arg to call.
-            GenTree* thisTree = compiler->gtCloneExpr(origCall->gtArgs.GetThisArg()->GetNode());
+            CallArg* thisArg  = origCall->gtArgs.GetThisArg();
+            GenTree* thisTree = thisArg->GetNode();
 
             // Create temp for this if the tree is costly.
-            if (!thisTree->IsLocal())
+            if (thisTree->IsLocal())
+            {
+                thisTree = compiler->gtCloneExpr(thisTree);
+            }
+            else
             {
                 const unsigned thisTempNum = compiler->lvaGrabTemp(true DEBUGARG("guarded devirt this temp"));
-                // lvaSetClass(thisTempNum, ...);
-                GenTree*   asgTree = compiler->gtNewTempAssign(thisTempNum, thisTree);
-                Statement* asgStmt = compiler->fgNewStmtFromTree(asgTree, stmt->GetDebugInfo());
+                GenTree*       asgTree     = compiler->gtNewTempAssign(thisTempNum, thisTree);
+                Statement*     asgStmt     = compiler->fgNewStmtFromTree(asgTree, stmt->GetDebugInfo());
                 compiler->fgInsertStmtAtEnd(checkBlock, asgStmt);
 
                 thisTree = compiler->gtNewLclvNode(thisTempNum, TYP_REF);
 
                 // Propagate the new this to the call. Must be a new expr as the call
                 // will live on in the else block and thisTree is used below.
-                origCall->gtArgs.GetThisArg()->SetEarlyNode(compiler->gtNewLclvNode(thisTempNum, TYP_REF));
+                thisArg->SetEarlyNode(compiler->gtNewLclvNode(thisTempNum, TYP_REF));
             }
 
             // Remember the current last statement. If we're doing a chained GDV, we'll clone/copy
@@ -565,18 +570,96 @@ private:
             //
             lastStmt = checkBlock->lastStmt();
 
-            // Find target method table
-            //
-            GenTree*                              methodTable       = compiler->gtNewMethodTableLookup(thisTree);
-            GuardedDevirtualizationCandidateInfo* guardedInfo       = origCall->gtGuardedDevirtualizationCandidateInfo;
-            CORINFO_CLASS_HANDLE                  clsHnd            = guardedInfo->guardedClassHandle;
-            GenTree*                              targetMethodTable = compiler->gtNewIconEmbClsHndNode(clsHnd);
+            GuardedDevirtualizationCandidateInfo* guardedInfo = origCall->gtGuardedDevirtualizationCandidateInfo;
 
-            // Compare and jump to else (which does the indirect call) if NOT equal
-            //
-            GenTree*   methodTableCompare = compiler->gtNewOperNode(GT_NE, TYP_INT, targetMethodTable, methodTable);
-            GenTree*   jmpTree            = compiler->gtNewOperNode(GT_JTRUE, TYP_VOID, methodTableCompare);
-            Statement* jmpStmt            = compiler->fgNewStmtFromTree(jmpTree, stmt->GetDebugInfo());
+            // Create comparison. On success we will jump to do the indirect call.
+            GenTree* compare;
+            if (guardedInfo->guardedClassHandle != NO_CLASS_HANDLE)
+            {
+                // Find target method table
+                //
+                GenTree*             methodTable       = compiler->gtNewMethodTableLookup(thisTree);
+                CORINFO_CLASS_HANDLE clsHnd            = guardedInfo->guardedClassHandle;
+                GenTree*             targetMethodTable = compiler->gtNewIconEmbClsHndNode(clsHnd);
+
+                compare = compiler->gtNewOperNode(GT_NE, TYP_INT, targetMethodTable, methodTable);
+            }
+            else
+            {
+                assert(origCall->IsVirtualVtable() || origCall->IsDelegateInvoke());
+                // We reuse the target except if this is a chained GDV, in
+                // which case the check will be moved into the success case of
+                // a previous GDV and thus may not execute when we hit the cold
+                // path.
+                // TODO-GDV: Consider duplicating the store at the end of the
+                // cold case for the previous GDV. Then we can reuse the target
+                // if the second check of a chained GDV fails.
+                bool reuseTarget = (origCall->gtCallMoreFlags & GTF_CALL_M_GUARDED_DEVIRT_CHAIN) == 0;
+                if (origCall->IsVirtualVtable())
+                {
+                    GenTree* tarTree = compiler->fgExpandVirtualVtableCallTarget(origCall);
+
+                    if (reuseTarget)
+                    {
+                        m_targetLclNum = compiler->lvaGrabTemp(false DEBUGARG("guarded devirt call target temp"));
+
+                        GenTree*   asgTree = compiler->gtNewTempAssign(m_targetLclNum, tarTree);
+                        Statement* asgStmt = compiler->fgNewStmtFromTree(asgTree, stmt->GetDebugInfo());
+                        compiler->fgInsertStmtAtEnd(checkBlock, asgStmt);
+
+                        tarTree = compiler->gtNewLclvNode(m_targetLclNum, TYP_I_IMPL);
+                    }
+
+                    CORINFO_METHOD_HANDLE methHnd = guardedInfo->guardedMethodHandle;
+                    CORINFO_CONST_LOOKUP  lookup;
+                    compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &lookup);
+
+                    GenTree* compareTarTree = CreateTreeForLookup(methHnd, lookup);
+                    compare                 = compiler->gtNewOperNode(GT_NE, TYP_INT, compareTarTree, tarTree);
+                }
+                else
+                {
+                    // Reusing the call target for delegates is more
+                    // complicated. Essentially we need to do the
+                    // transformation done in LowerDelegateInvoke by converting
+                    // the call to CT_INDIRECT and reusing the target address.
+                    // We will do that transformation in CreateElse, but here
+                    // we need to stash the target.
+                    CLANG_FORMAT_COMMENT_ANCHOR;
+#ifdef TARGET_ARM
+                    // Not impossible to support, but would additionally
+                    // require us to load the wrapper delegate cell when
+                    // expanding.
+                    reuseTarget &= (origCall->gtCallMoreFlags & GTF_CALL_M_WRAPPER_DELEGATE_INV) == 0;
+#endif
+
+                    GenTree* offset =
+                        compiler->gtNewIconNode((ssize_t)compiler->eeGetEEInfo()->offsetOfDelegateFirstTarget,
+                                                TYP_I_IMPL);
+                    GenTree* tarTree = compiler->gtNewOperNode(GT_ADD, TYP_BYREF, thisTree, offset);
+                    tarTree          = compiler->gtNewIndir(TYP_I_IMPL, tarTree);
+
+                    if (reuseTarget)
+                    {
+                        m_targetLclNum = compiler->lvaGrabTemp(false DEBUGARG("guarded devirt call target temp"));
+
+                        GenTree*   asgTree = compiler->gtNewTempAssign(m_targetLclNum, tarTree);
+                        Statement* asgStmt = compiler->fgNewStmtFromTree(asgTree, stmt->GetDebugInfo());
+                        compiler->fgInsertStmtAtEnd(checkBlock, asgStmt);
+                        tarTree = compiler->gtNewLclvNode(m_targetLclNum, TYP_I_IMPL);
+                    }
+
+                    CORINFO_METHOD_HANDLE methHnd = guardedInfo->guardedMethodHandle;
+                    CORINFO_CONST_LOOKUP  lookup;
+                    compiler->info.compCompHnd->getFunctionFixedEntryPoint(methHnd, false, &lookup);
+
+                    GenTree* compareTarTree = CreateTreeForLookup(methHnd, lookup);
+                    compare                 = compiler->gtNewOperNode(GT_NE, TYP_INT, compareTarTree, tarTree);
+                }
+            }
+
+            GenTree*   jmpTree = compiler->gtNewOperNode(GT_JTRUE, TYP_VOID, compare);
+            Statement* jmpStmt = compiler->fgNewStmtFromTree(jmpTree, stmt->GetDebugInfo());
             compiler->fgInsertStmtAtEnd(checkBlock, jmpStmt);
         }
 
@@ -682,35 +765,94 @@ private:
             InlineCandidateInfo* inlineInfo = origCall->gtInlineCandidateInfo;
             CORINFO_CLASS_HANDLE clsHnd     = inlineInfo->guardedClassHandle;
 
-            // copy 'this' to temp with exact type.
+            //
+            // Copy the 'this' for the devirtualized call to a new temp. For
+            // class-based GDV this will allow us to set the exact type on that
+            // temp. For delegate GDV, this will be the actual 'this' object
+            // stored in the delegate.
+            //
             const unsigned thisTemp  = compiler->lvaGrabTemp(false DEBUGARG("guarded devirt this exact temp"));
             GenTree*       clonedObj = compiler->gtCloneExpr(origCall->gtArgs.GetThisArg()->GetNode());
-            GenTree*       assign    = compiler->gtNewTempAssign(thisTemp, clonedObj);
-            compiler->lvaSetClass(thisTemp, clsHnd, true);
+            GenTree*       newThisObj;
+            if (origCall->IsDelegateInvoke())
+            {
+                GenTree* offset =
+                    compiler->gtNewIconNode((ssize_t)compiler->eeGetEEInfo()->offsetOfDelegateInstance, TYP_I_IMPL);
+                newThisObj = compiler->gtNewOperNode(GT_ADD, TYP_BYREF, clonedObj, offset);
+                newThisObj = compiler->gtNewIndir(TYP_REF, newThisObj);
+            }
+            else
+            {
+                newThisObj = clonedObj;
+            }
+            GenTree* assign = compiler->gtNewTempAssign(thisTemp, newThisObj);
+
+            if (clsHnd != NO_CLASS_HANDLE)
+            {
+                compiler->lvaSetClass(thisTemp, clsHnd, true);
+            }
+            else
+            {
+                compiler->lvaSetClass(thisTemp,
+                                      compiler->info.compCompHnd->getMethodClass(inlineInfo->guardedMethodHandle));
+            }
+
             compiler->fgNewStmtAtEnd(thenBlock, assign);
 
-            // Clone call. Note we must use the special candidate helper.
+            // Clone call for the devirtualized case. Note we must use the
+            // special candidate helper and we need to use the new 'this'.
             GenTreeCall* call = compiler->gtCloneCandidateCall(origCall);
             call->gtArgs.GetThisArg()->SetEarlyNode(compiler->gtNewLclvNode(thisTemp, TYP_REF));
             call->SetIsGuarded();
 
             JITDUMP("Direct call [%06u] in block " FMT_BB "\n", compiler->dspTreeID(call), thenBlock->bbNum);
 
-            // Then invoke impDevirtualizeCall to actually transform the call for us,
-            // given the original (base) method and the exact guarded class. It should succeed.
-            //
-            CORINFO_METHOD_HANDLE  methodHnd              = call->gtCallMethHnd;
-            unsigned               methodFlags            = compiler->info.compCompHnd->getMethodAttribs(methodHnd);
-            CORINFO_CONTEXT_HANDLE context                = inlineInfo->exactContextHnd;
-            const bool             isLateDevirtualization = true;
-            const bool explicitTailCall = (call->AsCall()->gtCallMoreFlags & GTF_CALL_M_EXPLICIT_TAILCALL) != 0;
-            compiler->impDevirtualizeCall(call, nullptr, &methodHnd, &methodFlags, &context, nullptr,
-                                          isLateDevirtualization, explicitTailCall);
+            CORINFO_METHOD_HANDLE  methodHnd = call->gtCallMethHnd;
+            CORINFO_CONTEXT_HANDLE context   = inlineInfo->exactContextHnd;
+            if (clsHnd != NO_CLASS_HANDLE)
+            {
+                // Then invoke impDevirtualizeCall to actually transform the call for us,
+                // given the original (base) method and the exact guarded class. It should succeed.
+                //
+                unsigned   methodFlags            = compiler->info.compCompHnd->getMethodAttribs(methodHnd);
+                const bool isLateDevirtualization = true;
+                const bool explicitTailCall = (call->AsCall()->gtCallMoreFlags & GTF_CALL_M_EXPLICIT_TAILCALL) != 0;
+                compiler->impDevirtualizeCall(call, nullptr, &methodHnd, &methodFlags, &context, nullptr,
+                                              isLateDevirtualization, explicitTailCall);
+            }
+            else
+            {
+                // Otherwise we know the exact method already, so just change
+                // the call as necessary here.
+                call->gtFlags &= ~GTF_CALL_VIRT_KIND_MASK;
+                call->gtCallMethHnd = methodHnd = inlineInfo->guardedMethodHandle;
+                call->gtCallType                = CT_USER_FUNC;
+                call->gtCallMoreFlags |= GTF_CALL_M_DEVIRTUALIZED;
+                call->gtCallMoreFlags &= ~GTF_CALL_M_DELEGATE_INV;
+                // TODO-GDV: To support R2R we need to get the entry point
+                // here. We should unify with the tail of impDevirtualizeCall.
+
+                if (origCall->IsVirtual())
+                {
+                    // Virtual calls include an implicit null check, which we may
+                    // now need to make explicit.
+                    bool isExact;
+                    bool objIsNonNull;
+                    compiler->gtGetClassHandle(newThisObj, &isExact, &objIsNonNull);
+
+                    if (!objIsNonNull)
+                    {
+                        call->gtFlags |= GTF_CALL_NULLCHECK;
+                    }
+                }
+
+                context = MAKE_METHODCONTEXT(methodHnd);
+            }
 
             // We know this call can devirtualize or we would not have set up GDV here.
-            // So impDevirtualizeCall should succeed in devirtualizing.
+            // So the code above should have succeeded in devirtualizing.
             //
-            assert(!call->IsVirtual());
+            assert(!call->IsVirtual() && !call->IsDelegateInvoke());
 
             // If the devirtualizer was unable to transform the call to invoke the unboxed entry, the inline info
             // we set up may be invalid. We won't be able to inline anyways. So demote the call as an inline candidate.
@@ -776,7 +918,7 @@ private:
         }
 
         //------------------------------------------------------------------------
-        // CreateElse: create else block. This executes the unaltered indirect call.
+        // CreateElse: create else block. This executes the original indirect call.
         //
         virtual void CreateElse()
         {
@@ -796,6 +938,38 @@ private:
                 newStmt->SetRootNode(assign);
             }
 
+            if (m_targetLclNum != BAD_VAR_NUM)
+            {
+                if (call->IsVirtualVtable())
+                {
+                    // We already loaded the target once for the check, so reuse it from the temp.
+                    call->gtControlExpr = compiler->gtNewLclvNode(m_targetLclNum, TYP_I_IMPL);
+                    call->SetExpandedEarly();
+                }
+                else if (call->IsDelegateInvoke())
+                {
+                    // Target was saved into a temp during check. We expand the
+                    // delegate call to a CT_INDIRECT call that uses the target
+                    // directly, somewhat similarly to LowerDelegateInvoke.
+                    call->gtCallType   = CT_INDIRECT;
+                    call->gtCallAddr   = compiler->gtNewLclvNode(m_targetLclNum, TYP_I_IMPL);
+                    call->gtCallCookie = nullptr;
+                    call->gtCallMoreFlags &= ~GTF_CALL_M_DELEGATE_INV;
+
+                    GenTree* thisOffset =
+                        compiler->gtNewIconNode((ssize_t)compiler->eeGetEEInfo()->offsetOfDelegateInstance, TYP_I_IMPL);
+                    CallArg* thisArg     = call->gtArgs.GetThisArg();
+                    GenTree* delegateObj = thisArg->GetNode();
+
+                    assert(delegateObj->OperIsLocal());
+                    GenTree* newThis =
+                        compiler->gtNewOperNode(GT_ADD, TYP_BYREF, compiler->gtCloneExpr(delegateObj), thisOffset);
+                    newThis = compiler->gtNewIndir(TYP_REF, newThis);
+
+                    thisArg->SetEarlyNode(newThis);
+                }
+            }
+
             compiler->fgInsertStmtAtEnd(elseBlock, newStmt);
 
             // Set the original statement to a nop.
@@ -1005,6 +1179,62 @@ private:
     private:
         unsigned   returnTemp;
         Statement* lastStmt;
+
+        //------------------------------------------------------------------------
+        // CreateTreeForLookup: Create a tree representing a lookup of a method address.
+        //
+        // Arguments:
+        //   methHnd - the handle for the method the lookup is for
+        //   lookup  - lookup information for the address; lookup.accessType selects the tree shape
+        //
+        // Returns:
+        //   A node representing the lookup. The IAT_PPVALUE and default cases hit noway_assert and return nullptr.
+        //
+        GenTree* CreateTreeForLookup(CORINFO_METHOD_HANDLE methHnd, const CORINFO_CONST_LOOKUP& lookup)
+        {
+            switch (lookup.accessType)
+            {
+                case IAT_VALUE: // the constant built from lookup.addr is the whole tree
+                {
+                    return CreateFunctionTargetAddr(methHnd, lookup);
+                }
+                case IAT_PVALUE: // one load through the constant built from lookup.addr
+                {
+                    GenTree* tree = CreateFunctionTargetAddr(methHnd, lookup);
+                    tree          = compiler->gtNewIndir(TYP_I_IMPL, tree);
+                    tree->gtFlags |= GTF_IND_NONFAULTING | GTF_IND_INVARIANT;
+                    tree->gtFlags &= ~GTF_EXCEPT; // the indir is marked non-faulting above, so drop GTF_EXCEPT
+                    return tree;
+                }
+                case IAT_PPVALUE: // not handled by this helper
+                {
+                    noway_assert(!"Unexpected IAT_PPVALUE");
+                    return nullptr;
+                }
+                case IAT_RELPVALUE: // result is (load through lookup.addr) + lookup.addr
+                {
+                    GenTree* addr = CreateFunctionTargetAddr(methHnd, lookup); // operand of the final ADD
+                    GenTree* tree = CreateFunctionTargetAddr(methHnd, lookup); // second node, consumed by the load
+                    tree          = compiler->gtNewIndir(TYP_I_IMPL, tree);
+                    tree->gtFlags |= GTF_IND_NONFAULTING | GTF_IND_INVARIANT;
+                    tree->gtFlags &= ~GTF_EXCEPT; // the indir is marked non-faulting above, so drop GTF_EXCEPT
+                    tree = compiler->gtNewOperNode(GT_ADD, TYP_I_IMPL, tree, addr);
+                    return tree;
+                }
+                default:
+                {
+                    noway_assert(!"Bad accessType");
+                    return nullptr;
+                }
+            }
+        }
+
+        GenTree* CreateFunctionTargetAddr(CORINFO_METHOD_HANDLE methHnd, const CORINFO_CONST_LOOKUP& lookup)
+        { // Returns a new handle constant node holding lookup.addr, flagged GTF_ICON_FTN_ADDR.
+            GenTree* con = compiler->gtNewIconHandleNode((size_t)lookup.addr, GTF_ICON_FTN_ADDR);
+            INDEBUG(con->AsIntCon()->gtTargetHandle = (size_t)methHnd); // methHnd is only recorded via INDEBUG
+            return con;
+        }
     };
 
     // Runtime lookup with dynamic dictionary expansion transformer,
index 763f5bc..f21a77d 100644 (file)
@@ -577,10 +577,10 @@ private:
     bool                  m_reportFailureAsVmFailure;
 };
 
-// ClassProfileCandidateInfo provides information about
+// HandleHistogramProfileCandidateInfo provides information about
 // profiling an indirect or virtual call.
 //
-struct ClassProfileCandidateInfo
+struct HandleHistogramProfileCandidateInfo
 {
     IL_OFFSET ilOffset;
     unsigned  probeIndex;
@@ -589,7 +589,7 @@ struct ClassProfileCandidateInfo
 // GuardedDevirtualizationCandidateInfo provides information about
 // a potential target of a virtual or interface call.
 //
-struct GuardedDevirtualizationCandidateInfo : ClassProfileCandidateInfo
+struct GuardedDevirtualizationCandidateInfo : HandleHistogramProfileCandidateInfo
 {
     CORINFO_CLASS_HANDLE  guardedClassHandle;
     CORINFO_METHOD_HANDLE guardedMethodHandle;
index cf0e799..110c079 100644 (file)
@@ -327,8 +327,9 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 
 typedef class ICorJitInfo* COMP_HANDLE;
 
-const CORINFO_CLASS_HANDLE NO_CLASS_HANDLE = nullptr;
-const CORINFO_FIELD_HANDLE NO_FIELD_HANDLE = nullptr;
+const CORINFO_CLASS_HANDLE  NO_CLASS_HANDLE  = nullptr;
+const CORINFO_FIELD_HANDLE  NO_FIELD_HANDLE  = nullptr;
+const CORINFO_METHOD_HANDLE NO_METHOD_HANDLE = nullptr;
 
 /*****************************************************************************/
 
@@ -839,19 +840,26 @@ T dspOffset(T o)
 
 #endif // !defined(DEBUG)
 
-struct LikelyClassRecord
+struct LikelyClassMethodRecord
 {
-    CORINFO_CLASS_HANDLE clsHandle;
-    UINT32               likelihood;
+    intptr_t handle;
+    UINT32   likelihood;
 };
 
-extern "C" UINT32 WINAPI getLikelyClasses(LikelyClassRecord*                     pLikelyClasses,
+extern "C" UINT32 WINAPI getLikelyClasses(LikelyClassMethodRecord*               pLikelyClasses,
                                           UINT32                                 maxLikelyClasses,
                                           ICorJitInfo::PgoInstrumentationSchema* schema,
                                           UINT32                                 countSchemaItems,
                                           BYTE*                                  pInstrumentationData,
                                           int32_t                                ilOffset);
 
+extern "C" UINT32 WINAPI getLikelyMethods(LikelyClassMethodRecord*               pLikelyMethods,
+                                          UINT32                                 maxLikelyMethods,
+                                          ICorJitInfo::PgoInstrumentationSchema* schema,
+                                          UINT32                                 countSchemaItems,
+                                          BYTE*                                  pInstrumentationData,
+                                          int32_t                                ilOffset);
+
 /*****************************************************************************/
 #endif //_JIT_H_
 /*****************************************************************************/
index 569b030..aa562c2 100644 (file)
@@ -552,6 +552,8 @@ CONFIG_INTEGER(JitConsumeProfileForCasts, W("JitConsumeProfileForCasts"), 1) //
                                                                              // castclass/isinst
 
 CONFIG_INTEGER(JitClassProfiling, W("JitClassProfiling"), 1)         // Profile virtual and interface calls
+CONFIG_INTEGER(JitDelegateProfiling, W("JitDelegateProfiling"), 1)   // Profile resolved delegate call targets
+CONFIG_INTEGER(JitVTableProfiling, W("JitVTableProfiling"), 0)       // Profile resolved vtable call targets
 CONFIG_INTEGER(JitEdgeProfiling, W("JitEdgeProfiling"), 1)           // Profile edges instead of blocks
 CONFIG_INTEGER(JitCollect64BitCounts, W("JitCollect64BitCounts"), 0) // Collect counts as 64-bit values.
 
index 632c9ce..277d382 100644 (file)
@@ -26,45 +26,45 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 
 // Data item in class profile histogram
 //
-struct LikelyClassHistogramEntry
+struct LikelyClassMethodHistogramEntry
 {
-    // Class that was observed at runtime
-    INT_PTR m_mt; // This may be an "unknown type handle"
+    // Handle that was observed at runtime
+    INT_PTR m_handle; // This may be an "unknown handle"
     // Number of observations in the table
     unsigned m_count;
 };
 
 // Summarizes a ClassProfile table by forming a Histogram
 //
-struct LikelyClassHistogram
+struct LikelyClassMethodHistogram
 {
-    LikelyClassHistogram(INT_PTR* histogramEntries, unsigned entryCount);
+    LikelyClassMethodHistogram(INT_PTR* histogramEntries, unsigned entryCount);
 
     // Sum of counts from all entries in the histogram. This includes "unknown" entries which are not captured in
     // m_histogram
     unsigned m_totalCount;
-    // Rough guess at count of unknown types
-    unsigned m_unknownTypes;
+    // Rough guess at count of unknown handles
+    unsigned m_unknownHandles;
     // Histogram entries, in no particular order.
-    LikelyClassHistogramEntry m_histogram[HISTOGRAM_MAX_SIZE_COUNT];
-    UINT32                    countHistogramElements = 0;
+    LikelyClassMethodHistogramEntry m_histogram[HISTOGRAM_MAX_SIZE_COUNT];
+    UINT32                          countHistogramElements = 0;
 
-    LikelyClassHistogramEntry HistogramEntryAt(unsigned index)
+    LikelyClassMethodHistogramEntry HistogramEntryAt(unsigned index)
     {
         return m_histogram[index];
     }
 };
 
 //------------------------------------------------------------------------
-// LikelyClassHistogram::LikelyClassHistgram: construct a new histogram
+// LikelyClassMethodHistogram::LikelyClassMethodHistogram: construct a new histogram
 //
 // Arguments:
 //    histogramEntries - pointer to the table portion of a ClassProfile* object (see corjit.h)
 //    entryCount - number of entries in the table to examine
 //
-LikelyClassHistogram::LikelyClassHistogram(INT_PTR* histogramEntries, unsigned entryCount)
+LikelyClassMethodHistogram::LikelyClassMethodHistogram(INT_PTR* histogramEntries, unsigned entryCount)
 {
-    m_unknownTypes                 = 0;
+    m_unknownHandles               = 0;
     m_totalCount                   = 0;
     uint32_t unknownTypeHandleMask = 0;
 
@@ -83,7 +83,7 @@ LikelyClassHistogram::LikelyClassHistogram(INT_PTR* histogramEntries, unsigned e
         unsigned h     = 0;
         for (; h < countHistogramElements; h++)
         {
-            if (m_histogram[h].m_mt == currentEntry)
+            if (m_histogram[h].m_handle == currentEntry)
             {
                 m_histogram[h].m_count++;
                 found = true;
@@ -97,8 +97,8 @@ LikelyClassHistogram::LikelyClassHistogram(INT_PTR* histogramEntries, unsigned e
             {
                 continue;
             }
-            LikelyClassHistogramEntry newEntry;
-            newEntry.m_mt                         = currentEntry;
+            LikelyClassMethodHistogramEntry newEntry;
+            newEntry.m_handle                     = currentEntry;
             newEntry.m_count                      = 1;
             m_histogram[countHistogramElements++] = newEntry;
         }
@@ -106,42 +106,28 @@ LikelyClassHistogram::LikelyClassHistogram(INT_PTR* histogramEntries, unsigned e
 }
 
 //------------------------------------------------------------------------
-// getLikelyClasses: find class profile data for an IL offset, and return the most likely classes
+// getLikelyClassesOrMethods:
+//   Find class/method profile data for an IL offset, and return the most
+//   likely classes/methods.
 //
-// Arguments:
-//    pLikelyClasses - [OUT] array of likely classes sorted by likelihood (descending). It must be
-//                     at least of 'maxLikelyClasses' (next argument) length.
-//                     The array consists of pairs "clsHandle - likelihood" ordered by likelihood
-//                     (descending) where likelihood can be any value in [0..100] range. clsHandle
-//                     is never null for [0..<return value of this function>) range, Items in
-//                     [<return value of this function>..maxLikelyClasses) are zeroed if the number
-//                     of classes seen is less than maxLikelyClasses provided.
-//    maxLikelyClasses - limit for likely classes to output
-//    schema - profile schema
-//    countSchemaItems - number of items in the schema
-//    pInstrumentationData - associated data
-//    ilOffset - il offset of the callvirt
+//   This is a common entrypoint for getLikelyClasses and getLikelyMethods.
+//   See documentation for those for more information.
 //
-// Returns:
-//    Estimated number of classes seen at runtime
-//
-// Notes:
-//    A "monomorphic" call site will return likelihood 100 and number of entries = 1.
-//
-//   This is used by the devirtualization logic below, and by crossgen2 when producing
-//   the R2R image (to reduce the sizecost of carrying the type histogram)
-//
-//   This code can runs without a jit instance present, so JITDUMP and related
-//   cannot be used.
-//
-extern "C" DLLEXPORT UINT32 WINAPI getLikelyClasses(LikelyClassRecord*                     pLikelyClasses,
-                                                    UINT32                                 maxLikelyClasses,
-                                                    ICorJitInfo::PgoInstrumentationSchema* schema,
-                                                    UINT32                                 countSchemaItems,
-                                                    BYTE*                                  pInstrumentationData,
-                                                    int32_t                                ilOffset)
+static unsigned getLikelyClassesOrMethods(LikelyClassMethodRecord*               pLikelyEntries,
+                                          UINT32                                 maxLikelyClasses,
+                                          ICorJitInfo::PgoInstrumentationSchema* schema,
+                                          UINT32                                 countSchemaItems,
+                                          BYTE*                                  pInstrumentationData,
+                                          int32_t                                ilOffset,
+                                          bool                                   types)
 {
-    ZeroMemory(pLikelyClasses, maxLikelyClasses * sizeof(*pLikelyClasses));
+    ICorJitInfo::PgoInstrumentationKind histogramKind =
+        types ? ICorJitInfo::PgoInstrumentationKind::HandleHistogramTypes
+              : ICorJitInfo::PgoInstrumentationKind::HandleHistogramMethods;
+    ICorJitInfo::PgoInstrumentationKind compressedKind = types ? ICorJitInfo::PgoInstrumentationKind::GetLikelyClass
+                                                               : ICorJitInfo::PgoInstrumentationKind::GetLikelyMethod;
+
+    memset(pLikelyEntries, 0, maxLikelyClasses * sizeof(*pLikelyEntries));
 
     if (schema == nullptr)
     {
@@ -153,17 +139,16 @@ extern "C" DLLEXPORT UINT32 WINAPI getLikelyClasses(LikelyClassRecord*
         if (schema[i].ILOffset != ilOffset)
             continue;
 
-        if ((schema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::GetLikelyClass) &&
-            (schema[i].Count == 1))
+        if ((schema[i].InstrumentationKind == compressedKind) && (schema[i].Count == 1))
         {
-            INT_PTR result = *(INT_PTR*)(pInstrumentationData + schema[i].Offset);
+            intptr_t result = *(intptr_t*)(pInstrumentationData + schema[i].Offset);
             if (ICorJitInfo::IsUnknownHandle(result))
             {
                 return 0;
             }
-            assert(result != 0); // we don't expect zero in GetLikelyClass
-            pLikelyClasses[0].likelihood = (UINT32)(schema[i].Other & 0xFF);
-            pLikelyClasses[0].clsHandle  = (CORINFO_CLASS_HANDLE)result;
+            assert(result != 0); // we don't expect zero in GetLikelyClass/GetLikelyMethod
+            pLikelyEntries[0].likelihood = (UINT32)(schema[i].Other & 0xFF);
+            pLikelyEntries[0].handle     = result;
             return 1;
         }
 
@@ -172,11 +157,11 @@ extern "C" DLLEXPORT UINT32 WINAPI getLikelyClasses(LikelyClassRecord*
             (schema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::HandleHistogramLongCount);
 
         if (isHistogramCount && (schema[i].Count == 1) && ((i + 1) < countSchemaItems) &&
-            (schema[i + 1].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::HandleHistogramTypes))
+            (schema[i + 1].InstrumentationKind == histogramKind))
         {
             // Form a histogram
             //
-            LikelyClassHistogram h((INT_PTR*)(pInstrumentationData + schema[i + 1].Offset), schema[i + 1].Count);
+            LikelyClassMethodHistogram h((INT_PTR*)(pInstrumentationData + schema[i + 1].Offset), schema[i + 1].Count);
 
             // Use histogram count as number of classes estimate
             // Report back what we've learned
@@ -189,45 +174,45 @@ extern "C" DLLEXPORT UINT32 WINAPI getLikelyClasses(LikelyClassRecord*
 
                 case 1:
                 {
-                    LikelyClassHistogramEntry const hist0 = h.HistogramEntryAt(0);
+                    LikelyClassMethodHistogramEntry const hist0 = h.HistogramEntryAt(0);
                     // Fast path for monomorphic cases
-                    if (ICorJitInfo::IsUnknownHandle(hist0.m_mt))
+                    if (ICorJitInfo::IsUnknownHandle(hist0.m_handle))
                     {
                         return 0;
                     }
-                    pLikelyClasses[0].likelihood = 100;
-                    pLikelyClasses[0].clsHandle  = (CORINFO_CLASS_HANDLE)hist0.m_mt;
+                    pLikelyEntries[0].likelihood = 100;
+                    pLikelyEntries[0].handle     = hist0.m_handle;
                     return 1;
                 }
 
                 case 2:
                 {
-                    LikelyClassHistogramEntry const hist0 = h.HistogramEntryAt(0);
-                    LikelyClassHistogramEntry const hist1 = h.HistogramEntryAt(1);
                     // Fast path for two classes
-                    if ((hist0.m_count >= hist1.m_count) && !ICorJitInfo::IsUnknownHandle(hist0.m_mt))
+                    LikelyClassMethodHistogramEntry const hist0 = h.HistogramEntryAt(0);
+                    LikelyClassMethodHistogramEntry const hist1 = h.HistogramEntryAt(1);
+                    if ((hist0.m_count >= hist1.m_count) && !ICorJitInfo::IsUnknownHandle(hist0.m_handle))
                     {
-                        pLikelyClasses[0].likelihood = (100 * hist0.m_count) / h.m_totalCount;
-                        pLikelyClasses[0].clsHandle  = (CORINFO_CLASS_HANDLE)hist0.m_mt;
+                        pLikelyEntries[0].likelihood = (100 * hist0.m_count) / h.m_totalCount;
+                        pLikelyEntries[0].handle     = hist0.m_handle;
 
-                        if ((maxLikelyClasses > 1) && !ICorJitInfo::IsUnknownHandle(hist1.m_mt))
+                        if ((maxLikelyClasses > 1) && !ICorJitInfo::IsUnknownHandle(hist1.m_handle))
                         {
-                            pLikelyClasses[1].likelihood = (100 * hist1.m_count) / h.m_totalCount;
-                            pLikelyClasses[1].clsHandle  = (CORINFO_CLASS_HANDLE)hist1.m_mt;
+                            pLikelyEntries[1].likelihood = (100 * hist1.m_count) / h.m_totalCount;
+                            pLikelyEntries[1].handle     = hist1.m_handle;
                             return 2;
                         }
                         return 1;
                     }
 
-                    if (!ICorJitInfo::IsUnknownHandle(hist1.m_mt))
+                    if (!ICorJitInfo::IsUnknownHandle(hist1.m_handle))
                     {
-                        pLikelyClasses[0].likelihood = (100 * hist1.m_count) / h.m_totalCount;
-                        pLikelyClasses[0].clsHandle  = (CORINFO_CLASS_HANDLE)hist1.m_mt;
+                        pLikelyEntries[0].likelihood = (100 * hist1.m_count) / h.m_totalCount;
+                        pLikelyEntries[0].handle     = hist1.m_handle;
 
-                        if ((maxLikelyClasses > 1) && !ICorJitInfo::IsUnknownHandle(hist0.m_mt))
+                        if ((maxLikelyClasses > 1) && !ICorJitInfo::IsUnknownHandle(hist0.m_handle))
                         {
-                            pLikelyClasses[1].likelihood = (100 * hist0.m_count) / h.m_totalCount;
-                            pLikelyClasses[1].clsHandle  = (CORINFO_CLASS_HANDLE)hist0.m_mt;
+                            pLikelyEntries[1].likelihood = (100 * hist0.m_count) / h.m_totalCount;
+                            pLikelyEntries[1].handle     = hist0.m_handle;
                             return 2;
                         }
                         return 1;
@@ -237,14 +222,14 @@ extern "C" DLLEXPORT UINT32 WINAPI getLikelyClasses(LikelyClassRecord*
 
                 default:
                 {
-                    LikelyClassHistogramEntry sortedEntries[HISTOGRAM_MAX_SIZE_COUNT];
+                    LikelyClassMethodHistogramEntry sortedEntries[HISTOGRAM_MAX_SIZE_COUNT];
 
                     // Since this method can be invoked without a jit instance we can't use any existing allocators
                     unsigned knownHandles = 0;
                     for (unsigned m = 0; m < h.countHistogramElements; m++)
                     {
-                        LikelyClassHistogramEntry const hist = h.HistogramEntryAt(m);
-                        if (!ICorJitInfo::IsUnknownHandle(hist.m_mt))
+                        LikelyClassMethodHistogramEntry const hist = h.HistogramEntryAt(m);
+                        if (!ICorJitInfo::IsUnknownHandle(hist.m_handle))
                         {
                             sortedEntries[knownHandles++] = hist;
                         }
@@ -252,7 +237,8 @@ extern "C" DLLEXPORT UINT32 WINAPI getLikelyClasses(LikelyClassRecord*
 
                     // sort by m_count (descending)
                     jitstd::sort(sortedEntries, sortedEntries + knownHandles,
-                                 [](const LikelyClassHistogramEntry& h1, const LikelyClassHistogramEntry& h2) -> bool {
+                                 [](const LikelyClassMethodHistogramEntry& h1,
+                                    const LikelyClassMethodHistogramEntry& h2) -> bool {
                                      return h1.m_count > h2.m_count;
                                  });
 
@@ -260,9 +246,9 @@ extern "C" DLLEXPORT UINT32 WINAPI getLikelyClasses(LikelyClassRecord*
 
                     for (size_t hIdx = 0; hIdx < numberOfClasses; hIdx++)
                     {
-                        LikelyClassHistogramEntry const hc = sortedEntries[hIdx];
-                        pLikelyClasses[hIdx].clsHandle     = (CORINFO_CLASS_HANDLE)hc.m_mt;
-                        pLikelyClasses[hIdx].likelihood    = hc.m_count * 100 / h.m_totalCount;
+                        LikelyClassMethodHistogramEntry const hc = sortedEntries[hIdx];
+                        pLikelyEntries[hIdx].handle              = hc.m_handle;
+                        pLikelyEntries[hIdx].likelihood          = hc.m_count * 100 / h.m_totalCount;
                     }
                     return numberOfClasses;
                 }
@@ -276,8 +262,64 @@ extern "C" DLLEXPORT UINT32 WINAPI getLikelyClasses(LikelyClassRecord*
 }
 
 //------------------------------------------------------------------------
-// getRandomClass: find class profile data for an IL offset, and return
-//   one of the possible classes at random
+// getLikelyClasses: find class profile data for an IL offset, and return the most likely classes
+//
+// Arguments:
+//    pLikelyClasses - [OUT] array of likely classes sorted by likelihood (descending). It must be
+//                     at least of 'maxLikelyClasses' (next argument) length.
+//                     The array consists of pairs "handle - likelihood" ordered by likelihood
+//                     (descending) where likelihood can be any value in [0..100] range. handle
+//                     is never null for [0..<return value of this function>) range. Items in
+//                     [<return value of this function>..maxLikelyClasses) are zeroed if the number
+//                     of classes seen is less than maxLikelyClasses provided.
+//    maxLikelyClasses - limit for likely classes to output
+//    schema - profile schema
+//    countSchemaItems - number of items in the schema
+//    pInstrumentationData - associated data
+//    ilOffset - il offset of the callvirt
+//
+// Returns:
+//    Estimated number of classes seen at runtime
+//
+// Notes:
+//    A "monomorphic" call site will return likelihood 100 and number of entries = 1.
+//
+//   This is used by the devirtualization logic below, and by crossgen2 when producing
+//   the R2R image (to reduce the size cost of carrying the type histogram)
+//
+//   This code can run without a jit instance present, so JITDUMP and related
+//   cannot be used.
+//
+extern "C" DLLEXPORT UINT32 WINAPI getLikelyClasses(LikelyClassMethodRecord*               pLikelyClasses,
+                                                    UINT32                                 maxLikelyClasses,
+                                                    ICorJitInfo::PgoInstrumentationSchema* schema,
+                                                    UINT32                                 countSchemaItems,
+                                                    BYTE*                                  pInstrumentationData,
+                                                    int32_t                                ilOffset)
+{
+    return getLikelyClassesOrMethods(pLikelyClasses, maxLikelyClasses, schema, countSchemaItems, pInstrumentationData,
+                                     ilOffset, true);
+}
+
+//------------------------------------------------------------------------
+// getLikelyMethods: find method profile data for an IL offset, and return the most likely methods
+//
+// See documentation on getLikelyClasses above; this differs only in passing 'false' to getLikelyClassesOrMethods.
+//
+extern "C" DLLEXPORT UINT32 WINAPI getLikelyMethods(LikelyClassMethodRecord*               pLikelyMethods,
+                                                    UINT32                                 maxLikelyMethods,
+                                                    ICorJitInfo::PgoInstrumentationSchema* schema,
+                                                    UINT32                                 countSchemaItems,
+                                                    BYTE*                                  pInstrumentationData,
+                                                    int32_t                                ilOffset)
+{
+    return getLikelyClassesOrMethods(pLikelyMethods, maxLikelyMethods, schema, countSchemaItems, pInstrumentationData,
+                                     ilOffset, false);
+}
+
+//------------------------------------------------------------------------
+// getRandomGDV: find GDV profile data for an IL offset, and return
+//   one of the possible methods/classes at random
 //
 // Arguments:
 //    schema - profile schema
@@ -289,17 +331,48 @@ extern "C" DLLEXPORT UINT32 WINAPI getLikelyClasses(LikelyClassRecord*
 // Returns:
 //    Randomly observed class, or nullptr.
 //
-CORINFO_CLASS_HANDLE Compiler::getRandomClass(ICorJitInfo::PgoInstrumentationSchema* schema,
-                                              UINT32                                 countSchemaItems,
-                                              BYTE*                                  pInstrumentationData,
-                                              int32_t                                ilOffset,
-                                              CLRRandom*                             random)
+void Compiler::getRandomGDV(ICorJitInfo::PgoInstrumentationSchema* schema,
+                            UINT32                                 countSchemaItems,
+                            BYTE*                                  pInstrumentationData,
+                            int32_t                                ilOffset,
+                            CLRRandom*                             random,
+                            CORINFO_CLASS_HANDLE*                  classGuess,
+                            CORINFO_METHOD_HANDLE*                 methodGuess)
 {
+    *classGuess  = NO_CLASS_HANDLE;
+    *methodGuess = NO_METHOD_HANDLE;
+
     if (schema == nullptr)
     {
-        return NO_CLASS_HANDLE;
+        return;
     }
 
+    // We can have multiple histograms for the same IL offset. Use reservoir
+    // sampling to pick an entry at random.
+    int  numElementsSeen = 0;
+    auto addElement      = [random, classGuess, methodGuess, &numElementsSeen](intptr_t handle, bool isClass) {
+        if (ICorJitInfo::IsUnknownHandle(handle))
+        {
+            return;
+        }
+
+        numElementsSeen++;
+        bool replace = (numElementsSeen == 1) || (random->Next(numElementsSeen) == 0);
+        if (replace)
+        {
+            if (isClass)
+            {
+                *classGuess  = (CORINFO_CLASS_HANDLE)handle;
+                *methodGuess = NO_METHOD_HANDLE;
+            }
+            else
+            {
+                *classGuess  = NO_CLASS_HANDLE;
+                *methodGuess = (CORINFO_METHOD_HANDLE)handle;
+            }
+        }
+    };
+
     for (COUNT_T i = 0; i < countSchemaItems; i++)
     {
         if (schema[i].ILOffset != (int32_t)ilOffset)
@@ -307,18 +380,13 @@ CORINFO_CLASS_HANDLE Compiler::getRandomClass(ICorJitInfo::PgoInstrumentationSch
             continue;
         }
 
-        if ((schema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::GetLikelyClass) &&
+        if (((schema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::GetLikelyClass) ||
+             (schema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::GetLikelyMethod)) &&
             (schema[i].Count == 1))
         {
             INT_PTR result = *(INT_PTR*)(pInstrumentationData + schema[i].Offset);
-            if (ICorJitInfo::IsUnknownHandle(result))
-            {
-                return NO_CLASS_HANDLE;
-            }
-            else
-            {
-                return (CORINFO_CLASS_HANDLE)result;
-            }
+            addElement(result, schema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::GetLikelyClass);
+            continue;
         }
 
         bool isHistogramCount =
@@ -326,30 +394,21 @@ CORINFO_CLASS_HANDLE Compiler::getRandomClass(ICorJitInfo::PgoInstrumentationSch
             (schema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::HandleHistogramLongCount);
 
         if (isHistogramCount && (schema[i].Count == 1) && ((i + 1) < countSchemaItems) &&
-            (schema[i + 1].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::HandleHistogramTypes))
+            ((schema[i + 1].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::HandleHistogramTypes) ||
+             (schema[i + 1].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::HandleHistogramMethods)))
         {
-            // Form a histogram
+            // Form a histogram. Note that even though we use reservoir
+            // sampling we want to weigh distinct handles equally, regardless
+            // of count.
             //
-            LikelyClassHistogram h((INT_PTR*)(pInstrumentationData + schema[i + 1].Offset), schema[i + 1].Count);
+            LikelyClassMethodHistogram h((INT_PTR*)(pInstrumentationData + schema[i + 1].Offset), schema[i + 1].Count);
 
-            if (h.countHistogramElements == 0)
+            bool isClass =
+                schema[i + 1].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::HandleHistogramTypes;
+            for (UINT32 j = 0; j < h.countHistogramElements; j++)
             {
-                return NO_CLASS_HANDLE;
+                addElement(h.HistogramEntryAt(j).m_handle, isClass);
             }
-
-            // Choose an entry at random.
-            //
-            unsigned                  randomEntryIndex = random->Next(0, h.countHistogramElements);
-            LikelyClassHistogramEntry randomEntry      = h.HistogramEntryAt(randomEntryIndex);
-
-            if (ICorJitInfo::IsUnknownHandle(randomEntry.m_mt))
-            {
-                return NO_CLASS_HANDLE;
-            }
-
-            return (CORINFO_CLASS_HANDLE)randomEntry.m_mt;
         }
     }
-
-    return NO_CLASS_HANDLE;
 }
index 75dd526..6315a86 100644 (file)
@@ -6724,9 +6724,11 @@ GenTree* Compiler::fgMorphPotentialTailCall(GenTreeCall* call)
                                              (call->gtCallType == CT_USER_FUNC) ? call->gtCallMethHnd : nullptr,
                                              call->IsTailPrefixedCall(), tailCallResult, nullptr);
 
-    // Are we currently planning to expand the gtControlExpr as an early virtual call target?
+    // Do some profitability checks for whether we should expand a vtable call
+    // target early. Note that we may already have expanded it due to GDV at
+    // this point, so make sure we do not undo that work.
     //
-    if (call->IsExpandedEarly() && call->IsVirtualVtable())
+    if (call->IsExpandedEarly() && call->IsVirtualVtable() && (call->gtControlExpr == nullptr))
     {
         assert(call->gtArgs.HasThisPointer());
         // It isn't alway profitable to expand a virtual call early
@@ -8497,18 +8499,18 @@ GenTree* Compiler::fgMorphCall(GenTreeCall* call)
     //
     if (call->IsExpandedEarly() && call->IsVirtualVtable())
     {
-        // We only expand the Vtable Call target once in the global morph phase
-        if (fgGlobalMorph)
+        // We expand the Vtable Call target either in the global morph phase or
+        // in guarded devirt if we need it for the guard.
+        if (fgGlobalMorph && (call->gtControlExpr == nullptr))
         {
-            assert(call->gtControlExpr == nullptr); // We only call this method and assign gtControlExpr once
             call->gtControlExpr = fgExpandVirtualVtableCallTarget(call);
         }
         // We always have to morph or re-morph the control expr
         //
         call->gtControlExpr = fgMorphTree(call->gtControlExpr);
 
-        // Propagate any gtFlags into the call
-        call->gtFlags |= call->gtControlExpr->gtFlags;
+        // Propagate any side effect flags into the call
+        call->gtFlags |= call->gtControlExpr->gtFlags & GTF_ALL_EFFECT;
     }
 
     // Morph stelem.ref helper call to store a null value, into a store into an array without the helper.
index d1478c5..bf7560d 100644 (file)
@@ -78,7 +78,7 @@ public:
                 // If we're instrumenting, we should not have decided to
                 // put class probes here, as that is driven by looking at IL.
                 //
-                assert((block->bbFlags & BBF_HAS_CLASS_PROFILE) == 0);
+                assert((block->bbFlags & BBF_HAS_HISTOGRAM_PROFILE) == 0);
 
                 // Clear the partial comp flag.
                 //
index 30d8af4..d9671ef 100644 (file)
@@ -15,7 +15,7 @@ namespace Internal.Runtime
         public const uint Signature = 0x00525452; // 'RTR'
 
         public const ushort CurrentMajorVersion = 6;
-        public const ushort CurrentMinorVersion = 1;
+        public const ushort CurrentMinorVersion = 2;
     }
 
 #pragma warning disable 0169
index b6d7649..3bb774e 100644 (file)
@@ -286,9 +286,14 @@ namespace Internal.JitInterface
         CORINFO_HELP_STACK_PROBE,               // Probes each page of the allocated stack frame
 
         CORINFO_HELP_PATCHPOINT,                // Notify runtime that code has reached a patchpoint
+        CORINFO_HELP_PARTIAL_COMPILATION_PATCHPOINT,  // Notify runtime that code has reached a part of the method that wasn't originally jitted.
+
         CORINFO_HELP_CLASSPROFILE32,            // Update 32-bit class profile for a call site
         CORINFO_HELP_CLASSPROFILE64,            // Update 64-bit class profile for a call site
-        CORINFO_HELP_PARTIAL_COMPILATION_PATCHPOINT,  // Notify runtime that code has reached a part of the method that wasn't originally jitted.
+        CORINFO_HELP_DELEGATEPROFILE32,         // Update 32-bit method profile for a delegate call site
+        CORINFO_HELP_DELEGATEPROFILE64,         // Update 64-bit method profile for a delegate call site
+        CORINFO_HELP_VTABLEPROFILE32,           // Update 32-bit method profile for a vtable call site
+        CORINFO_HELP_VTABLEPROFILE64,           // Update 64-bit method profile for a vtable call site
 
         CORINFO_HELP_VALIDATE_INDIRECT_CALL,    // CFG: Validate function pointer
         CORINFO_HELP_DISPATCH_INDIRECT_CALL,    // CFG: Validate and dispatch to pointer
index 3c1eb89..27dbbfb 100644 (file)
@@ -101,20 +101,23 @@ namespace Internal.JitInterface
             private static readonly IntPtr s_jit;
         }
 
-        private struct LikelyClassRecord
+        private struct LikelyClassMethodRecord
         {
-            public IntPtr clsHandle;
+            public IntPtr handle;
             public uint likelihood;
 
-            public LikelyClassRecord(IntPtr clsHandle, uint likelihood)
+            public LikelyClassMethodRecord(IntPtr handle, uint likelihood)
             {
-                this.clsHandle = clsHandle;
+                this.handle = handle;
                 this.likelihood = likelihood;
             }
         }
 
         [DllImport(JitLibrary)]
-        private extern static uint getLikelyClasses(LikelyClassRecord* pLikelyClasses, uint maxLikelyClasses, PgoInstrumentationSchema* schema, uint countSchemaItems, byte*pInstrumentationData, int ilOffset);
+        private extern static uint getLikelyClasses(LikelyClassMethodRecord* pLikelyClasses, uint maxLikelyClasses, PgoInstrumentationSchema* schema, uint countSchemaItems, byte*pInstrumentationData, int ilOffset);
+
+        [DllImport(JitLibrary)]
+        private extern static uint getLikelyMethods(LikelyClassMethodRecord* pLikelyMethods, uint maxLikelyMethods, PgoInstrumentationSchema* schema, uint countSchemaItems, byte*pInstrumentationData, int ilOffset);
 
         [DllImport(JitSupportLibrary)]
         private extern static IntPtr GetJitHost(IntPtr configProvider);
@@ -192,17 +195,18 @@ namespace Internal.JitInterface
 
         public static IEnumerable<PgoSchemaElem> ConvertTypeHandleHistogramsToCompactTypeHistogramFormat(PgoSchemaElem[] pgoData, CompilationModuleGroup compilationModuleGroup)
         {
-            bool hasTypeHistogram = false;
+            bool hasHistogram = false;
             foreach (var elem in pgoData)
             {
-                if (elem.InstrumentationKind == PgoInstrumentationKind.HandleHistogramTypes)
+                if (elem.InstrumentationKind == PgoInstrumentationKind.HandleHistogramTypes ||
+                    elem.InstrumentationKind == PgoInstrumentationKind.HandleHistogramMethods)
                 {
                     // found histogram
-                    hasTypeHistogram = true;
+                    hasHistogram = true;
                     break;
                 }
             }
-            if (!hasTypeHistogram)
+            if (!hasHistogram)
             {
                 foreach (var elem in pgoData)
                 {
@@ -222,9 +226,10 @@ namespace Internal.JitInterface
                     if ((i + 1 < pgoData.Length) &&
                         (pgoData[i].InstrumentationKind == PgoInstrumentationKind.HandleHistogramIntCount ||
                          pgoData[i].InstrumentationKind == PgoInstrumentationKind.HandleHistogramLongCount) &&
-                        (pgoData[i + 1].InstrumentationKind == PgoInstrumentationKind.HandleHistogramTypes))
+                        (pgoData[i + 1].InstrumentationKind == PgoInstrumentationKind.HandleHistogramTypes ||
+                         pgoData[i + 1].InstrumentationKind == PgoInstrumentationKind.HandleHistogramMethods))
                     {
-                        PgoSchemaElem? newElem = ComputeLikelyClass(i, handleToObject, nativeSchema, instrumentationData, compilationModuleGroup);
+                        PgoSchemaElem? newElem = ComputeLikelyClassMethod(i, handleToObject, nativeSchema, instrumentationData, compilationModuleGroup);
                         if (newElem.HasValue)
                         {
                             yield return newElem.Value;
@@ -249,33 +254,63 @@ namespace Internal.JitInterface
             }
         }
 
-        private static PgoSchemaElem? ComputeLikelyClass(int index, Dictionary<IntPtr, object> handleToObject, PgoInstrumentationSchema[] nativeSchema, byte[] instrumentationData, CompilationModuleGroup compilationModuleGroup)
+        private static PgoSchemaElem? ComputeLikelyClassMethod(int index, Dictionary<IntPtr, object> handleToObject, PgoInstrumentationSchema[] nativeSchema, byte[] instrumentationData, CompilationModuleGroup compilationModuleGroup)
         {
             // getLikelyClasses will use two entries from the native schema table. There must be at least two present to avoid overruning the buffer
             if (index > (nativeSchema.Length - 2))
                 return null;
 
+            bool isType = nativeSchema[index + 1].InstrumentationKind == PgoInstrumentationKind.HandleHistogramTypes;
+
             fixed(PgoInstrumentationSchema* pSchema = &nativeSchema[index])
             {
                 fixed(byte* pInstrumentationData = &instrumentationData[0])
                 {
-                    // We're going to store only the most popular type to reduce size of the profile
-                    LikelyClassRecord* likelyClasses = stackalloc LikelyClassRecord[1];
-                    uint numberOfClasses = getLikelyClasses(likelyClasses, 1, pSchema, 2, pInstrumentationData, nativeSchema[index].ILOffset);
+                    // We're going to store only the most popular type/method to reduce size of the profile
+                    LikelyClassMethodRecord* likelyClassMethods = stackalloc LikelyClassMethodRecord[1];
+                    uint numberOfRecords;
+                    if (isType)
+                    {
+                        numberOfRecords = getLikelyClasses(likelyClassMethods, 1, pSchema, 2, pInstrumentationData, nativeSchema[index].ILOffset);
+                    }
+                    else
+                    {
+                        numberOfRecords = getLikelyMethods(likelyClassMethods, 1, pSchema, 2, pInstrumentationData, nativeSchema[index].ILOffset);
+                    }
 
-                    if (numberOfClasses > 0)
+                    if (numberOfRecords > 0)
                     {
-                        TypeDesc type = (TypeDesc)handleToObject[likelyClasses->clsHandle];
+                        TypeSystemEntityOrUnknown[] newData = null;
+                        if (isType)
+                        {
+                            TypeDesc type = (TypeDesc)handleToObject[likelyClassMethods->handle];
+#if READYTORUN
+                            if (compilationModuleGroup.VersionsWithType(type))
+#endif
+                            {
+                                newData = new[] { new TypeSystemEntityOrUnknown(type) };
+                            }
+                        }
+                        else
+                        {
+                            MethodDesc method = (MethodDesc)handleToObject[likelyClassMethods->handle];
+
 #if READYTORUN
-                        if (compilationModuleGroup.VersionsWithType(type))
+                            if (compilationModuleGroup.VersionsWithMethodBody(method))
 #endif
+                            {
+                                newData = new[] { new TypeSystemEntityOrUnknown(method) };
+                            }
+                        }
+
+                        if (newData != null)
                         {
                             PgoSchemaElem likelyClassElem = new PgoSchemaElem();
-                            likelyClassElem.InstrumentationKind = PgoInstrumentationKind.GetLikelyClass;
+                            likelyClassElem.InstrumentationKind = isType ? PgoInstrumentationKind.GetLikelyClass : PgoInstrumentationKind.GetLikelyMethod;
                             likelyClassElem.ILOffset = nativeSchema[index].ILOffset;
                             likelyClassElem.Count = 1;
-                            likelyClassElem.Other = (int)(likelyClasses->likelihood | (numberOfClasses << 8));
-                            likelyClassElem.DataObject = new TypeSystemEntityOrUnknown[] { new TypeSystemEntityOrUnknown(type) };
+                            likelyClassElem.Other = (int)(likelyClassMethods->likelihood | (numberOfRecords << 8));
+                            likelyClassElem.DataObject = newData;
                             return likelyClassElem;
                         }
                     }
index a4e2328..5dc847b 100644 (file)
@@ -49,6 +49,7 @@ namespace Internal.Pgo
         EdgeIntCount = (DescriptorMin * 6) | FourByte, // edge counter using unsigned 4 byte int
         EdgeLongCount = (DescriptorMin * 6) | EightByte, // edge counter using unsigned 8 byte int
         GetLikelyClass = (DescriptorMin * 7) | TypeHandle, // Compressed get likely class data
+        GetLikelyMethod = (DescriptorMin * 7) | MethodHandle, // Compressed get likely method data
     }
 
     public interface IPgoSchemaDataLoader<TType, TMethod>
index 1cfbc59..2ff9b3c 100644 (file)
@@ -93,8 +93,9 @@ void DumpMap(int index, MethodContext* mc)
     bool hasClassProfile = false;
     bool hasMethodProfile = false;
     bool hasLikelyClass = false;
+    bool hasLikelyMethod = false;
     ICorJitInfo::PgoSource pgoSource = ICorJitInfo::PgoSource::Unknown;
-    if (mc->hasPgoData(hasEdgeProfile, hasClassProfile, hasMethodProfile, hasLikelyClass, pgoSource))
+    if (mc->hasPgoData(hasEdgeProfile, hasClassProfile, hasMethodProfile, hasLikelyClass, hasLikelyMethod, pgoSource))
     {
         rawFlags |= 1ULL << (EXTRA_JIT_FLAGS::HAS_PGO);
 
@@ -118,6 +119,11 @@ void DumpMap(int index, MethodContext* mc)
             rawFlags |= 1ULL << (EXTRA_JIT_FLAGS::HAS_LIKELY_CLASS);
         }
 
+        if (hasLikelyMethod)
+        {
+            rawFlags |= 1ULL << (EXTRA_JIT_FLAGS::HAS_LIKELY_METHOD);
+        }
+
         if (pgoSource == ICorJitInfo::PgoSource::Static)
         {
             rawFlags |= 1ULL << (EXTRA_JIT_FLAGS::HAS_STATIC_PROFILE);
index a3e31a1..33190d3 100644 (file)
@@ -31,8 +31,9 @@ int verbJitFlags::DoWork(const char* nameOfInput)
         bool hasClassProfile = false;
         bool hasMethodProfile = false;
         bool hasLikelyClass = false;
+        bool hasLikelyMethod = false;
         ICorJitInfo::PgoSource pgoSource = ICorJitInfo::PgoSource::Unknown;
-        if (mc->hasPgoData(hasEdgeProfile, hasClassProfile, hasMethodProfile, hasLikelyClass, pgoSource))
+        if (mc->hasPgoData(hasEdgeProfile, hasClassProfile, hasMethodProfile, hasLikelyClass, hasLikelyMethod, pgoSource))
         {
             rawFlags |= 1ULL << (EXTRA_JIT_FLAGS::HAS_PGO);
 
@@ -56,6 +57,11 @@ int verbJitFlags::DoWork(const char* nameOfInput)
                 rawFlags |= 1ULL << (EXTRA_JIT_FLAGS::HAS_LIKELY_CLASS);
             }
 
+            if (hasLikelyMethod)
+            {
+                rawFlags |= 1ULL << (EXTRA_JIT_FLAGS::HAS_LIKELY_METHOD);
+            }
+
             if (pgoSource == ICorJitInfo::PgoSource::Static)
             {
                 rawFlags |= 1ULL << (EXTRA_JIT_FLAGS::HAS_STATIC_PROFILE);
index 97be9fb..df50466 100644 (file)
@@ -5607,9 +5607,10 @@ void MethodContext::dmpGetPgoInstrumentationResults(DWORDLONG key, const Agnosti
                     }
                     break;
                 case ICorJitInfo::PgoInstrumentationKind::GetLikelyClass:
+                case ICorJitInfo::PgoInstrumentationKind::GetLikelyMethod:
                     {
-                        // (N)umber, (L)ikelihood, (C)lass
-                        printf("N %u L %u C %016llX", (unsigned)(pBuf[i].Other >> 8), (unsigned)(pBuf[i].Other && 0xFF), CastHandle(*(uintptr_t*)(pInstrumentationData + pBuf[i].Offset)));
+                        // (N)umber, (L)ikelihood, (H)andle
+                        printf("N %u L %u H %016llX", (unsigned)(pBuf[i].Other >> 8), (unsigned)(pBuf[i].Other && 0xFF), CastHandle(*(uintptr_t*)(pInstrumentationData + pBuf[i].Offset)));
                     }
                     break;
                 default:
@@ -7072,12 +7073,13 @@ int MethodContext::dumpMD5HashToBuffer(BYTE* pBuffer, int bufLen, char* hash, in
     return m_hash.HashBuffer(pBuffer, bufLen, hash, hashLen);
 }
 
-bool MethodContext::hasPgoData(bool& hasEdgeProfile, bool& hasClassProfile, bool& hasMethodProfile, bool& hasLikelyClass, ICorJitInfo::PgoSource& pgoSource)
+bool MethodContext::hasPgoData(bool& hasEdgeProfile, bool& hasClassProfile, bool& hasMethodProfile, bool& hasLikelyClass, bool& hasLikelyMethod, ICorJitInfo::PgoSource& pgoSource)
 {
     hasEdgeProfile = false;
     hasClassProfile = false;
     hasMethodProfile = false;
     hasLikelyClass = false;
+    hasLikelyMethod = false;
 
     // Obtain the Method Info structure for this method
     CORINFO_METHOD_INFO  info;
@@ -7102,8 +7104,9 @@ bool MethodContext::hasPgoData(bool& hasEdgeProfile, bool& hasClassProfile, bool
                 hasClassProfile |= (schema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::HandleHistogramTypes);
                 hasMethodProfile |= (schema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::HandleHistogramMethods);
                 hasLikelyClass |= (schema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::GetLikelyClass);
+                hasLikelyMethod |= (schema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::GetLikelyMethod);
 
-                if (hasEdgeProfile && hasClassProfile && hasLikelyClass)
+                if (hasEdgeProfile && hasClassProfile && hasLikelyClass && hasLikelyMethod)
                 {
                     break;
                 }
index 816682b..5009d6b 100644 (file)
@@ -54,6 +54,7 @@ enum EXTRA_JIT_FLAGS
     HAS_STATIC_PROFILE = 59,
     HAS_DYNAMIC_PROFILE = 58,
     HAS_METHOD_PROFILE = 57,
+    HAS_LIKELY_METHOD = 56,
 };
 
 // Asserts to catch changes in corjit flags definitions.
@@ -64,6 +65,8 @@ static_assert((int)EXTRA_JIT_FLAGS::HAS_CLASS_PROFILE == (int)CORJIT_FLAGS::CorJ
 static_assert((int)EXTRA_JIT_FLAGS::HAS_LIKELY_CLASS == (int)CORJIT_FLAGS::CorJitFlag::CORJIT_FLAG_UNUSED33, "Jit Flags Mismatch");
 static_assert((int)EXTRA_JIT_FLAGS::HAS_STATIC_PROFILE == (int)CORJIT_FLAGS::CorJitFlag::CORJIT_FLAG_UNUSED32, "Jit Flags Mismatch");
 static_assert((int)EXTRA_JIT_FLAGS::HAS_DYNAMIC_PROFILE == (int)CORJIT_FLAGS::CorJitFlag::CORJIT_FLAG_UNUSED31, "Jit Flags Mismatch");
+static_assert((int)EXTRA_JIT_FLAGS::HAS_METHOD_PROFILE == (int)CORJIT_FLAGS::CorJitFlag::CORJIT_FLAG_UNUSED30, "Jit Flags Mismatch");
+static_assert((int)EXTRA_JIT_FLAGS::HAS_LIKELY_METHOD == (int)CORJIT_FLAGS::CorJitFlag::CORJIT_FLAG_UNUSED29, "Jit Flags Mismatch");
 
 class MethodContext
 {
@@ -106,7 +109,7 @@ public:
     int dumpMethodIdentityInfoToBuffer(char* buff, int len, bool ignoreMethodName = false, CORINFO_METHOD_INFO* optInfo = nullptr, unsigned optFlags = 0);
     int dumpMethodMD5HashToBuffer(char* buff, int len, bool ignoreMethodName = false, CORINFO_METHOD_INFO* optInfo = nullptr, unsigned optFlags = 0);
 
-    bool hasPgoData(bool& hasEdgeProfile, bool& hasClassProfile, bool& hasMethodProfile, bool& hasLikelyClass, ICorJitInfo::PgoSource& pgoSource);
+    bool hasPgoData(bool& hasEdgeProfile, bool& hasClassProfile, bool& hasMethodProfile, bool& hasLikelyClass, bool& hasLikelyMethod, ICorJitInfo::PgoSource& pgoSource);
 
     void recGlobalContext(const MethodContext& other);
 
index 1d1d4d5..b51a54b 100644 (file)
@@ -285,6 +285,7 @@ std::string SpmiDumpHelper::DumpJitFlags(unsigned long long flags)
     AddFlagNumeric(HAS_CLASS_PROFILE, EXTRA_JIT_FLAGS::HAS_CLASS_PROFILE);
     AddFlagNumeric(HAS_METHOD_PROFILE, EXTRA_JIT_FLAGS::HAS_METHOD_PROFILE);
     AddFlagNumeric(HAS_LIKELY_CLASS, EXTRA_JIT_FLAGS::HAS_LIKELY_CLASS);
+    AddFlagNumeric(HAS_LIKELY_METHOD, EXTRA_JIT_FLAGS::HAS_LIKELY_METHOD);
     AddFlagNumeric(HAS_STATIC_PROFILE, EXTRA_JIT_FLAGS::HAS_STATIC_PROFILE);
     AddFlagNumeric(HAS_DYNAMIC_PROFILE, EXTRA_JIT_FLAGS::HAS_DYNAMIC_PROFILE);
 
index e1802b6..0e2cbd7 100644 (file)
@@ -5328,7 +5328,7 @@ void JIT_PartialCompilationPatchpoint(int* counter, int ilOffset)
 
 #endif // FEATURE_ON_STACK_REPLACEMENT
 
-static unsigned ClassProfileRand()
+static unsigned HandleHistogramProfileRand()
 {
     // generate a random number (xorshift32)
     //
@@ -5345,7 +5345,43 @@ static unsigned ClassProfileRand()
     return x;
 }
 
-HCIMPL2(void, JIT_ClassProfile32, Object *obj, void* tableAddress)
+template<typename T>
+static int CheckSample(T index)
+{
+    const unsigned S = ICorJitInfo::HandleHistogram32::SIZE;
+    const unsigned N = ICorJitInfo::HandleHistogram32::SAMPLE_INTERVAL;
+    static_assert_no_msg(N >= S);
+    static_assert_no_msg((std::is_same<T, uint32_t>::value || std::is_same<T, uint64_t>::value));
+
+    // If table is not yet full, just add entries in.
+    // The call index doubles as the table slot to fill.
+    if (index < S)
+    {
+        return static_cast<int>(index);
+    }
+
+    unsigned x = HandleHistogramProfileRand();
+    // N is the sampling window size,
+    // it should be larger than the table size.
+    //
+    // If we let N == count then we are building an entire
+    // run sample -- probability of update decreases over time.
+    // Would be a good strategy for an AOT profiler.
+    //
+    // But for TieredPGO we would prefer something that is more
+    // weighted to recent observations.
+    //
+    // For S=4, N=128, we'll sample (on average) every 32nd call.
+    // A return value of -1 means "do not record this call".
+    if ((x % N) >= S)
+    {
+        return -1;
+    }
+
+    return static_cast<int>(x % S); // table slot chosen from the random value
+}
+
+HCIMPL2(void, JIT_ClassProfile32, Object *obj, ICorJitInfo::HandleHistogram32* classProfile)
 {
     FCALL_CONTRACT;
     FC_GC_POLL_NOT_NEEDED();
@@ -5353,12 +5389,14 @@ HCIMPL2(void, JIT_ClassProfile32, Object *obj, void* tableAddress)
     OBJECTREF objRef = ObjectToOBJECTREF(obj);
     VALIDATEOBJECTREF(objRef);
 
-    ICorJitInfo::ClassProfile32* const classProfile = (ICorJitInfo::ClassProfile32*) tableAddress;
     volatile unsigned* pCount = (volatile unsigned*) &classProfile->Count;
-    const unsigned count = (*pCount)++;
-    const unsigned S = ICorJitInfo::ClassProfile32::SIZE;
-    const unsigned N = ICorJitInfo::ClassProfile32::SAMPLE_INTERVAL;
-    _ASSERTE(N >= S);
+    const unsigned callIndex = (*pCount)++;
+
+    int sampleIndex = CheckSample(callIndex);
+    if (sampleIndex == -1)
+    {
+        return;
+    }
 
     if (objRef == NULL)
     {
@@ -5373,7 +5411,7 @@ HCIMPL2(void, JIT_ClassProfile32, Object *obj, void* tableAddress)
     //
     if (pMT->GetLoaderAllocator()->IsCollectible())
     {
-        pMT = (MethodTable*)DEFAULT_UNKNOWN_TYPEHANDLE;
+        pMT = (MethodTable*)DEFAULT_UNKNOWN_HANDLE;
     }
 
 #ifdef _DEBUG
@@ -5381,39 +5419,159 @@ HCIMPL2(void, JIT_ClassProfile32, Object *obj, void* tableAddress)
     PgoManager::VerifyAddress(classProfile + 1);
 #endif
 
+    classProfile->HandleTable[sampleIndex] = (CORINFO_CLASS_HANDLE)pMT;
+}
+HCIMPLEND
+
+// 64-bit-count variant of JIT_ClassProfile32: samples the object's MethodTable into a HandleHistogram64.
+HCIMPL2(void, JIT_ClassProfile64, Object *obj, ICorJitInfo::HandleHistogram64* classProfile)
+{
+    FCALL_CONTRACT;
+    FC_GC_POLL_NOT_NEEDED();
+
+    OBJECTREF objRef = ObjectToOBJECTREF(obj);
+    VALIDATEOBJECTREF(objRef);
+
+    volatile uint64_t* pCount = (volatile uint64_t*) &classProfile->Count;
+    const uint64_t callIndex = (*pCount)++; // every call bumps the count, even if nothing is recorded below
+
+    int sampleIndex = CheckSample(callIndex);
+    if (sampleIndex == -1) // CheckSample returns -1 when this call is not selected for recording
+    {
+        return;
+    }
+
+    if (objRef == NULL) // null object: there is no type to record
+    {
+        return;
+    }
+
+    MethodTable* pMT = objRef->GetMethodTable();
+
+    if (pMT->GetLoaderAllocator()->IsCollectible()) // collectible types are replaced by the unknown-handle sentinel
+    {
+        pMT = (MethodTable*)DEFAULT_UNKNOWN_HANDLE;
+    }
+
+#ifdef _DEBUG
+    PgoManager::VerifyAddress(classProfile);
+    PgoManager::VerifyAddress(classProfile + 1);
+#endif
+
+    classProfile->HandleTable[sampleIndex] = (CORINFO_CLASS_HANDLE)pMT; // overwrite the slot CheckSample picked
+}
+HCIMPLEND
+
+HCIMPL2(void, JIT_DelegateProfile32, Object *obj, ICorJitInfo::HandleHistogram32* methodProfile) // samples the delegate's target method into methodProfile
+{
+    FCALL_CONTRACT;
+    FC_GC_POLL_NOT_NEEDED();
+
+    OBJECTREF objRef = ObjectToOBJECTREF(obj);
+    VALIDATEOBJECTREF(objRef);
+
+    volatile unsigned* pMethodCount = (volatile unsigned*) &methodProfile->Count;
+    const unsigned methodCallIndex = (*pMethodCount)++; // every call bumps the count, even if nothing is recorded below
+    int methodSampleIndex = CheckSample(methodCallIndex);
+
+    if (methodSampleIndex == -1) // CheckSample returns -1 when this call is not selected for recording
+    {
+        return;
+    }
+
+    if (objRef == NULL) // null delegate: there is no target to record
+    {
+        return;
+    }
+
+    MethodTable* pMT = objRef->GetMethodTable();
+
+    _ASSERTE(pMT->IsDelegate());
+
+    // Resolve method. We handle only the common "direct" delegate as that is
+    // in any case the only one we can reasonably do GDV for. For instance,
+    // open delegates are filtered out here, and many cases with inner
+    // "complicated" logic as well (e.g. static functions, multicast, unmanaged
+    // functions).
+    //
+    MethodDesc* pRecordedMD = (MethodDesc*)DEFAULT_UNKNOWN_HANDLE; // recorded when the target is filtered out below
+    DELEGATEREF del = (DELEGATEREF)objRef;
+    if ((del->GetInvocationCount() == 0) && (del->GetMethodPtrAux() == NULL))
+    {
+        MethodDesc* pMD = NonVirtualEntry2MethodDesc(del->GetMethodPtr());
+        if ((pMD != nullptr) && !pMD->GetLoaderAllocator()->IsCollectible() && !pMD->IsDynamicMethod()) // skip unresolved, collectible and dynamic targets
+        {
+            pRecordedMD = pMD;
+        }
+    }
+
+#ifdef _DEBUG
+    PgoManager::VerifyAddress(methodProfile);
+    PgoManager::VerifyAddress(methodProfile + 1);
+#endif
+
     // If table is not yet full, just add entries in.
     //
-    if (count < S)
+    methodProfile->HandleTable[methodSampleIndex] = (CORINFO_METHOD_HANDLE)pRecordedMD; // unconditional store: CheckSample already picked the slot
+}
+HCIMPLEND
+
+// 64-bit-count variant of JIT_DelegateProfile32; takes the same two arguments (delegate object, method histogram).
+HCIMPL2(void, JIT_DelegateProfile64, Object *obj, ICorJitInfo::HandleHistogram64* methodProfile)
+{
+    FCALL_CONTRACT;
+    FC_GC_POLL_NOT_NEEDED();
+
+    OBJECTREF objRef = ObjectToOBJECTREF(obj);
+    VALIDATEOBJECTREF(objRef);
+
+    volatile uint64_t* pMethodCount = (volatile uint64_t*) &methodProfile->Count;
+    const uint64_t methodCallIndex = (*pMethodCount)++; // every call bumps the count, even if nothing is recorded below
+    int methodSampleIndex = CheckSample(methodCallIndex);
+
+    if (methodSampleIndex == -1) // CheckSample returns -1 when this call is not selected for recording
     {
-        classProfile->ClassTable[count] = (CORINFO_CLASS_HANDLE)pMT;
+        return;
     }
-    else
+
+    if (objRef == NULL) // null delegate: there is no target to record
     {
-        unsigned x = ClassProfileRand();
+        return;
+    }
 
-        // N is the sampling window size,
-        // it should be larger than the table size.
-        //
-        // If we let N == count then we are building an entire
-        // run sample -- probability of update decreases over time.
-        // Would be a good strategy for an AOT profiler.
-        //
-        // But for TieredPGO we would prefer something that is more
-        // weighted to recent observations.
-        //
-        // For S=4, N=128, we'll sample (on average) every 32nd call.
-        //
-        if ((x % N) < S)
+    MethodTable* pMT = objRef->GetMethodTable();
+
+    _ASSERTE(pMT->IsDelegate());
+
+    // Resolve method. We handle only the common "direct" delegate as that is
+    // in any case the only one we can reasonably do GDV for. For instance,
+    // open delegates are filtered out here, and many cases with inner
+    // "complicated" logic as well (e.g. static functions, multicast, unmanaged
+    // functions).
+    //
+    MethodDesc* pRecordedMD = (MethodDesc*)DEFAULT_UNKNOWN_HANDLE; // recorded when the target is filtered out below
+    DELEGATEREF del = (DELEGATEREF)objRef;
+    if ((del->GetInvocationCount() == 0) && (del->GetMethodPtrAux() == NULL))
+    {
+        MethodDesc* pMD = NonVirtualEntry2MethodDesc(del->GetMethodPtr());
+        if ((pMD != nullptr) && !pMD->GetLoaderAllocator()->IsCollectible() && !pMD->IsDynamicMethod()) // skip unresolved, collectible and dynamic targets
         {
-            unsigned i = x % S;
-            classProfile->ClassTable[i] = (CORINFO_CLASS_HANDLE)pMT;
+            pRecordedMD = pMD;
         }
     }
+
+#ifdef _DEBUG
+    PgoManager::VerifyAddress(methodProfile);
+    PgoManager::VerifyAddress(methodProfile + 1);
+#endif
+
+    // Record the resolved method (or the unknown-handle sentinel) in the slot CheckSample picked.
+    //
+    methodProfile->HandleTable[methodSampleIndex] = (CORINFO_METHOD_HANDLE)pRecordedMD;
 }
 HCIMPLEND
 
-// Version of helper above used when the count is 64-bit
-HCIMPL2(void, JIT_ClassProfile64, Object *obj, void* tableAddress)
+HCIMPL3(void, JIT_VTableProfile32, Object* obj, CORINFO_METHOD_HANDLE baseMethod, ICorJitInfo::HandleHistogram32* methodProfile) // samples the method found in obj's type at baseMethod's vtable slot
 {
     FCALL_CONTRACT;
     FC_GC_POLL_NOT_NEEDED();
@@ -5421,44 +5579,109 @@ HCIMPL2(void, JIT_ClassProfile64, Object *obj, void* tableAddress)
     OBJECTREF objRef = ObjectToOBJECTREF(obj);
     VALIDATEOBJECTREF(objRef);
 
-    ICorJitInfo::ClassProfile64* const classProfile = (ICorJitInfo::ClassProfile64*) tableAddress;
-    volatile uint64_t* pCount = (volatile uint64_t*) &classProfile->Count;
-    const uint64_t count = (*pCount)++;
-    const unsigned S = ICorJitInfo::ClassProfile32::SIZE;
-    const unsigned N = ICorJitInfo::ClassProfile32::SAMPLE_INTERVAL;
-    _ASSERTE(N >= S);
+    volatile unsigned* pMethodCount = (volatile unsigned*) &methodProfile->Count;
+    const unsigned methodCallIndex = (*pMethodCount)++; // every call bumps the count, even if nothing is recorded below
+    int methodSampleIndex = CheckSample(methodCallIndex);
+
+    if (methodSampleIndex == -1) // CheckSample returns -1 when this call is not selected for recording
+    {
+        return;
+    }
 
     if (objRef == NULL)
     {
         return;
     }
 
+    MethodDesc* pBaseMD = GetMethod(baseMethod);
+
+    // Method better be virtual
+    _ASSERTE(pBaseMD->IsVirtual());
+
+    // We do not expect to see interface methods here as we cannot efficiently
+    // use method handle information for these anyway.
+    _ASSERTE(!pBaseMD->IsInterface());
+
+    // Shouldn't be doing this for instantiated methods as they live elsewhere
+    _ASSERTE(!pBaseMD->HasMethodInstantiation());
+
     MethodTable* pMT = objRef->GetMethodTable();
 
-    if (pMT->GetLoaderAllocator()->IsCollectible())
+    // Resolve method: take the base method's slot and look it up in the object's own MethodTable
+    WORD slot = pBaseMD->GetSlot();
+    _ASSERTE(slot < pBaseMD->GetMethodTable()->GetNumVirtuals());
+
+    MethodDesc* pMD = pMT->GetMethodDescForSlot(slot);
+
+    MethodDesc* pRecordedMD = (MethodDesc*)DEFAULT_UNKNOWN_HANDLE; // recorded when the target is filtered out below
+    if (!pMD->GetLoaderAllocator()->IsCollectible() && !pMD->IsDynamicMethod()) // skip collectible and dynamic methods
     {
-        pMT = (MethodTable*)DEFAULT_UNKNOWN_TYPEHANDLE;
+        pRecordedMD = pMD;
     }
 
 #ifdef _DEBUG
-    PgoManager::VerifyAddress(classProfile);
-    PgoManager::VerifyAddress(classProfile + 1);
+    PgoManager::VerifyAddress(methodProfile);
+    PgoManager::VerifyAddress(methodProfile + 1);
 #endif
 
-    if (count < S)
+    methodProfile->HandleTable[methodSampleIndex] = (CORINFO_METHOD_HANDLE)pRecordedMD; // overwrite the slot CheckSample picked
+}
+HCIMPLEND
+
+HCIMPL3(void, JIT_VTableProfile64, Object* obj, CORINFO_METHOD_HANDLE baseMethod, ICorJitInfo::HandleHistogram64* methodProfile) // 64-bit-count variant of JIT_VTableProfile32
+{
+    FCALL_CONTRACT;
+    FC_GC_POLL_NOT_NEEDED();
+
+    OBJECTREF objRef = ObjectToOBJECTREF(obj);
+    VALIDATEOBJECTREF(objRef);
+
+    volatile uint64_t* pMethodCount = (volatile uint64_t*) &methodProfile->Count;
+    const uint64_t methodCallIndex = (*pMethodCount)++; // every call bumps the count, even if nothing is recorded below
+    int methodSampleIndex = CheckSample(methodCallIndex);
+
+    if (methodSampleIndex == -1) // CheckSample returns -1 when this call is not selected for recording
     {
-        classProfile->ClassTable[count] = (CORINFO_CLASS_HANDLE)pMT;
+        return;
     }
-    else
+
+    if (objRef == NULL) // null object: there is no vtable to resolve against
     {
-        unsigned x = ClassProfileRand();
+        return;
+    }
 
-        if ((x % N) < S)
-        {
-            unsigned i = x % S;
-            classProfile->ClassTable[i] = (CORINFO_CLASS_HANDLE)pMT;
-        }
+    MethodDesc* pBaseMD = GetMethod(baseMethod);
+
+    // Method better be virtual
+    _ASSERTE(pBaseMD->IsVirtual());
+
+    // We do not expect to see interface methods here as we cannot efficiently
+    // use method handle information for these anyway.
+    _ASSERTE(!pBaseMD->IsInterface());
+
+    // Shouldn't be doing this for instantiated methods as they live elsewhere
+    _ASSERTE(!pBaseMD->HasMethodInstantiation());
+
+    MethodTable* pMT = objRef->GetMethodTable();
+
+    // Resolve method: take the base method's slot and look it up in the object's own MethodTable
+    WORD slot = pBaseMD->GetSlot();
+    _ASSERTE(slot < pBaseMD->GetMethodTable()->GetNumVirtuals());
+
+    MethodDesc* pMD = pMT->GetMethodDescForSlot(slot);
+
+    MethodDesc* pRecordedMD = (MethodDesc*)DEFAULT_UNKNOWN_HANDLE; // recorded when the target is filtered out below
+    if (!pMD->GetLoaderAllocator()->IsCollectible() && !pMD->IsDynamicMethod()) // skip collectible and dynamic methods
+    {
+        pRecordedMD = pMD;
     }
+
+#ifdef _DEBUG
+    PgoManager::VerifyAddress(methodProfile);
+    PgoManager::VerifyAddress(methodProfile + 1);
+#endif
+
+    methodProfile->HandleTable[methodSampleIndex] = (CORINFO_METHOD_HANDLE)pRecordedMD; // overwrite the slot CheckSample picked
 }
 HCIMPLEND
 
index aab5409..221be23 100644 (file)
@@ -2117,6 +2117,13 @@ MethodDesc* NonVirtualEntry2MethodDesc(PCODE entryPoint)
             return (MethodDesc*)((FixupPrecode*)pInstr)->GetMethodDesc();
         }
 
+        // Is it an FCALL?
+        MethodDesc* pFCallMD = ECall::MapTargetBackToMethod(entryPoint);
+        if (pFCallMD != NULL)
+        {
+            return pFCallMD;
+        }
+
         return NULL;
     }
 
@@ -2153,11 +2160,6 @@ MethodDesc* Entry2MethodDesc(PCODE entryPoint, MethodTable *pMT)
     if (pMD != NULL)
         RETURN(pMD);
 
-    // Is it an FCALL?
-    pMD = ECall::MapTargetBackToMethod(entryPoint);
-    if (pMD != NULL)
-        RETURN(pMD);
-
     // We should never get here
     _ASSERTE(!"Entry2MethodDesc failed");
     RETURN (NULL);
index 7f7115d..a4bad1f 100644 (file)
@@ -281,11 +281,11 @@ void PgoManager::WritePgoData()
                             MethodDesc* md = reinterpret_cast<MethodDesc*>(methodHandleData);
                             if (md == nullptr)
                             {
-                                fprintf(pgoDataFile, "MethodHandle: NULL");
+                                fprintf(pgoDataFile, "MethodHandle: NULL\n");
                             }
                             else if (ICorJitInfo::IsUnknownHandle(methodHandleData))
                             {
-                                fprintf(pgoDataFile, "MethodHandle: UNKNOWN");
+                                fprintf(pgoDataFile, "MethodHandle: UNKNOWN\n");
                             }
                             else
                             {
@@ -297,13 +297,13 @@ void PgoManager::WritePgoData()
                                 // MethodName|@|fully_qualified_type_name
                                 if (tTypeName.GetCount() + 1 + tMethodName.GetCount() > 8192)
                                 {
-                                    fprintf(pgoDataFile, "MethodHandle: UNKNOWN");
+                                    fprintf(pgoDataFile, "MethodHandle: UNKNOWN\n");
                                 }
                                 else
                                 {
                                     StackScratchBuffer methodNameBuffer;
                                     StackScratchBuffer typeBuffer;
-                                    fprintf(pgoDataFile, "MethodHandle: %s|@|%s", tMethodName.GetUTF8(methodNameBuffer), tTypeName.GetUTF8(typeBuffer));
+                                    fprintf(pgoDataFile, "MethodHandle: %s|@|%s\n", tMethodName.GetUTF8(methodNameBuffer), tTypeName.GetUTF8(typeBuffer));
                                 }
                             }
                             break;
index 4cc6216..f60eb9b 100644 (file)
@@ -60,6 +60,8 @@
       COMPlus_JitObjectStackAllocation;
       COMPlus_JitInlinePolicyProfile;
       COMPlus_JitClassProfiling;
+      COMPlus_JitDelegateProfiling;
+      COMPlus_JitVTableProfiling;
       COMPlus_JitEdgeProfiling;
       COMPlus_JitRandomGuardedDevirtualization;
       COMPlus_JitRandomEdgeCounts;
     <TestEnvironment Include="gcstress0xc_jitstress2" GCStress="0xC" JitStress="2" />
     <TestEnvironment Include="gcstress0xc_tailcallstress" GCStress="0xC" TailcallStress="1" />
     <TestEnvironment Include="gcstress0xc_jitminopts_heapverify1" GCStress="0xC" JITMinOpts="1" HeapVerify="1" />
-    <TestEnvironment Include="jitosr" TC_OnStackReplacement="1" TC_QuickJitForLoops="1" TieredCompilation="1" />
     <TestEnvironment Include="jitosr_stress" TC_OnStackReplacement="1" TC_QuickJitForLoops="1" TC_OnStackReplacement_InitialCounter="1" OSR_HitLimit="1" TieredCompilation="1" />
     <TestEnvironment Include="jitosr_stress_random" TC_OnStackReplacement="1" TC_QuickJitForLoops="1" TC_OnStackReplacement_InitialCounter="1" OSR_HitLimit="2" TieredCompilation="1" JitRandomOnStackReplacement="15"/>
     <TestEnvironment Include="jit_stress_splitting" JitFakeProcedureSplitting="1" JitStressProcedureSplitting="1" />
     <TestEnvironment Include="defaultpgo" TieredPGO="1" TieredCompilation="1" />
     <TestEnvironment Include="dynamicpgo" TieredPGO="1" TieredCompilation="1" TC_QuickJitForLoops="1" />
     <TestEnvironment Include="fullpgo" TieredPGO="1" TieredCompilation="1" TC_QuickJitForLoops="1" ReadyToRun="0"/>
+    <TestEnvironment Include="fullpgo_methodprofiling" TieredPGO="1" TieredCompilation="1" TC_QuickJitForLoops="1" ReadyToRun="0" JitDelegateProfiling="1" JitVTableProfiling="1" />
     <TestEnvironment Include="fullpgo_random_gdv" TieredPGO="1" TieredCompilation="1" TC_QuickJitForLoops="1" ReadyToRun="0" JitRandomGuardedDevirtualization="1"/>
+    <TestEnvironment Include="fullpgo_random_gdv_methodprofiling_only" TieredPGO="1" TieredCompilation="1" TC_QuickJitForLoops="1" ReadyToRun="0" JitRandomGuardedDevirtualization="1" JitClassProfiling="0" JitDelegateProfiling="1" JitVTableProfiling="1" />
     <TestEnvironment Include="fullpgo_random_edge" TieredPGO="1" TieredCompilation="1" TC_QuickJitForLoops="1" ReadyToRun="0" JitRandomEdgeCounts="1"/>
     <TestEnvironment Include="fullpgo_random_gdv_edge" TieredPGO="1" TieredCompilation="1" TC_QuickJitForLoops="1" ReadyToRun="0" JitRandomGuardedDevirtualization="1" JitRandomEdgeCounts="1"/>
     <TestEnvironment Include="gcstandalone" Condition="'$(TargetsWindows)' == 'true'" GCName="clrgc.dll"/>