Multicorejit unification (#48326)
author Gleb Balykov <g.balykov@samsung.com>
Fri, 14 May 2021 00:16:08 +0000 (03:16 +0300)
committer GitHub <noreply@github.com>
Fri, 14 May 2021 00:16:08 +0000 (17:16 -0700)
* Unify simple and generic methods in multicorejit

* Disable NDirect methods in MulticoreJit

Current multicorejit implementation in master has multiple flaws with NDirect methods:
- exception might be thrown inside GetStubForInteropMethod at some point for NDirect method, which will kill background thread, thus, reducing effectiveness of multicorejit (for example, occurs when multicorejit is used with crossgen2)
- some NDirect methods can lead to asserts during load inside GetStubForInteropMethod (for example, EvpMdCtxDestroy (0x6000044 token) from System.Security.Cryptography.Algorithms.dll)

* Add MultiCoreJitMinNumCpus env variable with default value =2 to configure minimum allowed number of cpus for MultiCoreJit.

On arm with cpu hotplug it should be set to 1.

* Do not save overall record length for methods

This change reduces mcj profile size

* Store non-generic methods using token instead of signatures

This change reduces mcj profile size

src/coreclr/inc/clrconfigvalues.h
src/coreclr/vm/multicorejit.cpp
src/coreclr/vm/multicorejitimpl.h
src/coreclr/vm/multicorejitplayer.cpp

index 62a7665..8498c87 100644 (file)
@@ -370,6 +370,7 @@ RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_TrackDynamicMethodDebugInfo, W("TrackDynami
 
 RETAIL_CONFIG_STRING_INFO(INTERNAL_MultiCoreJitProfile, W("MultiCoreJitProfile"), "If set, use the file to store/control multi-core JIT.")
 RETAIL_CONFIG_DWORD_INFO(INTERNAL_MultiCoreJitProfileWriteDelay, W("MultiCoreJitProfileWriteDelay"), 12, "Set the delay after which the multi-core JIT profile will be written to disk.")
+RETAIL_CONFIG_DWORD_INFO(INTERNAL_MultiCoreJitMinNumCpus, W("MultiCoreJitMinNumCpus"), 2, "Minimum number of cpus that must be present to allow MultiCoreJit usage.")
 
 #endif
 
index f58db1c..9a9ec83 100644 (file)
@@ -386,20 +386,22 @@ HRESULT MulticoreJitRecorder::WriteOutput(IStream * pStream)
 
     HRESULT hr = S_OK;
 
-    // Preprocessing Generic Methods
-    // - Add ModuleDependency to JITInfo
-    // - Increment MethodCount in Module
-    RecorderGenericInfo* genericInfoArray = new (nothrow) RecorderGenericInfo[m_GenericInfoCount]();
-    LONG skippedGeneric = 0;
-    if (genericInfoArray != nullptr)
-    {
-        for (LONG i = 0 ; i < m_GenericInfoCount; i++)
+    // Preprocessing Methods
+    LONG skipped = 0;
+
+    for (LONG i = 0 ; i < m_JitInfoCount; i++)
+    {
+        if (m_JitInfoArray[i].IsModuleInfo())
+        {
+            // Module records don't need preprocessing
+            continue;
+        }
+
+        MethodDesc * pMethod = m_JitInfoArray[i].GetMethodDescAndClean();
+
+        if (m_JitInfoArray[i].IsGenericMethodInfo())
         {
             SigBuilder sigBuilder;
-            MethodDesc * pMethod = m_GenericInfoArray[i];
-            Module * pModule = pMethod->GetModule();
-            unsigned moduleIndex = FindModule(pModule);
-            _ASSERTE(moduleIndex < UINT_MAX);
 
             BOOL fSuccess = false;
             EX_TRY
@@ -413,30 +415,35 @@ HRESULT MulticoreJitRecorder::WriteOutput(IStream * pStream)
 
             if (!fSuccess)
             {
-                skippedGeneric++;
+                skipped++;
                 continue;
             }
 
             DWORD dwLength;
             BYTE * pBlob = (BYTE*)sigBuilder.GetSignature(&dwLength);
-            _ASSERTE(dwLength <= 0xFFFFFF);
+            if (dwLength >= SIGNATURE_LENGTH_MASK + 1)
+            {
+                skipped++;
+                continue;
+            }
+
             BYTE * pSignature = new (nothrow) BYTE[dwLength];
             if (pSignature == nullptr)
             {
-                skippedGeneric++;
+                skipped++;
                 continue;
             }
 
             memcpy(pSignature, pBlob, dwLength);
-            unsigned info = Pack8_24(moduleIndex, dwLength & 0xFFFFFF);
-            genericInfoArray[i].genericInfo = info;
-            genericInfoArray[i].genericSignature = pSignature;
-            m_ModuleList[moduleIndex].methodCount ++;
+            m_JitInfoArray[i].PackSignatureForGenericMethod(pSignature, dwLength);
+        }
+        else
+        {
+            _ASSERTE(m_JitInfoArray[i].IsNonGenericMethodInfo());
+
+            unsigned token = pMethod->GetMemberDef_NoLogging();
+            m_JitInfoArray[i].PackTokenForNonGenericMethod(token);
         }
-    }
-    else
-    {
-        skippedGeneric = m_GenericInfoCount;
     }
 
     {
@@ -447,10 +454,7 @@ HRESULT MulticoreJitRecorder::WriteOutput(IStream * pStream)
         header.recordID       = Pack8_24(MULTICOREJIT_HEADER_RECORD_ID, sizeof(HeaderRecord));
         header.version        = MULTICOREJIT_PROFILE_VERSION;
         header.moduleCount    = m_ModuleCount;
-        header.methodCount    = m_JitInfoCount
-                              + m_GenericInfoCount - skippedGeneric
-                              - m_ModuleDepCount;
-
+        header.methodCount    = m_JitInfoCount - skipped - m_ModuleDepCount;
         header.moduleDepCount = m_ModuleDepCount;
 
         MulticoreJitCodeStorage & curStorage =  m_pDomain->GetMulticoreJitManager().GetMulticoreJitCodeStorage();
@@ -486,77 +490,69 @@ HRESULT MulticoreJitRecorder::WriteOutput(IStream * pStream)
         hr = WriteModuleRecord(pStream, m_ModuleList[i]);
     }
 
-    if (SUCCEEDED(hr))
+    for (LONG i = 0 ; i < m_JitInfoCount && SUCCEEDED(hr); i++)
     {
-        unsigned remain = m_JitInfoCount;
-
-        const unsigned * pInfo = m_JitInfoArray;
+        if (m_JitInfoArray[i].IsModuleInfo())
+        {
+            // Module record
+            _ASSERTE(m_JitInfoArray[i].IsFullyInitialized());
 
-        while (SUCCEEDED(hr) && (remain > 0))
+            DWORD data1 = m_JitInfoArray[i].GetRawModuleData();
+            hr = WriteData(pStream, &data1, sizeof(data1));
+        }
+        else if (m_JitInfoArray[i].IsGenericMethodInfo())
         {
-            unsigned count = remain;
+            // Method record
+            DWORD data1 = m_JitInfoArray[i].GetRawMethodData1();
+            unsigned short data2 = m_JitInfoArray[i].GetRawMethodData2Generic();
+            BYTE * pSignature = m_JitInfoArray[i].GetRawMethodSignature();
 
-            if (count > MAX_JIT_COUNT)
+            if (pSignature == nullptr)
             {
-                count = MAX_JIT_COUNT;
+                // Skipped method
+                continue;
             }
 
-            dwData = Pack8_24(MULTICOREJIT_JITINF_RECORD_ID,  count * sizeof(DWORD) + sizeof(DWORD));
-
-            hr = WriteData(pStream, & dwData, sizeof(dwData));
+            DWORD sigSize = m_JitInfoArray[i].GetMethodSignatureSize();
+            DWORD paddingSize = m_JitInfoArray[i].GetMethodRecordPaddingSize();
 
+            hr = WriteData(pStream, &data1, sizeof(data1));
             if (SUCCEEDED(hr))
             {
-                hr = WriteData(pStream, pInfo, sizeof(unsigned) * count);
-            }
-
-            pInfo  += count;
-            remain -= count;
-        }
-    }
-
-    if (SUCCEEDED(hr) && genericInfoArray != nullptr)
-    {
-        for (LONG i = 0 ; i < m_GenericInfoCount && SUCCEEDED(hr); i++)
-        {
-            unsigned info = genericInfoArray[i].genericInfo;
-            BYTE * pSignature = genericInfoArray[i].genericSignature;
-            if (info == 0 && pSignature == nullptr)
-            {
-                continue;
+                hr = WriteData(pStream, &data2, sizeof(data2));
             }
-
-            DWORD sigSize = info & 0xFFFFFF;
-            DWORD dataSize = sigSize * sizeof(BYTE) + sizeof(DWORD) * 2;
-            DWORD dwSize = ((DWORD)(dataSize + sizeof(DWORD) - 1) / sizeof(DWORD)) * sizeof(DWORD);
-            _ASSERTE(dwSize <= 0xFFFFFF);
-            dwData = Pack8_24(MULTICOREJIT_GENERICINF_RECORD_ID, dwSize);
-            hr = WriteData(pStream, &dwData, sizeof(dwData));
             if (SUCCEEDED(hr))
             {
-                hr = WriteData(pStream, &info, sizeof(unsigned));
+                hr = WriteData(pStream, pSignature, sigSize);
             }
-
-            if (SUCCEEDED(hr))
+            if (SUCCEEDED(hr) && paddingSize > 0)
             {
-                hr = WriteData(pStream, pSignature, sizeof(BYTE) * sigSize);
+                DWORD tmp = 0;
+                hr = WriteData(pStream, &tmp, paddingSize);
             }
+        }
+        else
+        {
+            _ASSERTE(m_JitInfoArray[i].IsNonGenericMethodInfo());
+
+            // Method record
+            DWORD data1 = m_JitInfoArray[i].GetRawMethodData1();
+            unsigned data2 = m_JitInfoArray[i].GetRawMethodData2NonGeneric();
 
+            hr = WriteData(pStream, &data1, sizeof(data1));
             if (SUCCEEDED(hr))
             {
-                DWORD init = 0;
-                hr = WriteData(pStream, &init, dwSize - dataSize);
+                hr = WriteData(pStream, &data2, sizeof(data2));
             }
         }
     }
 
-    if (genericInfoArray != nullptr)
+    for (LONG i = 0; i < m_JitInfoCount; i++)
     {
-        for (LONG i = 0; i < m_GenericInfoCount; i++)
+        if (m_JitInfoArray[i].IsGenericMethodInfo())
         {
-            delete [] genericInfoArray[i].genericSignature;
+            delete[] m_JitInfoArray[i].GetRawMethodSignature();
         }
-        delete [] genericInfoArray;
     }
 
     MulticoreJitTrace(("New profile: %d modules, %d methods", m_ModuleCount, m_JitInfoCount));
@@ -604,56 +600,75 @@ unsigned MulticoreJitRecorder::GetOrAddModuleIndex(Module * pModule)
     return slot;
 }
 
-
-void MulticoreJitRecorder::RecordJitInfo(unsigned module, unsigned method)
+void MulticoreJitRecorder::RecordMethodInfo(unsigned moduleIndex, MethodDesc * pMethod, bool application)
 {
     LIMITED_METHOD_CONTRACT;
 
-    if (m_JitInfoCount < (LONG) MAX_METHOD_ARRAY)
+    if (m_JitInfoArray != nullptr && m_JitInfoCount < (LONG) MAX_METHODS)
     {
-        unsigned info1 = Pack8_24(module, method & 0xFFFFFF);
+        m_ModuleList[moduleIndex].methodCount++;
+        m_JitInfoArray[m_JitInfoCount++].PackMethod(moduleIndex, pMethod, application);
+    }
+}
 
-        // Due to incremental loading, there are quite a few RecordModuleLoad coming with increasing load level, merge
+unsigned MulticoreJitRecorder::RecordModuleInfo(Module * pModule)
+{
+    LIMITED_METHOD_CONTRACT;
 
-        // Previous record and current record are both MODULE_DEPENDENCY
-        if ((m_JitInfoCount > 0) && (info1 & MODULE_DEPENDENCY))
-        {
-            unsigned info0 = m_JitInfoArray[m_JitInfoCount - 1];
+    // pModule could be unknown at this point (modules not enumerated, no event received yet)
+    unsigned moduleIndex = GetOrAddModuleIndex(pModule);
 
-            if ((info0 & 0xFFFF00FF) == (info1 & 0xFFFF00FF)) // to/from modules are the same
-            {
-                if (info1 > info0) // higher level
-                {
-                    m_JitInfoArray[m_JitInfoCount - 1] = info1; // replace
-                }
+    if (moduleIndex == UINT_MAX)
+    {
+        return UINT_MAX;
+    }
 
-                return; // no new record
-            }
-        }
+    if (m_fFirstMethod)
+    {
+        PreRecordFirstMethod();
+    }
 
-        if (method & MODULE_DEPENDENCY)
-        {
-            m_ModuleDepCount ++;
-        }
-        else
+    // Make sure level for current module is recorded properly
+    // Module dependency for generic and stub as well as regular method are handled in JitInfo.
+    // Any module dependencies for all types of methods would be handled with JitInfo before they are attempted to be multicorejitted.
+    if (m_ModuleList[moduleIndex].loadLevel != FILE_ACTIVE)
+    {
+        FileLoadLevel needLevel = MulticoreJitManager::GetModuleFileLoadLevel(pModule);
+
+        if (m_ModuleList[moduleIndex].loadLevel < needLevel)
         {
-            m_ModuleList[module].methodCount ++;
-        }
+            m_ModuleList[moduleIndex].loadLevel = needLevel;
 
-        m_JitInfoArray[m_JitInfoCount] = info1;
-        m_JitInfoCount ++;
+            // Update load level
+            RecordOrUpdateModuleInfo(needLevel, moduleIndex);
+        }
     }
+
+    return moduleIndex;
 }
 
-void MulticoreJitRecorder::RecordGenericInfo(MethodDesc * pMethod)
+void MulticoreJitRecorder::RecordOrUpdateModuleInfo(FileLoadLevel needLevel, unsigned moduleIndex)
 {
     LIMITED_METHOD_CONTRACT;
 
-    // To avoid recording overhead, records only method desc.
-    if (m_GenericInfoCount < (LONG) MAX_GENERIC_ARRAY)
+    if (m_JitInfoArray != nullptr && m_JitInfoCount < (LONG) MAX_METHODS)
     {
-        m_GenericInfoArray[m_GenericInfoCount] = pMethod;
-        m_GenericInfoCount++;
+        // Due to incremental loading, there are quite a few RecordModuleLoad coming with increasing load level, merge
+        // Previous record and current record both represent modules
+        if (m_JitInfoCount > 0
+            && m_JitInfoArray[m_JitInfoCount - 1].IsModuleInfo()
+            && m_JitInfoArray[m_JitInfoCount - 1].GetModuleIndex() == moduleIndex)
+        {
+            if (needLevel > m_JitInfoArray[m_JitInfoCount - 1].GetModuleLoadLevel())
+            {
+                m_JitInfoArray[m_JitInfoCount - 1].PackModule(needLevel, moduleIndex);
+            }
+
+            return; // no new record
+        }
+
+        m_ModuleDepCount++;
+        m_JitInfoArray[m_JitInfoCount++].PackModule(needLevel, moduleIndex);
     }
 }
 
@@ -701,15 +716,17 @@ void MulticoreJitRecorder::AddModuleDependency(Module * pModule, FileLoadLevel l
 
     unsigned moduleTo = GetOrAddModuleIndex(pModule);
 
-    if (moduleTo != UINT_MAX)
+    if (moduleTo == UINT_MAX)
     {
-        if (m_ModuleList[moduleTo].loadLevel < loadLevel)
-        {
-            m_ModuleList[moduleTo].loadLevel = loadLevel;
+        return;
+    }
 
-            // Update load level
-            RecordJitInfo(0, ((unsigned) loadLevel << 8) | moduleTo | MODULE_DEPENDENCY);
-        }
+    if (m_ModuleList[moduleTo].loadLevel < loadLevel)
+    {
+        m_ModuleList[moduleTo].loadLevel = loadLevel;
+
+        // Update load level
+        RecordOrUpdateModuleInfo(loadLevel, moduleTo);
     }
 }
 
@@ -720,19 +737,24 @@ DWORD MulticoreJitRecorder::EncodeModule(Module * pReferencedModule)
     unsigned slot = GetOrAddModuleIndex(pReferencedModule);
     FileLoadLevel loadLevel = MulticoreJitManager::GetModuleFileLoadLevel(pReferencedModule);
 
-    if (slot != UINT_MAX)
+    if (slot == UINT_MAX)
     {
-        if (m_ModuleList[slot].loadLevel < loadLevel)
-        {
-            m_ModuleList[slot].loadLevel = loadLevel;
+        return ENCODE_MODULE_FAILED;
+    }
 
-            // Update load level
-            RecordJitInfo(0, ((unsigned) loadLevel << 8) | slot | MODULE_DEPENDENCY);
-        }
-        m_ModuleList[slot].methodCount++;
-        return (DWORD)slot;
+    if (m_ModuleList[slot].loadLevel < loadLevel)
+    {
+        m_ModuleList[slot].loadLevel = loadLevel;
+
+        // Update load level
+        RecordOrUpdateModuleInfo(loadLevel, slot);
     }
-    return ENCODE_MODULE_FAILED;
+
+    // This increment is required, because we need to increase methodCount for all referenced modules for generic method.
+    // RecordMethodInfo will only increment this counter for pMethod->GetModule_NoLogging.
+    m_ModuleList[slot].methodCount++;
+
+    return (DWORD) slot;
 }
 
 // Enumerate all modules within an assembly, call OnModule virtual method
@@ -859,52 +881,14 @@ void MulticoreJitRecorder::RecordMethodJit(MethodDesc * pMethod, bool applicatio
         return;
     }
 
-    // pModule could be unknown at this point (modules not enumerated, no event received yet)
-    unsigned moduleIndex = GetOrAddModuleIndex(pModule);
+    unsigned moduleIndex = RecordModuleInfo(pModule);
 
-    if (moduleIndex >= UINT_MAX)
+    if (moduleIndex == UINT_MAX)
     {
         return;
     }
 
-    if (m_fFirstMethod)
-    {
-        PreRecordFirstMethod();
-    }
-
-    // Make sure level for current module is recorded properly
-    // Module dependency for generic and stub as well as regular method are handled in JitInfo.
-    // Any module dependencies for all types of methods would be handled with JitInfo before they are attempted to be multicorejitted.
-    if (m_ModuleList[moduleIndex].loadLevel != FILE_ACTIVE)
-    {
-        FileLoadLevel needLevel = MulticoreJitManager::GetModuleFileLoadLevel(pModule);
-
-        if (m_ModuleList[moduleIndex].loadLevel < needLevel)
-        {
-            m_ModuleList[moduleIndex].loadLevel = needLevel;
-
-            // Update load level
-            RecordJitInfo(0, ((unsigned) needLevel << 8) | moduleIndex | MODULE_DEPENDENCY);
-        }
-    }
-    if (!pMethod->IsTypicalSharedInstantiation())
-    {
-        RecordGenericInfo(pMethod);
-    }
-    else
-    {
-        unsigned methodIndex = pMethod->GetMemberDef_NoLogging() & 0xFFFFFF;
-
-        if (methodIndex <= METHODINDEX_MASK)
-        {
-            if (application) // Jitted by application threads, not background thread
-            {
-                methodIndex |= JIT_BY_APP_THREAD;
-            }
-
-            RecordJitInfo(moduleIndex, methodIndex);
-        }
-    }
+    RecordMethodInfo(moduleIndex, pMethod, application);
 }
 
 
@@ -1149,7 +1133,9 @@ void MulticoreJitManager::SetProfileRoot(const WCHAR * pProfilePath)
 
 #endif
 
-    if (g_SystemInfo.dwNumberOfProcessors >= 2)
+    unsigned minNumCpus = (unsigned)CLRConfig::GetConfigValue(CLRConfig::INTERNAL_MultiCoreJitMinNumCpus);
+
+    if (g_SystemInfo.dwNumberOfProcessors >= minNumCpus)
     {
         if (InterlockedCompareExchange(& m_fSetProfileRootCalled, SETPROFILEROOTCALLED, 0) == 0) // Only allow the first call per appdomain
         {
@@ -1179,8 +1165,8 @@ void MulticoreJitManager::StartProfile(AppDomain * pDomain, ICLRPrivBinder *pBin
         return;
     }
 
-    // Need extra processor for multicore JIT feature
-    _ASSERTE(g_SystemInfo.dwNumberOfProcessors >= 2);
+    // Check if need extra processor for multicore JIT feature
+    _ASSERTE(g_SystemInfo.dwNumberOfProcessors >= (unsigned)CLRConfig::GetConfigValue(CLRConfig::INTERNAL_MultiCoreJitMinNumCpus));
 
 #ifdef PROFILING_SUPPORTED
 
@@ -1469,7 +1455,8 @@ bool MulticoreJitManager::IsMethodSupported(MethodDesc * pMethod)
     }
     CONTRACTL_END;
 
-    return !pMethod->IsDynamicMethod() &&
+    return pMethod->HasILHeader() &&
+           !pMethod->IsDynamicMethod() &&
            !pMethod->GetLoaderAllocator()->IsCollectible();
 }
 
index 6074fe0..35ea8a3 100644 (file)
 
 #endif
 
-// Make sure a record can fit within 2048 bytes, 511 methods now
+// Bits 0xff0000 are reserved method flags. Currently only first bit is used.
+const unsigned METHOD_FLAGS_MASK       = 0xff0000;
+const unsigned JIT_BY_APP_THREAD_TAG   = 0x10000;   // tag, that indicates whether method is jitted by application thread(1) or background thread(0)
+// Tags 0xfe0000 are currently free
 
-const int      MAX_RECORD_SIZE   = 2048;
-const unsigned MAX_JIT_COUNT     = (MAX_RECORD_SIZE - sizeof(unsigned)) / sizeof(unsigned);
+const unsigned RECORD_TYPE_OFFSET      = 24;        // offset of type of record
 
-const int      HEADER_W_COUNTER  = 14;              // Extra 16-bit counters in header for statistics: 28
-const int      HEADER_D_COUNTER  = 3;               // Extra 32-bit counters in header for statistics: 12
-const unsigned MAX_MODULES       = 512;             // Maximum number of modules
-
-const unsigned MAX_METHOD_ARRAY  = 16384;           // Maximum number of methods
-
-const int      MULTICOREJITLIFE  = 60 * 1000;       // 60 seconds
-
-const int      MULTICOREJITBLOCKLIMIT = 10 * 1000;  // 10 seconds
+const unsigned MAX_MODULES             = 0x1000;    // maximum allowed number of modules (2^12 values)
+const unsigned MODULE_MASK             = 0xffff;    // mask to get module index from packed data
 
-const unsigned MAX_GENERIC_ARRAY  = 16384;          // Maximum number of generics
-                                                    //  8-bit module index
+const unsigned MODULE_LEVEL_OFFSET     = 16;        // offset of module load level
+const unsigned MAX_MODULE_LEVELS       = 0x100;     // maximum allowed number of module levels (2^8 values)
 
-                                                    // Method JIT information: 8-bit module 4-bit flag 20-bit method index
-const unsigned MODULE_DEPENDENCY = 0x800000;        //  1-bit module dependency mask
-const unsigned JIT_BY_APP_THREAD = 0x400000;        //  1-bit application thread
+const unsigned MAX_METHODS             = 0x4000;    // Maximum allowed number of methods (2^14 values) (in principle this is also limited by "unsigned short" counters)
 
-const unsigned METHODINDEX_MASK  = 0x0FFFFF;        // 20-bit method index
+const unsigned SIGNATURE_LENGTH_MASK   = 0xffff;    // mask to get signature from packed data (2^16-1 max signature length)
 
-                                                    // Dependendy information: 8-bit module 4-bit flag 4-bit unused 8-bit level 8-bit module
-const unsigned LEVEL_SHIFT       = 8;
-const unsigned LEVEL_MASK        = 0xFF;            //  8-bit file load level
-const unsigned MODULE_MASK       = 0xFF;            //  8-bit dependent module index
+const int      HEADER_W_COUNTER  = 14;              // Extra 16-bit counters in header for statistics: 28
+const int      HEADER_D_COUNTER  = 3;               // Extra 32-bit counters in header for statistics: 12
 
+const int      MULTICOREJITLIFE  = 60 * 1000;       // 60 seconds
 const int      MAX_WALKBACK      = 128;
 
-const unsigned SIGNATURELENGTH_MASK  = 0x0FFFFFF;        // 24-bit signature length
-
 enum
 {
-    MULTICOREJIT_PROFILE_VERSION   = 101,
+    MULTICOREJIT_PROFILE_VERSION   = 102,
 
-    MULTICOREJIT_HEADER_RECORD_ID          = 1,
-    MULTICOREJIT_MODULE_RECORD_ID          = 2,
-    MULTICOREJIT_JITINF_RECORD_ID          = 3,
-    MULTICOREJIT_GENERICINF_RECORD_ID      = 4
+    MULTICOREJIT_HEADER_RECORD_ID           = 1,
+    MULTICOREJIT_MODULE_RECORD_ID           = 2,
+    MULTICOREJIT_MODULEDEPENDENCY_RECORD_ID = 3,
+    MULTICOREJIT_METHOD_RECORD_ID           = 4,
+    MULTICOREJIT_GENERICMETHOD_RECORD_ID    = 5,
 };
 
-
 inline unsigned Pack8_24(unsigned up, unsigned low)
 {
     LIMITED_METHOD_CONTRACT;
@@ -71,33 +61,71 @@ inline unsigned Pack8_24(unsigned up, unsigned low)
     return (up << 24) + low;
 }
 
-// Multicore JIT profile format
-
+// Multicore JIT profile format.
+//
 // <profile>::= <HeaderRecord> { <ModuleRecord> | <JitInfRecord> }
 //
 //  1. Each record is DWORD aligned
-//  2. Each record starts with a DWORD <recordID> with Pack8_24(record type, record size)
+//  2. Each record starts with a 1 byte recordType identifier
 //  3. Counter are just statistical information gathed (mainly during play back), good for quick diagnosis, not used to guide playback
-//  4  Maximum number of modules supported is 256
-//  5  Simple module name stored
-//  6  Maximum method index: 20-bit, could extend to 22 bits
-//  7  JIT_BY_APP_THREAD is for diagnosis only
-
-// <HeaderRecord>::= <recordID> <version> <timeStamp> <moduleCount> <methodCount> <DependencyCount> <unsigned short counter>*14 <unsigned counter>*3
-// <ModuleRecord>::= <recordID> <ModuleVersion> <JitMethodCount> <loadLevel> <lenModuleName> char*lenModuleName <padding>
-// <JifInfRecord>::= <recordID> { <moduleDependency> | <methodJitInfo> }
-
-// <moduleDependency>::
-//    8-bit source module index,  current always 0 until we track per module dependency
-//    8-bit flag                  MODULE_DEPENDENCY is 1
-//    8-bit load level
-//    8-bit target module index
-
-// <methodJitInfo>::
-//    8-bit module index,         current always 0 until we track per module dependency
-//    4-bit flag                  MODULE_DEPENDENCY is 0, JIT_BY_APP_THREAD could be 1
-//   20-bit method index
-
+//  4. Maximum number of modules supported is MAX_MODULES
+//  5. Maximum number of methods supported is MAX_METHODS
+//  6. Simple module name stored
+//  7. Method flag JIT_BY_APP_THREAD is for diagnosis only
+//
+// <HeaderRecord>::=     <recordType=MULTICOREJIT_HEADER_RECORD_ID> <3byte_recordSize> <version> <timeStamp> <moduleCount> <methodCount> <DependencyCount> <unsigned short counter>*14 <unsigned counter>*3
+// <ModuleRecord>::=     <recordType=MULTICOREJIT_MODULE_RECORD_ID> <3byte_recordSize> <ModuleVersion> <JitMethodCount> <loadLevel> <lenModuleName> char*lenModuleName <padding>
+// <ModuleDependency>::= <recordType=MULTICOREJIT_MODULEDEPENDENCY_RECORD_ID> <loadLevel_1byte> <moduleIndex_2bytes>
+// <GenericMethod>::=    <recordType=MULTICOREJIT_GENERICMETHOD_RECORD_ID> <methodFlags_1byte> <moduleIndex_2byte> <sigSize_2byte> <signature> <optional padding>
+// <NonGenericMethod>::= <recordType=MULTICOREJIT_METHOD_RECORD_ID> <methodFlags_1byte> <moduleIndex_2byte> <methodToken_4byte>
+//
+//
+// Actual profile has two representations: internal and the one, that is stored in file.
+//
+// I. Internal profile
+//
+//   Internal profile representation is stored in m_JitInfoArray and is used during profile gathering.
+//   m_JitInfoArray is an array of RecorderInfo (12 bytes on 32-bit systems, 16 bytes on 64-bit systems), with MAX_METHODS elements.
+//
+//   1. Modules.
+//     For modules RecorderInfo::data2 and RecorderInfo::ptr are set to 0. RecorderInfo::ptr == 0 is also a flag that RecorderInfo corresponds to module.
+//     RecorderInfo::data1 is non-zero and represents info for module.
+//
+//     Info for module includes module index and requested load level, with some additional data in higher bits (MULTICOREJIT_MODULEDEPENDENCY_RECORD_ID tag).
+//     - bits 0-15 store module index
+//     - bits 16-23 store load level
+//     - bits 24-31 store tag (MULTICOREJIT_MODULEDEPENDENCY_RECORD_ID)
+//
+//   2. Methods.
+//     For methods RecorderInfo::data2 is set to 0.
+//     RecorderInfo::ptr is set to pointer to MethodDesc.
+//     RecorderInfo::data1 is non-zero and represents additional info for method.
+//
+//     Info for method includes module index and method flags (like JIT_BY_APP_THREAD_TAG, etc.), with some additional data in higher bits (tag).
+//     - bits 0-15 store module index
+//     - bits 16-23 store method flags
+//     - bits 24-31 store tag (MULTICOREJIT_METHOD_RECORD_ID or MULTICOREJIT_GENERICMETHOD_RECORD_ID).
+//
+// II. Profile in file
+//
+//   Preprocessing is performed right before profile saving to file.
+//
+//   1. Modules.
+//     For modules, no preprocessing of RecorderInfo is required, RecorderInfo::data1 is written to file as JitInfRecord.
+//
+//   2. Methods.
+//     2.1. For generic methods, binary signature is computed and RecorderInfo contents are changed.
+//       a) RecorderInfo::data1 doesn't change.
+//       b) RecorderInfo::data2 stores signature length.
+//       c) RecorderInfo::ptr is replaced with pointer to method's binary signature.
+//
+//     2.2 For non-generic methods, method token is obtained and RecorderInfo contents are changed.
+//       a) RecorderInfo::data1 doesn't change.
+//       b) RecorderInfo::data2 stores method token.
+//       c) RecorderInfo::ptr doesn't change.
+//
+//     File write order for generic methods: RecorderInfo::data1, RecorderInfo::data2, signature, extra alignment (this is optional). All of these represent JitInfRecord.
+//     File write order for non-generic methods: RecorderInfo::data1, RecorderInfo::data2. All of these represent JitInfRecord.
 
 struct HeaderRecord
 {
@@ -263,23 +291,19 @@ private:
 
     int                                m_nLoadedModuleCount;
 
-    unsigned                           m_busyWith;
-
     unsigned                           m_headerModuleCount;
     unsigned                           m_moduleCount;
     PlayerModuleInfo                 * m_pModules;
 
-    void JITMethod(Module * pModule, unsigned methodIndex);
-
-    HRESULT HandleModuleRecord(const ModuleRecord * pModule);
-    HRESULT HandleMethodRecord(unsigned * buffer, int count);
-    HRESULT HandleGenericMethodRecord(unsigned moduleIndex, BYTE * signature, unsigned length);
+    HRESULT HandleModuleRecord(const ModuleRecord * pMod);
+    HRESULT HandleModuleInfoRecord(unsigned moduleTo, unsigned level);
+    HRESULT HandleNonGenericMethodInfoRecord(unsigned moduleIndex, unsigned token);
+    HRESULT HandleGenericMethodInfoRecord(unsigned moduleIndex, BYTE * signature, unsigned length);
+    void CompileMethodInfoRecord(Module *pModule, MethodDesc *pMethod, bool isGeneric);
 
     bool CompileMethodDesc(Module * pModule, MethodDesc * pMD);
     HRESULT PlayProfile();
 
-    bool GroupWaitForModuleLoad(int pos);
-
     bool ShouldAbort(bool fast) const;
 
     HRESULT JITThreadProc(Thread * pThread);
@@ -290,8 +314,6 @@ private:
 
     HRESULT UpdateModuleInfo();
 
-    bool HandleModuleDependency(unsigned jitInfo);
-
     HRESULT ReadCheckFile(const WCHAR * pFileName);
 
     DomainAssembly * LoadAssembly(SString & assemblyName);
@@ -333,17 +355,259 @@ struct RecorderModuleInfo
     bool SetModule(Module * pModule);
 };
 
-struct RecorderGenericInfo
+struct RecorderInfo
 {
-    unsigned        genericInfo;
-    BYTE *          genericSignature;
+    unsigned data1;
+    unsigned data2;
+    BYTE *   ptr;
+
+    RecorderInfo()
+    {
+        LIMITED_METHOD_CONTRACT;
+
+        data1 = 0;
+        data2 = 0;
+        ptr  = nullptr;
+    }
+
+    bool IsPartiallyInitialized()
+    {
+        LIMITED_METHOD_CONTRACT;
+
+        return data1 != 0;
+    }
+
+    bool IsGenericMethodInfo()
+    {
+        LIMITED_METHOD_CONTRACT;
+
+        _ASSERTE(IsPartiallyInitialized());
+        return (data1 >> RECORD_TYPE_OFFSET) == MULTICOREJIT_GENERICMETHOD_RECORD_ID;
+    }
+
+    bool IsNonGenericMethodInfo()
+    {
+        LIMITED_METHOD_CONTRACT;
+
+        _ASSERTE(IsPartiallyInitialized());
+        return (data1 >> RECORD_TYPE_OFFSET) == MULTICOREJIT_METHOD_RECORD_ID;
+    }
+
+    bool IsMethodInfo()
+    {
+        LIMITED_METHOD_CONTRACT;
+
+        return IsGenericMethodInfo() || IsNonGenericMethodInfo();
+    }
+
+    bool IsModuleInfo()
+    {
+        LIMITED_METHOD_CONTRACT;
+
+        _ASSERTE(IsPartiallyInitialized());
+        bool ret = (data1 >> RECORD_TYPE_OFFSET) == MULTICOREJIT_MODULEDEPENDENCY_RECORD_ID;
+        _ASSERTE(ret == !IsMethodInfo());
+        return ret;
+    }
+
+    bool IsFullyInitialized()
+    {
+        LIMITED_METHOD_CONTRACT;
+
+        _ASSERTE(IsPartiallyInitialized());
+
+        if (IsModuleInfo())
+        {
+            return true;
+        }
+        else
+        {
+            if (IsNonGenericMethodInfo())
+            {
+                return data2 != 0;
+            }
+            else
+            {
+                return data2 != 0 && ptr != nullptr;
+            }
+        }
+    }
+
+    unsigned GetRawModuleData()
+    {
+        LIMITED_METHOD_CONTRACT;
+
+        _ASSERTE(IsModuleInfo());
+        return data1;
+    }
+
+    unsigned GetModuleIndex()
+    {
+        LIMITED_METHOD_CONTRACT;
+
+        _ASSERTE(IsModuleInfo());
+        return data1 & MODULE_MASK;
+    }
+
+    int GetModuleLoadLevel()
+    {
+        LIMITED_METHOD_CONTRACT;
+
+        _ASSERTE(IsModuleInfo());
+        return (data1 >> MODULE_LEVEL_OFFSET) & (MAX_MODULE_LEVELS - 1);
+    }
+
+    unsigned GetRawMethodData1()
+    {
+        LIMITED_METHOD_CONTRACT;
+
+        _ASSERTE(IsMethodInfo());
+        return data1;
+    }
 
-    RecorderGenericInfo()
+    unsigned GetRawMethodData2NonGeneric()
     {
         LIMITED_METHOD_CONTRACT;
 
-        genericInfo      = 0;
-        genericSignature = nullptr;
+        _ASSERTE(IsNonGenericMethodInfo());
+        return data2;
+    }
+
+    unsigned short GetRawMethodData2Generic()
+    {
+        LIMITED_METHOD_CONTRACT;
+
+        _ASSERTE(IsGenericMethodInfo());
+        _ASSERTE(data2 < SIGNATURE_LENGTH_MASK + 1);
+        return (unsigned short) data2;
+    }
+
+    BYTE * GetRawMethodSignature()
+    {
+        LIMITED_METHOD_CONTRACT;
+
+        _ASSERTE(IsGenericMethodInfo());
+        return ptr;
+    }
+
+    unsigned GetMethodSignatureSize()
+    {
+        LIMITED_METHOD_CONTRACT;
+
+        _ASSERTE(IsGenericMethodInfo());
+        _ASSERTE(IsFullyInitialized());
+
+        return data2;
+    }
+
+    unsigned GetMethodRecordPaddingSize()
+    {
+        LIMITED_METHOD_CONTRACT;
+
+        _ASSERTE(IsGenericMethodInfo());
+        _ASSERTE(IsFullyInitialized());
+
+        unsigned unalignedrecSize = GetMethodSignatureSize() + sizeof(DWORD) + sizeof(unsigned short);
+        unsigned recSize = AlignUp(unalignedrecSize, sizeof(DWORD));
+        unsigned paddingSize = recSize - unalignedrecSize;
+        _ASSERTE(paddingSize < sizeof(unsigned));
+
+        return paddingSize;
+    }
+
+    MethodDesc * GetMethodDescAndClean()
+    {
+        LIMITED_METHOD_CONTRACT;
+
+        _ASSERTE(IsMethodInfo());
+        _ASSERTE(data2 == 0);
+        _ASSERTE(ptr != nullptr);
+
+        MethodDesc * ret = (MethodDesc*) ptr;
+        ptr = nullptr;
+
+        return ret;
+    }
+
+    void PackSignatureForGenericMethod(BYTE *pSignature, unsigned signatureLength)
+    {
+        LIMITED_METHOD_CONTRACT;
+
+        _ASSERTE(IsGenericMethodInfo());
+        _ASSERTE(data2 == 0);
+        _ASSERTE(ptr == nullptr);
+
+        _ASSERTE(pSignature != nullptr);
+        _ASSERTE(signatureLength > 0);
+
+        data2 = signatureLength & SIGNATURE_LENGTH_MASK;
+        ptr = pSignature;
+
+        _ASSERTE(IsFullyInitialized());
+    }
+
+    void PackTokenForNonGenericMethod(unsigned token)
+    {
+        LIMITED_METHOD_CONTRACT;
+
+        _ASSERTE(IsNonGenericMethodInfo());
+        _ASSERTE(data2 == 0);
+        _ASSERTE(ptr == nullptr);
+
+        data2 = token;
+
+        _ASSERTE(IsFullyInitialized());
+    }
+
+    void PackMethod(unsigned moduleIndex, MethodDesc * pMethod, bool application)
+    {
+        LIMITED_METHOD_CONTRACT;
+
+        _ASSERTE(data1 == 0);
+        _ASSERTE(data2 == 0);
+        _ASSERTE(ptr == nullptr);
+
+        _ASSERTE(moduleIndex < MAX_MODULES);
+        _ASSERTE(pMethod != NULL);
+
+        unsigned tag = MULTICOREJIT_METHOD_RECORD_ID;
+
+        if (!pMethod->IsTypicalSharedInstantiation())
+        {
+            // Generic method
+            tag = MULTICOREJIT_GENERICMETHOD_RECORD_ID;
+        }
+
+        data1 = Pack8_24(tag, moduleIndex);
+
+        if (application)
+        {
+            // Jitted by application threads, not background thread
+            data1 |= JIT_BY_APP_THREAD_TAG;
+        }
+
+        data2 = 0;
+        // To avoid recording overhead, records only pointer to MethodDesc.
+        ptr = (BYTE *) pMethod;
+
+        _ASSERTE(IsMethodInfo());
+    }
+
+    void PackModule(FileLoadLevel needLevel, unsigned moduleIndex)
+    {
+        LIMITED_METHOD_CONTRACT;
+
+        _ASSERTE(data2 == 0);
+        _ASSERTE(ptr == nullptr);
+
+        _ASSERTE(((unsigned) needLevel) < MAX_MODULE_LEVELS);
+        _ASSERTE(moduleIndex < MAX_MODULES);
+
+        data1 = Pack8_24(MULTICOREJIT_MODULEDEPENDENCY_RECORD_ID, ((unsigned) needLevel << MODULE_LEVEL_OFFSET) | moduleIndex);
+        data2 = 0;
+        ptr = nullptr;
+
+        _ASSERTE(IsModuleInfo());
     }
 };
 
@@ -359,12 +623,9 @@ private:
     unsigned                  m_ModuleCount;
     unsigned                  m_ModuleDepCount;
 
-    unsigned                  m_JitInfoArray[MAX_METHOD_ARRAY];
+    RecorderInfo              m_JitInfoArray[MAX_METHODS];
     LONG                      m_JitInfoCount;
 
-    MethodDesc*               m_GenericInfoArray[MAX_GENERIC_ARRAY];
-    LONG                      m_GenericInfoCount;
-
     bool                      m_fFirstMethod;
     bool                      m_fAborted;
     bool                      m_fAppxMode;
@@ -378,8 +639,9 @@ private:
 
     HRESULT WriteModuleRecord(IStream * pStream,  const RecorderModuleInfo & module);
 
-    void RecordJitInfo(unsigned module, unsigned method);
-    void RecordGenericInfo(MethodDesc * pMethod);
+    void RecordMethodInfo(unsigned moduleIndex, MethodDesc * pMethod, bool application);
+    unsigned RecordModuleInfo(Module * pModule);
+    void RecordOrUpdateModuleInfo(FileLoadLevel needLevel, unsigned moduleIndex);
 
     void AddAllModulesInAsm(DomainAssembly * pAssembly);
 
@@ -402,10 +664,10 @@ public:
 
         m_pDomain           = pDomain;
         m_pBinderContext    = pBinderContext;
-        m_JitInfoCount      = 0;
         m_ModuleCount       = 0;
         m_ModuleDepCount    = 0;
-        m_GenericInfoCount  = 0;
+
+        m_JitInfoCount      = 0;
 
         m_fFirstMethod      = true;
         m_fAborted          = false;
@@ -444,8 +706,7 @@ public:
     {
         LIMITED_METHOD_CONTRACT;
 
-        return (m_JitInfoCount >= (LONG) MAX_METHOD_ARRAY) ||
-               (m_GenericInfoCount >= (LONG) MAX_GENERIC_ARRAY) ||
+        return (m_JitInfoCount >= (LONG) MAX_METHODS) ||
                (m_ModuleCount  >= MAX_MODULES);
     }
 
@@ -500,4 +761,3 @@ void MulticoreJitFireEtwMethodCodeReturned(MethodDesc * pMethod);
 #define _FireEtwMulticoreJit(String1, String2, Int1, Int2, Int3)  if (PrivateEtwEnabled()) MulticoreJitFireEtw (String1, String2, Int1, Int2, Int3)
 #define _FireEtwMulticoreJitA(String1, String2, Int1, Int2, Int3) if (PrivateEtwEnabled()) MulticoreJitFireEtwA(String1, String2, Int1, Int2, Int3)
 #define _FireEtwMulticoreJitMethodCodeReturned(pMethod) if(PrivateEtwEnabled()) MulticoreJitFireEtwMethodCodeReturned(pMethod)
-
index 4dc8493..ab243cc 100644 (file)
@@ -389,8 +389,6 @@ MulticoreJitProfilePlayer::MulticoreJitProfilePlayer(ICLRPrivBinder * pBinderCon
     m_pFileBuffer        = NULL;
     m_nFileSize          = 0;
 
-    m_busyWith           = EmptyToken;
-
     m_nStartTime         = GetTickCount();
 }
 
@@ -601,89 +599,6 @@ bool MulticoreJitProfilePlayer::CompileMethodDesc(Module * pModule, MethodDesc *
     return false;
 }
 
-
-// Conditional JIT of a method
-void MulticoreJitProfilePlayer::JITMethod(Module * pModule, unsigned methodIndex)
-{
-    STANDARD_VM_CONTRACT;
-
-       // Ensure non-null module
-       if (pModule == NULL)
-       {
-               if (ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PRIVATE_PROVIDER_DOTNET_Context, TRACE_LEVEL_VERBOSE, CLR_PRIVATEMULTICOREJIT_KEYWORD))
-               {
-                       _FireEtwMulticoreJitA(W("NULLMODULEPOINTER"), NULL, methodIndex, 0, 0);
-               }
-               return;
-       }
-
-    methodIndex &= METHODINDEX_MASK; // 20-bit
-
-    unsigned token = TokenFromRid(methodIndex, mdtMethodDef);
-
-    // Similar to Module::FindMethod + Module::FindMethodThrowing,
-    // except it calls GetMethodDescFromMemberDefOrRefOrSpec with strictMetadataChecks=FALSE to allow generic instantiation
-    MethodDesc * pMethod = MemberLoader::GetMethodDescFromMemberDefOrRefOrSpec(pModule, token, NULL, FALSE, FALSE);
-    if (pMethod != NULL && !pMethod->IsDynamicMethod())
-    {
-        if (pMethod->HasILHeader())
-        {
-            // MethodDesc::FindOrCreateTypicalSharedInstantiation is expensive, avoid calling it unless the method or class has generic arguments
-            if (pMethod->HasClassOrMethodInstantiation())
-            {
-                pMethod = pMethod->FindOrCreateTypicalSharedInstantiation();
-
-                if (pMethod == NULL)
-                {
-                    goto BadMethod;
-                }
-
-                pModule = pMethod->GetModule_NoLogging();
-            }
-
-            if (pMethod->GetNativeCode() != NULL) // last check before
-            {
-                m_stats.m_nHasNativeCode ++;
-
-                return;
-            }
-            else
-            {
-                m_busyWith = methodIndex;
-
-                bool rslt = CompileMethodDesc(pModule, pMethod);
-
-                m_busyWith = EmptyToken;
-
-                if (rslt)
-                {
-                    return;
-                }
-            }
-        }
-        else if (pMethod->IsNDirect())
-        {
-            // NDirect Stub
-            if (GetStubForInteropMethod(pMethod))
-            {
-                return;
-            }
-        }
-    }
-
-BadMethod:
-
-    m_stats.m_nFilteredMethods ++;
-
-    MulticoreJitTrace(("Filtered out methods: pModule:[%s] token:[%x]", pModule->GetSimpleName(), token));
-
-    if (ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PRIVATE_PROVIDER_DOTNET_Context, TRACE_LEVEL_VERBOSE, CLR_PRIVATEMULTICOREJIT_KEYWORD))
-    {
-        _FireEtwMulticoreJitA(W("FILTERMETHOD-GENERIC"), pModule->GetSimpleName(), token, 0, 0);
-    }
-}
-
-
 class MulticoreJitPlayerModuleEnumerator : public MulticoreJitModuleEnumerator
 {
     MulticoreJitProfilePlayer * m_pPlayer;
@@ -847,94 +762,26 @@ bool MulticoreJitProfilePlayer::ShouldAbort(bool fast) const
     return false;
 }
 
-
-// Basic delay unit
-const int DelayUnit          = 1;     //  1 ms delay
-const int MissingModuleDelay = 10;    // 10 ms for each missing module
-
-
-// Wait for all the module loading and level requests to be fullfilled
-// This allows for longer delay based on number of mismatches, to reduce CPU usage
-
-// Return true blocking count is 0, false if aborted
-bool MulticoreJitProfilePlayer::GroupWaitForModuleLoad(int pos)
+HRESULT MulticoreJitProfilePlayer::HandleModuleInfoRecord(unsigned moduleTo, unsigned level)
 {
     STANDARD_VM_CONTRACT;
+
+    HRESULT hr = S_OK;
 
-    MulticoreJitTrace(("Enter GroupWaitForModuleLoad(pos=%4d): %d modules loaded, blocking count=%d", pos, m_nLoadedModuleCount, m_nBlockingCount));
-
-    _FireEtwMulticoreJit(W("GROUPWAIT"), W("Enter"), m_nLoadedModuleCount, m_nBlockingCount, pos);
-
-    bool rslt = false;
-
-    // Ensure that we don't block in this particular case for longer than the block limit.
-    // This limit is smaller than the overall MULTICOREJITLIFE and ensures that we don't sit for the
-    // full player lifetime waiting for a module when the app behavior has changed.
-    DWORD currentModuleBlockStart = GetTickCount();
-
-    // Only allow module blocking to occur a certain number of times.
+    MulticoreJitTrace(("ModuleDependency(%u) start module load",
+        moduleTo));
 
-    while (! ShouldAbort(false))
+    if (moduleTo >= m_moduleCount)
     {
-        if (FAILED(UpdateModuleInfo()))
-        {
-            break;
-        }
-
-        if (m_nBlockingCount == 0)
-        {
-            rslt = true;
-            break;
-        }
-
-        if(GetTickCount() - currentModuleBlockStart > MULTICOREJITBLOCKLIMIT)
-        {
-            MulticoreJitTrace(("MulticoreJitProfilePlayer::GroupWaitForModuleLoad timeout exceeded."));
-            _FireEtwMulticoreJit(W("ABORTPLAYER"), W("GroupWaitForModuleLoad timeout exceeded."), 0, 0, 0);
-
-            break;
-        }
-
-        // Heuristic for reducing CPU usage: delay longer when there are more blocking modules
-        unsigned delay = min((m_nMissingModule * MissingModuleDelay + m_nBlockingCount) * DelayUnit, 50);
-
-        MulticoreJitTrace(("Delay: %d ms", delay));
-
-        if (ETW_TRACING_CATEGORY_ENABLED(MICROSOFT_WINDOWS_DOTNETRUNTIME_PRIVATE_PROVIDER_DOTNET_Context, TRACE_LEVEL_VERBOSE, CLR_PRIVATEMULTICOREJIT_KEYWORD))
-        {
-            _FireEtwMulticoreJit(W("GROUPWAIT"), W("Delay"), delay, 0, 0);
-        }
-
-        ClrSleepEx(delay, FALSE);
-
-        m_stats.m_nTotalDelay += (unsigned short) delay;
-        m_stats.m_nDelayCount ++;
+        m_stats.m_nMissingModuleSkip++;
+        hr = COR_E_BADIMAGEFORMAT;
     }
-
-    MulticoreJitTrace(("Leave GroupWaitForModuleLoad(pos=%4d): blocking count=%d (rslt=%d)", pos, m_nBlockingCount, rslt));
-
-    _FireEtwMulticoreJit(W("GROUPWAIT"), W("Leave"), m_nLoadedModuleCount, m_nBlockingCount, rslt);
-
-    return rslt;
-}
-
-
-bool MulticoreJitProfilePlayer::HandleModuleDependency(unsigned jitInfo)
-{
-    STANDARD_VM_CONTRACT;
-
-    // depends on moduleTo, which may not loaded yet
-
-    unsigned moduleTo = jitInfo & MODULE_MASK;
-
-    if (moduleTo < m_moduleCount)
+    else
     {
-        unsigned level = (jitInfo >> LEVEL_SHIFT) & LEVEL_MASK;
-
         PlayerModuleInfo & mod = m_pModules[moduleTo];
 
         // Load the module if necessary.
-        if (!mod.m_pModule)
+        if (!mod.IsModuleLoaded())
         {
             // Update loaded module status.
             AppDomain * pAppDomain = GetAppDomain();
@@ -961,24 +808,32 @@ bool MulticoreJitProfilePlayer::HandleModuleDependency(unsigned jitInfo)
                     if (mod.m_pModule == NULL)
                     {
                         // Unable to load the assembly, so abort.
-                        return false;
+                        m_stats.m_nMissingModuleSkip++;
+                        hr = E_ABORT;
                     }
                 }
                 else
                 {
                     // Unable to load the assembly, so abort.
-                    return false;
+                    m_stats.m_nMissingModuleSkip++;
+                    hr = E_ABORT;
                 }
             }
         }
 
-        if (mod.UpdateNeedLevel((FileLoadLevel) level))
+        if ((SUCCEEDED(hr)) && mod.UpdateNeedLevel((FileLoadLevel) level))
         {
-            m_nBlockingCount ++;
+            m_nBlockingCount++;
         }
     }
 
-    return true;
+    MulticoreJitTrace(("ModuleDependency(%u) end module load, hr=%x",
+        moduleTo,
+        hr));
+
+    TraceSummary();
+
+    return hr;
 }
 
 DomainAssembly * MulticoreJitProfilePlayer::LoadAssembly(SString & assemblyName)
@@ -1010,157 +865,72 @@ DomainAssembly * MulticoreJitProfilePlayer::LoadAssembly(SString & assemblyName)
         FALSE); // Don't throw on FileNotFound.
 }
 
-
-inline bool MethodJifInfo(unsigned inst)
-{
-    LIMITED_METHOD_CONTRACT;
-
-    return ((inst & MODULE_DEPENDENCY) == 0);
-}
-
-
-// Process a block of methodDef, call JIT if not blocked
-HRESULT MulticoreJitProfilePlayer::HandleMethodRecord(unsigned * buffer, int count)
+HRESULT MulticoreJitProfilePlayer::HandleNonGenericMethodInfoRecord(unsigned moduleIndex, unsigned token)
 {
     STANDARD_VM_CONTRACT;
 
     HRESULT hr = E_ABORT;
 
-    MulticoreJitTrace(("MethodRecord(%d) start %d methods, %d mod loaded", m_stats.m_nTotalMethod, count, m_nLoadedModuleCount));
-
-    MulticoreJitManager & manager = GetAppDomain()->GetMulticoreJitManager();
+    MulticoreJitTrace(("NonGeneric MethodRecord(%d) start method compilation, %d mod loaded", m_stats.m_nTotalMethod, m_nLoadedModuleCount));
 
-#ifdef MULTICOREJIT_LOGGING
-
-    MulticoreJitCodeStorage & curStorage = manager.GetMulticoreJitCodeStorage();
-
-    int lastCompiled = curStorage.GetStored();
-
-#endif
-
-    int pos = 0;
-
-    while (! ShouldAbort(true) && (pos < count))
+    if (moduleIndex >= m_moduleCount)
     {
-        unsigned jitInfo = buffer[pos]; // moduleIndex + methodIndex
-
-        unsigned moduleIndex = jitInfo >> 24;
+        m_stats.m_nMissingModuleSkip++;
+        hr = COR_E_BADIMAGEFORMAT;
+    }
+    else
+    {
+        PlayerModuleInfo & mod = m_pModules[moduleIndex];
+        m_stats.m_nTotalMethod++;
 
-        if (moduleIndex < m_moduleCount)
+        if (mod.IsModuleLoaded() && mod.m_enableJit)
         {
-            if (jitInfo & MODULE_DEPENDENCY) // Module depedency information
-            {
-                if (! HandleModuleDependency(jitInfo))
-                {
-                    goto Abort;
-                }
-            }
-            else
-            {
-                PlayerModuleInfo & info = m_pModules[moduleIndex];
+            Module * pModule = mod.m_pModule;
 
-                m_stats.m_nTotalMethod ++;
+            // Similar to Module::FindMethod + Module::FindMethodThrowing,
+            // except it calls GetMethodDescFromMemberDefOrRefOrSpec with strictMetadataChecks=FALSE to allow generic instantiation
+            MethodDesc * pMethod = MemberLoader::GetMethodDescFromMemberDefOrRefOrSpec(pModule, token, NULL, FALSE, FALSE);
 
-                // If module is disabled for Jitting, just skip method without even waiting
-                if (! info.m_enableJit)
-                {
-                    m_stats.m_nFilteredMethods ++;
-                }
-                else
-                {
-
-                    //  To reduce contention with foreground thread, walk backward within the group of methods Jittable methods, not broken apart by dependency
-                    {
-                        int run = 1; // size of the group
-
-                        while (((pos + run) < count) && MethodJifInfo(buffer[pos + run]))
-                        {
-                            run ++;
-
-                            // If walk-back run is too long, lots of methods in the front will be missed by background thread
-                            if (run > MAX_WALKBACK)
-                            {
-                                break;
-                            }
-                        }
-
-                        if (run > 1)
-                        {
-                            MulticoreJitTrace(("Jit backwards %d methods",  run));
-                        }
-
-                        // Walk backwards within the same group, may be from different modules
-                        for (int p = pos + run - 1; p >= pos; p --)
-                        {
-                            unsigned inst = buffer[p];
-
-                            _ASSERTE(MethodJifInfo(inst));
-
-                            PlayerModuleInfo & mod = m_pModules[inst >> 24];
-
-                            if (mod.IsModuleLoaded() && mod.m_enableJit)
-                            {
-                                JITMethod(mod.m_pModule, inst);
-                            }
-                            else
-                            {
-                                m_stats.m_nFilteredMethods ++;
-                            }
-                        }
-
-                        m_stats.m_nWalkBack    += (short) (run - 1);
-                        m_stats.m_nTotalMethod += (short) (run - 1);
-
-                        pos += run - 1; // Skip the group
-                    }
-                }
-            }
+            CompileMethodInfoRecord(pModule, pMethod, false);
         }
         else
         {
-            hr = COR_E_BADIMAGEFORMAT;
-            goto Abort;
+            m_stats.m_nFilteredMethods++;
         }
 
-        pos ++;
+        hr = S_OK;
     }
 
-    // Mark success
-    hr = S_OK;
-
-Abort:
-
-    m_stats.m_nMissingModuleSkip += (short) (count - pos);
-
-    MulticoreJitTrace(("MethodRecord(%d) end %d compiled, %d aborted / %d methods, hr=%x",
+    MulticoreJitTrace(("NonGeneric MethodRecord(%d) end method compilation, filtered %d methods, hr=%x",
         m_stats.m_nTotalMethod,
-        curStorage.GetStored() - lastCompiled,
-        count - pos, count, hr));
+        m_stats.m_nFilteredMethods,
+        hr));
 
     TraceSummary();
 
     return hr;
 }
 
-HRESULT MulticoreJitProfilePlayer::HandleGenericMethodRecord(unsigned moduleIndex, BYTE * signature, unsigned length)
+HRESULT MulticoreJitProfilePlayer::HandleGenericMethodInfoRecord(unsigned moduleIndex, BYTE * signature, unsigned length)
 {
     STANDARD_VM_CONTRACT;
 
     HRESULT hr = E_ABORT;
 
-    MulticoreJitTrace(("MethodRecord(%d) start a generic method, %d mod loaded", m_stats.m_nTotalMethod, m_nLoadedModuleCount));
+    MulticoreJitTrace(("Generic MethodRecord(%d) start method compilation, %d mod loaded", m_stats.m_nTotalMethod, m_nLoadedModuleCount));
 
     if (moduleIndex >= m_moduleCount)
     {
-        m_stats.m_nMissingModuleSkip += 1;
+        m_stats.m_nMissingModuleSkip++;
+        hr = COR_E_BADIMAGEFORMAT;
     }
     else
     {
         PlayerModuleInfo & mod = m_pModules[moduleIndex];
+        m_stats.m_nTotalMethod++;
+
         if (mod.IsModuleLoaded() && mod.m_enableJit)
         {
-            m_stats.m_nTotalMethod ++;
-
             Module * pModule = mod.m_pModule;
 
             SigTypeContext typeContext;   // empty type context
@@ -1175,27 +945,66 @@ HRESULT MulticoreJitProfilePlayer::HandleGenericMethodRecord(unsigned moduleInde
             }
             EX_END_CATCH(SwallowAllExceptions);
 
-            if (pMethod && !pMethod->IsDynamicMethod() && pMethod->HasILHeader() && pMethod->GetNativeCode() == NULL)
-            {
-                CompileMethodDesc(pModule, pMethod);
-            }
-            else
-            {
-                m_stats.m_nFilteredMethods ++;
-            }
+            CompileMethodInfoRecord(pModule, pMethod, true);
+        }
+        else
+        {
+            m_stats.m_nFilteredMethods++;
         }
 
         hr = S_OK;
     }
 
-    MulticoreJitTrace(("MethodRecord(%d) end a generic method compiled, hr=%x",
+    MulticoreJitTrace(("Generic MethodRecord(%d) end method compilation, filtered %d methods, hr=%x",
         m_stats.m_nTotalMethod,
+        m_stats.m_nFilteredMethods,
         hr));
 
     TraceSummary();
+
     return hr;
 }
 
+void MulticoreJitProfilePlayer::CompileMethodInfoRecord(Module *pModule, MethodDesc *pMethod, bool isGeneric)
+{
+    STANDARD_VM_CONTRACT;
+
+    if (pMethod != NULL && MulticoreJitManager::IsMethodSupported(pMethod))
+    {
+        if (!isGeneric)
+        {
+            // MethodDesc::FindOrCreateTypicalSharedInstantiation is expensive, avoid calling it unless the method or class has generic arguments
+            if (pMethod->HasClassOrMethodInstantiation())
+            {
+                pMethod = pMethod->FindOrCreateTypicalSharedInstantiation();
+
+                if (pMethod == NULL)
+                {
+                    m_stats.m_nFilteredMethods++;
+                    return;
+                }
+
+                pModule = pMethod->GetModule_NoLogging();
+            }
+        }
+
+        if (pMethod->GetNativeCode() == NULL)
+        {
+            if (CompileMethodDesc(pModule, pMethod))
+            {
+                return;
+            }
+        }
+        else
+        {
+            m_stats.m_nHasNativeCode++;
+            return;
+        }
+    }
+
+    m_stats.m_nFilteredMethods++;
+}
+
 void MulticoreJitProfilePlayer::TraceSummary()
 {
     LIMITED_METHOD_CONTRACT;
@@ -1266,7 +1075,7 @@ HRESULT MulticoreJitProfilePlayer::ReadCheckFile(const WCHAR * pFileName)
 
             MulticoreJitTrace(("HeaderRecord(version=%d, module=%d, method=%d)", header.version, m_headerModuleCount, header.methodCount));
 
-            if ((header.version != MULTICOREJIT_PROFILE_VERSION) || (header.moduleCount > MAX_MODULES) || (header.methodCount > MAX_METHOD_ARRAY) ||
+            if ((header.version != MULTICOREJIT_PROFILE_VERSION) || (header.moduleCount > MAX_MODULES) || (header.methodCount > MAX_METHODS) ||
                 (header.recordID != Pack8_24(MULTICOREJIT_HEADER_RECORD_ID, sizeof(HeaderRecord))))
             {
                 hr = COR_E_BADIMAGEFORMAT;
@@ -1349,53 +1158,175 @@ HRESULT MulticoreJitProfilePlayer::PlayProfile()
 
     while ((SUCCEEDED(hr)) && (nSize > sizeof(unsigned)))
     {
-        unsigned data   = * (const unsigned *) pBuffer;
-        unsigned rcdLen = data & 0xFFFFFF;
-        unsigned rcdTyp = data >> 24;
+        unsigned data1 = * (const unsigned *) pBuffer;
+        unsigned rcdTyp = data1 >> RECORD_TYPE_OFFSET;
+        unsigned rcdLen = 0;
+
+        if (rcdTyp == MULTICOREJIT_MODULE_RECORD_ID)
+        {
+            rcdLen = data1 & 0xFFFFFF;
+        }
+        else if (rcdTyp == MULTICOREJIT_MODULEDEPENDENCY_RECORD_ID)
+        {
+            rcdLen = sizeof(unsigned);
+        }
+        else if (rcdTyp == MULTICOREJIT_METHOD_RECORD_ID)
+        {
+            rcdLen = 2 * sizeof(unsigned);
+        }
+        else if (rcdTyp == MULTICOREJIT_GENERICMETHOD_RECORD_ID)
+        {
+            if (nSize < sizeof(unsigned) + sizeof(unsigned short))
+            {
+                hr = COR_E_BADIMAGEFORMAT;
+                break;
+            }
+
+            unsigned signatureLength = * (const unsigned short *) (((const unsigned *) pBuffer) + 1);
+            DWORD dataSize = signatureLength + sizeof(DWORD) + sizeof(unsigned short);
+            dataSize = AlignUp(dataSize, sizeof(DWORD));
+            rcdLen = dataSize;
+        }
+        else
+        {
+            hr = COR_E_BADIMAGEFORMAT;
+            break;
+        }
 
         if ((rcdLen > nSize) || (rcdLen & 3)) // Better DWORD align
         {
             hr = COR_E_BADIMAGEFORMAT;
+            break;
         }
-        else
+
+        if (rcdTyp == MULTICOREJIT_MODULE_RECORD_ID)
+        {
+            const ModuleRecord * pRec = (const ModuleRecord * ) pBuffer;
+
+            if (((unsigned)(pRec->lenModuleName
+                + pRec->lenAssemblyName
+                ) > (rcdLen - sizeof(ModuleRecord))) ||
+                (m_moduleCount >= m_headerModuleCount))
+            {
+                hr = COR_E_BADIMAGEFORMAT;
+            }
+            else
+            {
+                hr = HandleModuleRecord(pRec);
+            }
+        }
+        else if (rcdTyp == MULTICOREJIT_MODULEDEPENDENCY_RECORD_ID)
+        {
+            unsigned moduleIndex = data1 & MODULE_MASK;
+            unsigned level = (data1 >> MODULE_LEVEL_OFFSET) & (MAX_MODULE_LEVELS - 1);
+
+            hr = HandleModuleInfoRecord(moduleIndex, level);
+        }
+        else if (rcdTyp == MULTICOREJIT_METHOD_RECORD_ID || rcdTyp == MULTICOREJIT_GENERICMETHOD_RECORD_ID)
         {
-            if (rcdTyp == MULTICOREJIT_MODULE_RECORD_ID)
+            // Collect the run of consecutive method records (at most MAX_WALKBACK) and JIT/load them in reverse order
+            bool isMethod = true;
+            bool isGenericMethod = rcdTyp == MULTICOREJIT_GENERICMETHOD_RECORD_ID;
+            const BYTE * pCurBuf = pBuffer;
+            unsigned curSize = nSize;
+
+            unsigned sizes[MAX_WALKBACK] = {0};
+            int count = 0;
+
+            do
             {
-                const ModuleRecord * pRec = (const ModuleRecord * ) pBuffer;
+                unsigned currcdLen = 0;
 
-                if (((unsigned)(pRec->lenModuleName
-                    + pRec->lenAssemblyName
-                    ) > (rcdLen - sizeof(ModuleRecord))) ||
-                    (m_moduleCount >= m_headerModuleCount))
+                if (isGenericMethod)
                 {
-                    hr = COR_E_BADIMAGEFORMAT;
+                    unsigned cursignatureLength = * (const unsigned short *) (((const unsigned *) pCurBuf) + 1);
+                    DWORD dataSize = cursignatureLength + sizeof(DWORD) + sizeof(unsigned short);
+                    dataSize = AlignUp(dataSize, sizeof(DWORD));
+                    currcdLen = dataSize;
                 }
                 else
                 {
-                    hr = HandleModuleRecord(pRec);
+                    currcdLen = 2 * sizeof(unsigned);
                 }
-            }
-            else if (rcdTyp == MULTICOREJIT_JITINF_RECORD_ID)
-            {
-                int mCount = (rcdLen - sizeof(unsigned)) / sizeof(unsigned);
 
-                hr = HandleMethodRecord((unsigned *) (pBuffer + sizeof(unsigned)), mCount);
+                _ASSERTE(currcdLen > 0);
+
+                if (currcdLen > curSize)
+                {
+                    hr = COR_E_BADIMAGEFORMAT;
+                    break;
+                }
+
+                sizes[count] = currcdLen;
+                count++;
+
+                pCurBuf += currcdLen;
+                curSize -= currcdLen;
+
+                // Stop the look-ahead when the tail is shorter than the smallest method header (DWORD tag + 16-bit length), so neither the tag read below nor the length read above can run past the buffer; the outer loop validates whatever is left.
+                if (curSize < sizeof(unsigned) + sizeof(unsigned short))
+                {
+                    break;
+                }
+                unsigned curdata1 = * (const unsigned *) pCurBuf;
+                unsigned currcdTyp = curdata1 >> RECORD_TYPE_OFFSET;
+                isGenericMethod = currcdTyp == MULTICOREJIT_GENERICMETHOD_RECORD_ID;
+                isMethod = currcdTyp == MULTICOREJIT_METHOD_RECORD_ID || isGenericMethod;
             }
-            else if (rcdTyp == MULTICOREJIT_GENERICINF_RECORD_ID)
+            while (isMethod && count < MAX_WALKBACK);
+
+            if (SUCCEEDED(hr))
             {
-                unsigned info = *(unsigned *)(pBuffer + sizeof(unsigned));
-                unsigned moduleIndex = info >> 24;
-                unsigned signatureLength = info & SIGNATURELENGTH_MASK;
-                hr = HandleGenericMethodRecord(moduleIndex, (BYTE *) (pBuffer + sizeof(unsigned) * 2), signatureLength);
+                _ASSERTE(count > 0);
+                if (count > 1)
+                {
+                    MulticoreJitTrace(("Jit backwards %d methods",  count));
+                }
             }
-            else
+
+            int i = count - 1;
+            for (; (SUCCEEDED(hr)) && i >= 0; --i)
             {
-                hr = COR_E_BADIMAGEFORMAT;
+                pCurBuf -= sizes[i];
+
+                unsigned curdata1 = * (const unsigned *) pCurBuf;
+                unsigned currcdTyp = curdata1 >> RECORD_TYPE_OFFSET;
+                unsigned curmoduleIndex = curdata1 & MODULE_MASK;
+                unsigned curflags = curdata1 & METHOD_FLAGS_MASK;
+
+                if (currcdTyp == MULTICOREJIT_METHOD_RECORD_ID)
+                {
+                    unsigned token = * (((const unsigned *) pCurBuf) + 1);
+
+                    hr = HandleNonGenericMethodInfoRecord(curmoduleIndex, token);
+                }
+                else
+                {
+                    _ASSERTE(currcdTyp == MULTICOREJIT_GENERICMETHOD_RECORD_ID);
+
+                    unsigned cursignatureLength = * (const unsigned short *) (((const unsigned *) pCurBuf) + 1);
+
+                    hr = HandleGenericMethodInfoRecord(curmoduleIndex, (BYTE *) (pCurBuf + sizeof(unsigned) + sizeof(unsigned short)), cursignatureLength);
+                }
+
+                if (SUCCEEDED(hr) && ShouldAbort(false))
+                {
+                    hr = E_ABORT;
+                }
             }
 
-            pBuffer += rcdLen;
-            nSize -= rcdLen;
+            m_stats.m_nWalkBack += (short) count;
+            m_stats.m_nFilteredMethods += (short) (i + 1);
+
+            rcdLen = nSize - curSize;
+        }
+        else
+        {
+            hr = COR_E_BADIMAGEFORMAT;
         }
+
+        pBuffer += rcdLen;
+        nSize -= rcdLen;
 
         if (SUCCEEDED(hr) && ShouldAbort(false))
         {
@@ -1553,4 +1484,3 @@ Module * MulticoreJitProfilePlayer::GetModuleFromIndex(DWORD ix) const
     }
     return NULL;
 }
-