Enable tiered jitting for R2R methods (#15967)
author Koundinya Veluri <kouvel@users.noreply.github.com>
Thu, 25 Jan 2018 20:01:32 +0000 (12:01 -0800)
committer GitHub <noreply@github.com>
Thu, 25 Jan 2018 20:01:32 +0000 (12:01 -0800)
Enable tiered jitting for R2R methods

- Included R2R methods and generics over value types in CoreLib for tiered jitting. Tier 0 for R2R methods is the precompiled code if available, and tier 1 is selectively scheduled based on call counting.
- Added a delay before starting to count calls for tier 1 promotion. The delay is a short duration after frequent tier 0 jitting stops (current heuristic for identifying startup).
- Startup time and steady-state performance have improved on JitBench. There is a regression shortly following startup due to call counting and tier 1 jitting, for a short duration before steady-state performance stabilizes.
- Added two new config values, one for configuring the call count threshold for promoting to tier 1, and another for specifying the delay from the last tier 0 JIT invocation before starting to count calls

17 files changed:
src/inc/clrconfigvalues.h
src/vm/arm/cgencpu.h
src/vm/arm64/cgencpu.h
src/vm/callcounter.cpp
src/vm/callcounter.h
src/vm/ceemain.cpp
src/vm/codeversion.cpp
src/vm/eeconfig.cpp
src/vm/eeconfig.h
src/vm/i386/stublinkerx86.cpp
src/vm/i386/stublinkerx86.h
src/vm/methodtablebuilder.cpp
src/vm/precode.cpp
src/vm/precode.h
src/vm/prestub.cpp
src/vm/tieredcompilation.cpp
src/vm/tieredcompilation.h

index a0b205a..95179d8 100644 (file)
@@ -649,6 +649,8 @@ RETAIL_CONFIG_DWORD_INFO(INTERNAL_HillClimbing_GainExponent,
 ///
 #ifdef FEATURE_TIERED_COMPILATION
 RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_TieredCompilation, W("EXPERIMENTAL_TieredCompilation"), 0, "Enables tiered compilation")
+RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_TieredCompilation_Tier1CallCountThreshold, W("TieredCompilation_Tier1CallCountThreshold"), 30, "Number of times a method must be called after which it is promoted to tier 1.")
+RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_TieredCompilation_Tier1CallCountingDelayMs, W("TieredCompilation_Tier1CallCountingDelayMs"), 100, "Delay in milliseconds since process startup or the last tier 0 JIT before call counting begins for tier 1 promotion.")
 #endif
 
 
index 8da2b2b..3997dbf 100644 (file)
@@ -29,6 +29,8 @@ class BaseDomain;
 class ZapNode;
 struct ArgLocDesc;
 
+extern PCODE GetPreStubEntryPoint();
+
 #define USE_REDIRECT_FOR_GCSTRESS
 
 // CPU-dependent functions
@@ -1113,6 +1115,19 @@ struct StubPrecode {
         return m_pTarget;
     }
 
+    void ResetTargetInterlocked()
+    {
+        CONTRACTL
+        {
+            THROWS;
+            GC_TRIGGERS;
+        }
+        CONTRACTL_END;
+
+        EnsureWritableExecutablePages(&m_pTarget);
+        InterlockedExchange((LONG*)&m_pTarget, (LONG)GetPreStubEntryPoint());
+    }
+
     BOOL SetTargetInterlocked(TADDR target, TADDR expected)
     {
         CONTRACTL
@@ -1206,6 +1221,19 @@ struct FixupPrecode {
         return m_pTarget;
     }
 
+    void ResetTargetInterlocked()
+    {
+        CONTRACTL
+        {
+            THROWS;
+            GC_TRIGGERS;
+        }
+        CONTRACTL_END;
+
+        EnsureWritableExecutablePages(&m_pTarget);
+        InterlockedExchange((LONG*)&m_pTarget, (LONG)GetEEFuncEntryPoint(PrecodeFixupThunk));
+    }
+
     BOOL SetTargetInterlocked(TADDR target, TADDR expected)
     {
         CONTRACTL
index a168cdc..7e3d620 100644 (file)
@@ -24,6 +24,7 @@ EXTERN_C void setFPReturn(int fpSize, INT64 retVal);
 
 class ComCallMethodDesc;
 
+extern PCODE GetPreStubEntryPoint();
 
 #define COMMETHOD_PREPAD                        24   // # extra bytes to allocate in addition to sizeof(ComCallMethodDesc)
 #ifdef FEATURE_COMINTEROP
@@ -572,6 +573,19 @@ struct StubPrecode {
         return m_pTarget;
     }
 
+    void ResetTargetInterlocked()
+    {
+        CONTRACTL
+        {
+            THROWS;
+            GC_TRIGGERS;
+        }
+        CONTRACTL_END;
+
+        EnsureWritableExecutablePages(&m_pTarget);
+        InterlockedExchange64((LONGLONG*)&m_pTarget, (TADDR)GetPreStubEntryPoint());
+    }
+
     BOOL SetTargetInterlocked(TADDR target, TADDR expected)
     {
         CONTRACTL
@@ -685,6 +699,19 @@ struct FixupPrecode {
         return m_pTarget;
     }
 
+    void ResetTargetInterlocked()
+    {
+        CONTRACTL
+        {
+            THROWS;
+            GC_TRIGGERS;
+        }
+        CONTRACTL_END;
+
+        EnsureWritableExecutablePages(&m_pTarget);
+        InterlockedExchange64((LONGLONG*)&m_pTarget, (TADDR)GetEEFuncEntryPoint(PrecodeFixupThunk));
+    }
+
     BOOL SetTargetInterlocked(TADDR target, TADDR expected)
     {
         CONTRACTL
index 14d9e6e..641b611 100644 (file)
@@ -32,11 +32,18 @@ CallCounter::CallCounter()
 // Sets *shouldStopCountingCallsRef to TRUE when no future invocations are needed (we reached
 // the count we cared about) and FALSE otherwise, and *wasPromotedToTier1Ref to TRUE when the
 // call count threshold for tier 1 promotion was reached. It is permissible to keep calling
 // even after counting should have stopped; multi-threaded races will surely cause this to occur.
-BOOL CallCounter::OnMethodCalled(MethodDesc* pMethodDesc)
+void CallCounter::OnMethodCalled(
+    MethodDesc* pMethodDesc,
+    TieredCompilationManager *pTieredCompilationManager,
+    BOOL* shouldStopCountingCallsRef,
+    BOOL* wasPromotedToTier1Ref)
 {
     STANDARD_VM_CONTRACT;
 
     _ASSERTE(pMethodDesc->IsEligibleForTieredCompilation());
+    _ASSERTE(pTieredCompilationManager != nullptr);
+    _ASSERTE(shouldStopCountingCallsRef != nullptr);
+    _ASSERTE(wasPromotedToTier1Ref != nullptr);
 
     // PERF: This is a simple to implement, but not so performant, call counter
     // Currently this is only called until we reach a fixed call count and then
@@ -75,7 +82,7 @@ BOOL CallCounter::OnMethodCalled(MethodDesc* pMethodDesc)
         }
     }
 
-    return GetAppDomain()->GetTieredCompilationManager()->OnMethodCalled(pMethodDesc, callCount);
+    pTieredCompilationManager->OnMethodCalled(pMethodDesc, callCount, shouldStopCountingCallsRef, wasPromotedToTier1Ref);
 }
 
 #endif // FEATURE_TIERED_COMPILATION
index ed98ccb..4e9a5d3 100644 (file)
@@ -70,7 +70,7 @@ public:
     CallCounter();
 #endif
 
-    BOOL OnMethodCalled(MethodDesc* pMethodDesc);
+    void OnMethodCalled(MethodDesc* pMethodDesc, TieredCompilationManager *pTieredCompilationManager, BOOL* shouldStopCountingCallsRef, BOOL* wasPromotedToTier1Ref);
 
 private:
 
index 3f6492b..e9b914e 100644 (file)
@@ -1102,7 +1102,16 @@ void EEStartupHelper(COINITIEE fFlags)
         hr = S_OK;
         STRESS_LOG0(LF_STARTUP, LL_ALWAYS, "===================EEStartup Completed===================");
 
-#if defined(_DEBUG) && !defined(CROSSGEN_COMPILE)
+#ifndef CROSSGEN_COMPILE
+
+#ifdef FEATURE_TIERED_COMPILATION
+        if (g_pConfig->TieredCompilation())
+        {
+            SystemDomain::System()->DefaultDomain()->GetTieredCompilationManager()->InitiateTier1CountingDelay();
+        }
+#endif
+
+#ifdef _DEBUG
 
         //if g_fEEStarted was false when we loaded the System Module, we did not run ExpandAll on it.  In
         //this case, make sure we run ExpandAll here.  The rationale is that if we Jit before g_fEEStarted
@@ -1120,7 +1129,9 @@ void EEStartupHelper(COINITIEE fFlags)
         // Perform mscorlib consistency check if requested
         g_Mscorlib.CheckExtended();
 
-#endif // _DEBUG && !CROSSGEN_COMPILE
+#endif // _DEBUG
+
+#endif // !CROSSGEN_COMPILE
 
 ErrExit: ;
     }
index 10d3013..da808e8 100644 (file)
@@ -2177,12 +2177,14 @@ PCODE CodeVersionManager::PublishVersionableCodeIfNecessary(MethodDesc* pMethodD
                 // attempt to publish the active version still under the lock
                 if (FAILED(hr = PublishNativeCodeVersion(pMethodDesc, activeVersion, fEESuspend)))
                 {
-                    // if we need an EESuspend to publish then start over. We have to leave the lock in order to suspend,
-                    // and when we leave the lock the active version might change again. However now we know that suspend
+                    // If we need an EESuspend to publish then start over. We have to leave the lock in order to suspend,
+                    // and when we leave the lock the active version might change again. However now we know that suspend is
+                    // necessary.
                     if (hr == CORPROF_E_RUNTIME_SUSPEND_REQUIRED)
                     {
                         _ASSERTE(!fEESuspend);
                         fEESuspend = true;
+                        continue; // skip RestartEE() below since SuspendEE() has not been called yet
                     }
                     else
                     {
@@ -2215,6 +2217,8 @@ PCODE CodeVersionManager::PublishVersionableCodeIfNecessary(MethodDesc* pMethodD
 
 HRESULT CodeVersionManager::PublishNativeCodeVersion(MethodDesc* pMethod, NativeCodeVersion nativeCodeVersion, BOOL fEESuspended)
 {
+    // TODO: This function needs to make sure it does not change the precode's target if call counting is in progress. Track
+    // whether call counting is currently being done for the method, and use a lock to ensure the expected precode target.
     LIMITED_METHOD_CONTRACT;
     _ASSERTE(LockOwnedByCurrentThread());
     _ASSERTE(pMethod->IsVersionable());
@@ -2236,7 +2240,12 @@ HRESULT CodeVersionManager::PublishNativeCodeVersion(MethodDesc* pMethod, Native
         {
             EX_TRY
             {
-                hr = pPrecode->SetTargetInterlocked(pCode, FALSE) ? S_OK : E_FAIL;
+                pPrecode->SetTargetInterlocked(pCode, FALSE);
+
+                // SetTargetInterlocked() would return false if it lost the race with another thread. That is fine, this thread
+                // can continue assuming it was successful, similarly to it successfully updating the target and another thread
+                // updating the target again shortly afterwards.
+                hr = S_OK;
             }
             EX_CATCH_HRESULT(hr);
             return hr;
index 95a7133..ab83463 100644 (file)
@@ -376,6 +376,8 @@ HRESULT EEConfig::Init()
 
 #if defined(FEATURE_TIERED_COMPILATION)
     fTieredCompilation = false;
+    tieredCompilation_tier1CallCountThreshold = 1;
+    tieredCompilation_tier1CallCountingDelayMs = 0;
 #endif
     
 #if defined(FEATURE_GDBJIT) && defined(_DEBUG)
@@ -1239,6 +1241,14 @@ HRESULT EEConfig::sync()
 
 #if defined(FEATURE_TIERED_COMPILATION)
     fTieredCompilation = CLRConfig::GetConfigValue(CLRConfig::UNSUPPORTED_TieredCompilation) != 0;
+    tieredCompilation_tier1CallCountThreshold =
+        CLRConfig::GetConfigValue(CLRConfig::UNSUPPORTED_TieredCompilation_Tier1CallCountThreshold);
+    if (tieredCompilation_tier1CallCountThreshold < 1)
+    {
+        tieredCompilation_tier1CallCountThreshold = 1;
+    }
+    tieredCompilation_tier1CallCountingDelayMs =
+        CLRConfig::GetConfigValue(CLRConfig::UNSUPPORTED_TieredCompilation_Tier1CallCountingDelayMs);
 #endif
 
 #if defined(FEATURE_GDBJIT) && defined(_DEBUG)
index 9bc0073..5c88f42 100644 (file)
@@ -285,6 +285,8 @@ public:
     // Tiered Compilation config
 #if defined(FEATURE_TIERED_COMPILATION)
     bool          TieredCompilation(void)           const {LIMITED_METHOD_CONTRACT;  return fTieredCompilation; }
+    DWORD         TieredCompilation_Tier1CallCountThreshold() const { LIMITED_METHOD_CONTRACT; return tieredCompilation_tier1CallCountThreshold; }
+    DWORD         TieredCompilation_Tier1CallCountingDelayMs() const { LIMITED_METHOD_CONTRACT; return tieredCompilation_tier1CallCountingDelayMs; }
 #endif
 
 #if defined(FEATURE_GDBJIT) && defined(_DEBUG)
@@ -1107,6 +1109,8 @@ private: //----------------------------------------------------------------
 
 #if defined(FEATURE_TIERED_COMPILATION)
     bool fTieredCompilation;
+    DWORD tieredCompilation_tier1CallCountThreshold;
+    DWORD tieredCompilation_tier1CallCountingDelayMs;
 #endif
 
 #if defined(FEATURE_GDBJIT) && defined(_DEBUG)
index 14b9701..a11c6a3 100644 (file)
@@ -6416,6 +6416,21 @@ void FixupPrecode::EnumMemoryRegions(CLRDataEnumMemoryFlags flags)
 
 #ifndef DACCESS_COMPILE
 
+void rel32SetInterlocked(/*PINT32*/ PVOID pRel32, TADDR target, MethodDesc* pMD)
+{
+    CONTRACTL
+    {
+        THROWS;         // Creating a JumpStub could throw OutOfMemory
+        GC_TRIGGERS;
+    }
+    CONTRACTL_END;
+
+    INT32 targetRel32 = rel32UsingJumpStub((INT32*)pRel32, target, pMD);
+
+    _ASSERTE(IS_ALIGNED(pRel32, sizeof(INT32)));
+    FastInterlockExchange((LONG*)pRel32, (LONG)targetRel32);
+}
+
 BOOL rel32SetInterlocked(/*PINT32*/ PVOID pRel32, TADDR target, TADDR expected, MethodDesc* pMD)
 {
     CONTRACTL
@@ -6535,6 +6550,33 @@ void FixupPrecode::Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator, int
     }
 }
 
+void FixupPrecode::ResetTargetInterlocked()
+{
+    CONTRACTL
+    {
+        THROWS;         // Creating a JumpStub could throw OutOfMemory
+        GC_NOTRIGGER;
+    }
+    CONTRACTL_END;
+
+    FixupPrecode newValue = *this;
+    newValue.m_op = X86_INSTR_CALL_REL32; // call PrecodeFixupThunk
+    newValue.m_type = FixupPrecode::TypePrestub;
+
+    PCODE target = (PCODE)GetEEFuncEntryPoint(PrecodeFixupThunk);
+    MethodDesc* pMD = (MethodDesc*)GetMethodDesc();
+    newValue.m_rel32 =
+#ifdef FIXUP_PRECODE_PREALLOCATE_DYNAMIC_METHOD_JUMP_STUBS
+        pMD->IsLCGMethod() ?
+            rel32UsingPreallocatedJumpStub(&m_rel32, target, GetDynamicMethodEntryJumpStub()) :
+#endif // FIXUP_PRECODE_PREALLOCATE_DYNAMIC_METHOD_JUMP_STUBS
+            rel32UsingJumpStub(&m_rel32, target, pMD);
+
+    _ASSERTE(IS_ALIGNED(this, sizeof(INT64)));
+    EnsureWritableExecutablePages(this, sizeof(INT64));
+    FastInterlockExchangeLong((INT64*)this, *(INT64*)&newValue);
+}
+
 BOOL FixupPrecode::SetTargetInterlocked(TADDR target, TADDR expected)
 {
     CONTRACTL
index 50dc3b3..229ab07 100644 (file)
@@ -10,6 +10,8 @@
 struct ArrayOpScript;
 class MetaSig;
 
+extern PCODE GetPreStubEntryPoint();
+
 //=======================================================================
 
 #define X86_INSTR_CALL_REL32    0xE8        // call rel32
@@ -454,6 +456,7 @@ inline TADDR rel32Decode(/*PTR_INT32*/ TADDR pRel32)
     return pRel32 + 4 + *PTR_INT32(pRel32);
 }
 
+void rel32SetInterlocked(/*PINT32*/ PVOID pRel32, TADDR target, MethodDesc* pMD);
 BOOL rel32SetInterlocked(/*PINT32*/ PVOID pRel32, TADDR target, TADDR expected, MethodDesc* pMD);
 
 //------------------------------------------------------------------------
@@ -533,6 +536,19 @@ struct StubPrecode {
         return rel32Decode(PTR_HOST_MEMBER_TADDR(StubPrecode, this, m_rel32));
     }
 
+    void ResetTargetInterlocked()
+    {
+        CONTRACTL
+        {
+            THROWS;
+            GC_TRIGGERS;
+        }
+        CONTRACTL_END;
+
+        EnsureWritableExecutablePages(&m_rel32);
+        return rel32SetInterlocked(&m_rel32, GetPreStubEntryPoint(), (MethodDesc*)GetMethodDesc());
+    }
+
     BOOL SetTargetInterlocked(TADDR target, TADDR expected)
     {
         CONTRACTL
@@ -714,6 +730,7 @@ struct FixupPrecode {
         return rel32Decode(PTR_HOST_MEMBER_TADDR(FixupPrecode, this, m_rel32));
     }
 
+    void ResetTargetInterlocked();
     BOOL SetTargetInterlocked(TADDR target, TADDR expected);
 
     static BOOL IsFixupPrecodeByASM(TADDR addr)
index 83116f9..4307160 100644 (file)
@@ -6983,7 +6983,6 @@ MethodTableBuilder::NeedsNativeCodeSlot(bmtMDMethod * pMDMethod)
 #ifdef FEATURE_TIERED_COMPILATION
     // Keep in-sync with MethodDesc::IsEligibleForTieredCompilation()
     if (g_pConfig->TieredCompilation() &&
-        !GetModule()->HasNativeOrReadyToRunImage() &&
         (pMDMethod->GetMethodType() == METHOD_TYPE_NORMAL || pMDMethod->GetMethodType() == METHOD_TYPE_INSTANTIATED))
     {
         return TRUE;
index 8891d5a..103fc03 100644 (file)
@@ -425,6 +425,29 @@ void Precode::Init(PrecodeType t, MethodDesc* pMD, LoaderAllocator *pLoaderAlloc
     _ASSERTE(IsValidType(GetType()));
 }
 
+void Precode::ResetTargetInterlocked()
+{
+    WRAPPER_NO_CONTRACT;
+
+    PrecodeType precodeType = GetType();
+    switch (precodeType)
+    {
+        case PRECODE_STUB:
+            AsStubPrecode()->ResetTargetInterlocked();
+            break;
+
+#ifdef HAS_FIXUP_PRECODE
+        case PRECODE_FIXUP:
+            AsFixupPrecode()->ResetTargetInterlocked();
+            break;
+#endif // HAS_FIXUP_PRECODE
+
+        default:
+            UnexpectedPrecodeType("Precode::ResetTargetInterlocked", precodeType);
+            break;
+    }
+}
+
 BOOL Precode::SetTargetInterlocked(PCODE target, BOOL fOnlyRedirectFromPrestub)
 {
     WRAPPER_NO_CONTRACT;
index 8947192..1a61253 100644 (file)
@@ -261,6 +261,7 @@ public:
     void Init(PrecodeType t, MethodDesc* pMD, LoaderAllocator *pLoaderAllocator);
 
 #ifndef DACCESS_COMPILE
+    void ResetTargetInterlocked();
     BOOL SetTargetInterlocked(PCODE target, BOOL fOnlyRedirectFromPrestub = TRUE);
 
     // Reset precode to point to prestub
index 8934f25..cd85713 100644 (file)
@@ -730,6 +730,13 @@ PCODE MethodDesc::JitCompileCodeLockedEventWrapper(PrepareCodeConfig* pConfig, J
 
     }
 
+#ifdef FEATURE_TIERED_COMPILATION
+    if (g_pConfig->TieredCompilation() && !flags.IsSet(CORJIT_FLAGS::CORJIT_FLAG_TIER1))
+    {
+        GetAppDomain()->GetTieredCompilationManager()->OnTier0JitInvoked();
+    }
+#endif // FEATURE_TIERED_COMPILATION
+
 #ifdef FEATURE_STACK_SAMPLING
     StackSampler::RecordJittingInfo(this, flags);
 #endif // FEATURE_STACK_SAMPLING
@@ -1699,11 +1706,14 @@ PCODE MethodDesc::DoPrestub(MethodTable *pDispatchingMT)
     // for this method only then do we back-patch it.
     BOOL fCanBackpatchPrestub = TRUE;
 #ifdef FEATURE_TIERED_COMPILATION
+    TieredCompilationManager* pTieredCompilationManager = nullptr;
     BOOL fEligibleForTieredCompilation = IsEligibleForTieredCompilation();
+    BOOL fWasPromotedToTier1 = FALSE;
     if (fEligibleForTieredCompilation)
     {
+        pTieredCompilationManager = GetAppDomain()->GetTieredCompilationManager();
         CallCounter * pCallCounter = GetCallCounter();
-        fCanBackpatchPrestub = pCallCounter->OnMethodCalled(this);
+        pCallCounter->OnMethodCalled(this, pTieredCompilationManager, &fCanBackpatchPrestub, &fWasPromotedToTier1);
     }
 #endif
 
@@ -1715,6 +1725,12 @@ PCODE MethodDesc::DoPrestub(MethodTable *pDispatchingMT)
         (!fIsPointingToPrestub && IsVersionableWithJumpStamp()))
     {
         pCode = GetCodeVersionManager()->PublishVersionableCodeIfNecessary(this, fCanBackpatchPrestub);
+
+        if (pTieredCompilationManager != nullptr && fCanBackpatchPrestub && pCode != NULL && !fWasPromotedToTier1)
+        {
+            pTieredCompilationManager->OnMethodCallCountingStoppedWithoutTier1Promotion(this);
+        }
+
         fIsPointingToPrestub = IsPointingToPrestub();
     }
 #endif
@@ -1733,10 +1749,10 @@ PCODE MethodDesc::DoPrestub(MethodTable *pDispatchingMT)
     
     if (pCode)
     {
-        // The only reason we are still pointing to prestub is because the call counter
-        // prevented it. We should still short circuit and return the code without
+        // The only reasons we are still pointing to the prestub are that the call counter
+        // prevented backpatching or this thread lost the race with another thread in updating
+        // the entry point. We should still short-circuit and return the code without
         // backpatching.
-        _ASSERTE(!fCanBackpatchPrestub);
         RETURN pCode;
     }
     
index 48c6670..f89f4f2 100644 (file)
 TieredCompilationManager::TieredCompilationManager() :
     m_isAppDomainShuttingDown(FALSE),
     m_countOptimizationThreadsRunning(0),
-    m_callCountOptimizationThreshhold(30),
-    m_optimizationQuantumMs(50)
+    m_callCountOptimizationThreshhold(1),
+    m_optimizationQuantumMs(50),
+    m_methodsPendingCountingForTier1(nullptr),
+    m_tier1CountingDelayTimerHandle(nullptr),
+    m_wasTier0JitInvokedSinceCountingDelayReset(false)
 {
     LIMITED_METHOD_CONTRACT;
     m_lock.Init(LOCK_TYPE_DEFAULT);
+
+    // On Unix, we can reach here before EEConfig is initialized, so defer config-based initialization to Init()
 }
 
 // Called at AppDomain Init
@@ -102,29 +107,115 @@ void TieredCompilationManager::Init(ADID appDomainId)
 
     SpinLockHolder holder(&m_lock);
     m_domainId = appDomainId;
+    m_callCountOptimizationThreshhold = g_pConfig->TieredCompilation_Tier1CallCountThreshold();
     m_asyncWorkDoneEvent.CreateManualEventNoThrow(TRUE);
 }
 
+void TieredCompilationManager::InitiateTier1CountingDelay()
+{
+    WRAPPER_NO_CONTRACT;
+    _ASSERTE(g_pConfig->TieredCompilation());
+    _ASSERTE(m_methodsPendingCountingForTier1 == nullptr);
+    _ASSERTE(m_tier1CountingDelayTimerHandle == nullptr);
+
+    DWORD delayMs = g_pConfig->TieredCompilation_Tier1CallCountingDelayMs();
+    if (delayMs == 0)
+    {
+        return;
+    }
+
+    m_tier1CountingDelayLock.Init(LOCK_TYPE_DEFAULT);
+
+    NewHolder<SArray<MethodDesc*>> methodsPendingCountingHolder = new(nothrow) SArray<MethodDesc*>();
+    if (methodsPendingCountingHolder == nullptr)
+    {
+        return;
+    }
+
+    NewHolder<ThreadpoolMgr::TimerInfoContext> timerContextHolder = new(nothrow) ThreadpoolMgr::TimerInfoContext();
+    if (timerContextHolder == nullptr)
+    {
+        return;
+    }
+
+    timerContextHolder->AppDomainId = m_domainId;
+    timerContextHolder->TimerId = 0;
+    if (!ThreadpoolMgr::CreateTimerQueueTimer(
+            &m_tier1CountingDelayTimerHandle,
+            Tier1DelayTimerCallback,
+            timerContextHolder,
+            delayMs,
+            (DWORD)-1 /* Period, non-repeating */,
+            0 /* flags */))
+    {
+        _ASSERTE(m_tier1CountingDelayTimerHandle == nullptr);
+        return;
+    }
+
+    m_methodsPendingCountingForTier1 = methodsPendingCountingHolder.Extract();
+    timerContextHolder.SuppressRelease(); // the timer context is automatically deleted by the timer infrastructure
+}
+
+void TieredCompilationManager::OnTier0JitInvoked()
+{
+    LIMITED_METHOD_CONTRACT;
+
+    if (m_methodsPendingCountingForTier1 != nullptr)
+    {
+        m_wasTier0JitInvokedSinceCountingDelayReset = true;
+    }
+}
+
 // Called each time code in this AppDomain has been run. This is our sole entrypoint to begin
 // tiered compilation for now. Sets *shouldStopCountingCallsRef to TRUE if no more notifications
 // are necessary (though more notifications may come anyway), and *wasPromotedToTier1Ref to TRUE
 // if the method reached the call count threshold for tier 1 promotion.
 //
 // currentCallCount is pre-incremented, that is to say the value is 1 on first call for a given
 //      method.
-BOOL TieredCompilationManager::OnMethodCalled(MethodDesc* pMethodDesc, DWORD currentCallCount)
+void TieredCompilationManager::OnMethodCalled(
+    MethodDesc* pMethodDesc,
+    DWORD currentCallCount,
+    BOOL* shouldStopCountingCallsRef,
+    BOOL* wasPromotedToTier1Ref)
 {
-    STANDARD_VM_CONTRACT;
+    WRAPPER_NO_CONTRACT;
+    _ASSERTE(pMethodDesc->IsEligibleForTieredCompilation());
+    _ASSERTE(shouldStopCountingCallsRef != nullptr);
+    _ASSERTE(wasPromotedToTier1Ref != nullptr);
+
+    *shouldStopCountingCallsRef =
+        m_methodsPendingCountingForTier1 != nullptr || currentCallCount >= m_callCountOptimizationThreshhold;
+    *wasPromotedToTier1Ref = currentCallCount >= m_callCountOptimizationThreshhold;
+
+    if (currentCallCount == m_callCountOptimizationThreshhold)
+    {
+        AsyncPromoteMethodToTier1(pMethodDesc);
+    }
+}
+
+void TieredCompilationManager::OnMethodCallCountingStoppedWithoutTier1Promotion(MethodDesc* pMethodDesc)
+{
+    WRAPPER_NO_CONTRACT;
+    _ASSERTE(pMethodDesc != nullptr);
+    _ASSERTE(pMethodDesc->IsEligibleForTieredCompilation());
 
-    if (currentCallCount < m_callCountOptimizationThreshhold)
+    if (g_pConfig->TieredCompilation_Tier1CallCountingDelayMs() == 0)
     {
-        return FALSE; // continue notifications for this method
+        return;
     }
-    else if (currentCallCount > m_callCountOptimizationThreshhold)
+
     {
-        return TRUE; // stop notifications for this method
+        SpinLockHolder holder(&m_tier1CountingDelayLock);
+        if (m_methodsPendingCountingForTier1 != nullptr)
+        {
+            // Record the method to resume counting later (see Tier1DelayTimerCallback)
+            m_methodsPendingCountingForTier1->Append(pMethodDesc);
+            return;
+        }
     }
-    AsyncPromoteMethodToTier1(pMethodDesc);
-    return TRUE;
+
+    // Rare race condition with the timer callback
+    ResumeCountingCalls(pMethodDesc);
 }
 
 void TieredCompilationManager::AsyncPromoteMethodToTier1(MethodDesc* pMethodDesc)
@@ -258,6 +349,74 @@ void TieredCompilationManager::Shutdown(BOOL fBlockUntilAsyncWorkIsComplete)
     }
 }
 
+VOID WINAPI TieredCompilationManager::Tier1DelayTimerCallback(PVOID parameter, BOOLEAN timerFired)
+{
+    WRAPPER_NO_CONTRACT;
+    _ASSERTE(timerFired);
+
+    GCX_COOP();
+    ThreadpoolMgr::TimerInfoContext* timerContext = (ThreadpoolMgr::TimerInfoContext*)parameter;
+    ManagedThreadBase::ThreadPool(timerContext->AppDomainId, Tier1DelayTimerCallbackInAppDomain, nullptr);
+}
+
+void TieredCompilationManager::Tier1DelayTimerCallbackInAppDomain(LPVOID parameter)
+{
+    WRAPPER_NO_CONTRACT;
+    GetAppDomain()->GetTieredCompilationManager()->Tier1DelayTimerCallbackWorker();
+}
+
+void TieredCompilationManager::Tier1DelayTimerCallbackWorker()
+{
+    WRAPPER_NO_CONTRACT;
+
+    // Reschedule the timer if a tier 0 JIT has been invoked since the timer was started to further delay call counting
+    if (m_wasTier0JitInvokedSinceCountingDelayReset)
+    {
+        m_wasTier0JitInvokedSinceCountingDelayReset = false;
+
+        _ASSERTE(m_tier1CountingDelayTimerHandle != nullptr);
+        if (ThreadpoolMgr::ChangeTimerQueueTimer(
+                m_tier1CountingDelayTimerHandle,
+                g_pConfig->TieredCompilation_Tier1CallCountingDelayMs(),
+                (DWORD)-1 /* Period, non-repeating */))
+        {
+            return;
+        }
+    }
+
+    // Exchange the list of methods pending counting for tier 1
+    SArray<MethodDesc*>* methodsPendingCountingForTier1;
+    {
+        SpinLockHolder holder(&m_tier1CountingDelayLock);
+        methodsPendingCountingForTier1 = m_methodsPendingCountingForTier1;
+        _ASSERTE(methodsPendingCountingForTier1 != nullptr);
+        m_methodsPendingCountingForTier1 = nullptr;
+    }
+
+    // Install call counters
+    MethodDesc** methods = methodsPendingCountingForTier1->GetElements();
+    COUNT_T methodCount = methodsPendingCountingForTier1->GetCount();
+    for (COUNT_T i = 0; i < methodCount; ++i)
+    {
+        ResumeCountingCalls(methods[i]);
+    }
+    delete methodsPendingCountingForTier1;
+
+    // Delete the timer
+    _ASSERTE(m_tier1CountingDelayTimerHandle != nullptr);
+    ThreadpoolMgr::DeleteTimerQueueTimer(m_tier1CountingDelayTimerHandle, nullptr);
+    m_tier1CountingDelayTimerHandle = nullptr;
+}
+
+void TieredCompilationManager::ResumeCountingCalls(MethodDesc* pMethodDesc)
+{
+    WRAPPER_NO_CONTRACT;
+    _ASSERTE(pMethodDesc != nullptr);
+    _ASSERTE(pMethodDesc->IsVersionableWithPrecode());
+
+    pMethodDesc->GetPrecode()->ResetTargetInterlocked();
+}
+
 // This is the initial entrypoint for the background thread, called by
 // the threadpool.
 DWORD WINAPI TieredCompilationManager::StaticOptimizeMethodsCallback(void *args)
index 9f61872..95dbb74 100644 (file)
@@ -25,7 +25,12 @@ public:
 #endif
 
     void Init(ADID appDomainId);
-    BOOL OnMethodCalled(MethodDesc* pMethodDesc, DWORD currentCallCount);
+
+    void InitiateTier1CountingDelay();
+    void OnTier0JitInvoked();
+
+    void OnMethodCalled(MethodDesc* pMethodDesc, DWORD currentCallCount, BOOL* shouldStopCountingCallsRef, BOOL* wasPromotedToTier1Ref);
+    void OnMethodCallCountingStoppedWithoutTier1Promotion(MethodDesc* pMethodDesc);
     void AsyncPromoteMethodToTier1(MethodDesc* pMethodDesc);
     static void ShutdownAllDomains();
     void Shutdown(BOOL fBlockUntilAsyncWorkIsComplete);
@@ -33,6 +38,11 @@ public:
 
 private:
 
+    static VOID WINAPI Tier1DelayTimerCallback(PVOID parameter, BOOLEAN timerFired);
+    static void Tier1DelayTimerCallbackInAppDomain(LPVOID parameter);
+    void Tier1DelayTimerCallbackWorker();
+    static void ResumeCountingCalls(MethodDesc* pMethodDesc);
+
     static DWORD StaticOptimizeMethodsCallback(void* args);
     void OptimizeMethodsCallback();
     void OptimizeMethod(NativeCodeVersion nativeCodeVersion);
@@ -50,6 +60,12 @@ private:
     DWORD m_countOptimizationThreadsRunning;
     DWORD m_callCountOptimizationThreshhold;
     DWORD m_optimizationQuantumMs;
+
+    SpinLock m_tier1CountingDelayLock;
+    SArray<MethodDesc*>* m_methodsPendingCountingForTier1;
+    HANDLE m_tier1CountingDelayTimerHandle;
+    bool m_wasTier0JitInvokedSinceCountingDelayReset;
+
     CLREvent m_asyncWorkDoneEvent;
 };