Use a separate thread for tiered compilation background work (#45901)
author Koundinya Veluri <kouvel@users.noreply.github.com>
Tue, 26 Jan 2021 15:51:37 +0000 (07:51 -0800)
committer GitHub <noreply@github.com>
Tue, 26 Jan 2021 15:51:37 +0000 (07:51 -0800)
Use a separate thread for tiered compilation background work

- Makes it easier to manage how much time is spent performing background work like rejitting, and allows yielding more frequently with just Sleep without incurring thread pool overhead, which is useful in CPU-limited cases
- A min/max range is determined for how long background work runs before yielding the thread. The max is the same as before, 50 ms. For now the min is `processor count` ms (capped to the max), so that in CPU-limited cases the thread yields more frequently and does not monopolize too much of the limited CPU resources for background work, while on machines with a larger number of processors, where the background work is typically less intrusive to foreground work, it yields less frequently. See the first sketch after this list.
- At the same time, the background work should make enough progress that steady-state performance is reached in a reasonable time, and yielding too frequently can slow the background work down too much. The actual sleep duration is measured to identify oversubscribed situations, in which case the worker yields less frequently and makes faster progress on the background work (see the second sketch after this list).
- Because less time is spent rejitting in some CPU-limited cases, steady-state performance may be reached a bit later, in exchange for fewer spikes along the way
- When the portable thread pool is enabled, a side effect of using a managed worker thread for tiering background work was that several GC-heavy microbenchmarks regressed. Tiering was the only thing using the thread pool in those tests, and stack-walking the managed thread was slower due to the presence of GC refs. It's not too concerning since the benchmarks are just measuring something different from before, but in any case this change also resolves that issue. Fixes https://github.com/dotnet/runtime/issues/44211.
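
A minimal standalone sketch of the work-duration range described above, assuming plain Win32 APIs outside the runtime; the helper name `ComputeWorkDurationRangeTicks` is made up for illustration, and the real computation lives in `TieredCompilationManager::BackgroundWorkerStart` in the diff below:

```cpp
#define NOMINMAX
#include <windows.h>   // QueryPerformanceFrequency
#include <algorithm>   // std::min
#include <cstdint>

// Sketch only: the background worker runs for at least minWorkDurationTicks and
// at most maxWorkDurationTicks (50 ms) before yielding the thread with Sleep.
static void ComputeWorkDurationRangeTicks(
    int processorCount,
    uint64_t *minWorkDurationTicks,
    uint64_t *maxWorkDurationTicks)
{
    LARGE_INTEGER li;
    QueryPerformanceFrequency(&li);                     // perf-counter ticks per second
    uint64_t ticksPerSecond = (uint64_t)li.QuadPart;

    *maxWorkDurationTicks = ticksPerSecond * 50 / 1000; // 50 ms, same as before
    // <processor count> ms, capped to the max; fewer processors => yield sooner
    *minWorkDurationTicks = std::min(
        ticksPerSecond * (uint64_t)processorCount / 1000,
        *maxWorkDurationTicks);
}
```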
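The exact oversubscription policy is in the (partially shown) `DoBackgroundWork` changes; the following is only an assumed illustration of the idea that a measured sleep which greatly overshoots the requested duration indicates CPU oversubscription, in which case the worker is allowed to run longer before yielding again. The helper name `AdjustWorkDurationTicks` and the doubling/halving heuristic are assumptions, not the PR's actual policy:

```cpp
#define NOMINMAX
#include <windows.h>   // Sleep, QueryPerformanceCounter/Frequency
#include <algorithm>   // std::min, std::max
#include <cstdint>

// Hypothetical adjustment of the work quantum based on how long a yield actually
// took. Sleeping much longer than requested suggests the CPU is oversubscribed,
// so let the worker run longer (toward the max) before yielding again; otherwise
// drift back toward the minimum so foreground work is not starved.
static uint64_t AdjustWorkDurationTicks(
    uint64_t workDurationTicks,
    uint64_t minWorkDurationTicks,
    uint64_t maxWorkDurationTicks,
    DWORD yieldMs)
{
    LARGE_INTEGER freq, before, after;
    QueryPerformanceFrequency(&freq);
    QueryPerformanceCounter(&before);
    Sleep(yieldMs);                       // yield the thread between work quanta
    QueryPerformanceCounter(&after);

    uint64_t sleptTicks = (uint64_t)(after.QuadPart - before.QuadPart);
    uint64_t requestedTicks = (uint64_t)freq.QuadPart * yieldMs / 1000;

    if (sleptTicks > requestedTicks * 2)
    {
        // Slept far longer than asked: oversubscribed, make faster progress
        return std::min(workDurationTicks * 2, maxWorkDurationTicks);
    }
    return std::max(workDurationTicks / 2, minWorkDurationTicks);
}
```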

src/coreclr/inc/clrconfigvalues.h
src/coreclr/vm/callcounting.cpp
src/coreclr/vm/callcounting.h
src/coreclr/vm/codeversion.cpp
src/coreclr/vm/eeconfig.cpp
src/coreclr/vm/eeconfig.h
src/coreclr/vm/synch.cpp
src/coreclr/vm/tieredcompilation.cpp
src/coreclr/vm/tieredcompilation.h

index 6ddd274..aec3911 100644 (file)
@@ -604,6 +604,7 @@ RETAIL_CONFIG_DWORD_INFO(EXTERNAL_TieredCompilation, W("TieredCompilation"), 1,
 RETAIL_CONFIG_DWORD_INFO(EXTERNAL_TC_QuickJit, W("TC_QuickJit"), 1, "For methods that would be jitted, enable using quick JIT when appropriate.")
 RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_TC_QuickJitForLoops, W("TC_QuickJitForLoops"), 0, "When quick JIT is enabled, quick JIT may also be used for methods that contain loops.")
 RETAIL_CONFIG_DWORD_INFO(EXTERNAL_TC_AggressiveTiering, W("TC_AggressiveTiering"), 0, "Transition through tiers aggressively.")
+RETAIL_CONFIG_DWORD_INFO(INTERNAL_TC_BackgroundWorkerTimeoutMs, W("TC_BackgroundWorkerTimeoutMs"), 4000, "How long in milliseconds the background worker thread may remain idle before exiting.")
 RETAIL_CONFIG_DWORD_INFO(INTERNAL_TC_CallCountThreshold, W("TC_CallCountThreshold"), 30, "Number of times a method must be called in tier 0 after which it is promoted to the next tier.")
 RETAIL_CONFIG_DWORD_INFO(INTERNAL_TC_CallCountingDelayMs, W("TC_CallCountingDelayMs"), 100, "A perpetual delay in milliseconds that is applied call counting in tier 0 and jitting at higher tiers, while there is startup-like activity.")
 RETAIL_CONFIG_DWORD_INFO(INTERNAL_TC_DelaySingleProcMultiplier, W("TC_DelaySingleProcMultiplier"), 10, "Multiplier for TC_CallCountingDelayMs that is applied on a single-processor machine or when the process is affinitized to a single processor.")
@@ -614,7 +615,7 @@ RETAIL_CONFIG_DWORD_INFO(INTERNAL_TC_DeleteCallCountingStubsAfter, W("TC_DeleteC
 #else
 RETAIL_CONFIG_DWORD_INFO(INTERNAL_TC_DeleteCallCountingStubsAfter, W("TC_DeleteCallCountingStubsAfter"), 4096, "Deletes call counting stubs after this many have completed. Zero to disable deleting.")
 #endif
-#endif
+#endif // FEATURE_TIERED_COMPILATION
 
 ///
 /// On-Stack Replacement
index c5dafd1..dfecf88 100644 (file)
@@ -501,7 +501,7 @@ bool CallCountingManager::SetCodeEntryPoint(
     NativeCodeVersion activeCodeVersion,
     PCODE codeEntryPoint,
     bool wasMethodCalled,
-    bool *scheduleTieringBackgroundWorkRef)
+    bool *createTieringBackgroundWorkerRef)
 {
     CONTRACTL
     {
@@ -532,8 +532,8 @@ bool CallCountingManager::SetCodeEntryPoint(
         methodDesc->GetCodeVersionManager()->GetActiveILCodeVersion(methodDesc).GetActiveNativeCodeVersion(methodDesc));
     _ASSERTE(codeEntryPoint != NULL);
     _ASSERTE(codeEntryPoint == activeCodeVersion.GetNativeCode());
-    _ASSERTE(!wasMethodCalled || scheduleTieringBackgroundWorkRef != nullptr);
-    _ASSERTE(scheduleTieringBackgroundWorkRef == nullptr || !*scheduleTieringBackgroundWorkRef);
+    _ASSERTE(!wasMethodCalled || createTieringBackgroundWorkerRef != nullptr);
+    _ASSERTE(createTieringBackgroundWorkerRef == nullptr || !*createTieringBackgroundWorkerRef);
 
     if (!methodDesc->IsEligibleForTieredCompilation() ||
         (
@@ -600,7 +600,7 @@ bool CallCountingManager::SetCodeEntryPoint(
                 {
                     GetAppDomain()
                         ->GetTieredCompilationManager()
-                        ->AsyncPromoteToTier1(activeCodeVersion, scheduleTieringBackgroundWorkRef);
+                        ->AsyncPromoteToTier1(activeCodeVersion, createTieringBackgroundWorkerRef);
                 }
                 methodDesc->SetCodeEntryPoint(codeEntryPoint);
                 callCountingInfo->SetStage(CallCountingInfo::Stage::Complete);
@@ -822,118 +822,114 @@ void CallCountingManager::CompleteCallCounting()
     }
     CONTRACTL_END;
 
+    _ASSERTE(GetThread() == TieredCompilationManager::GetBackgroundWorkerThread());
+
     AppDomain *appDomain = GetAppDomain();
     TieredCompilationManager *tieredCompilationManager = appDomain->GetTieredCompilationManager();
-    bool scheduleTieringBackgroundWork = false;
-    {
-        CodeVersionManager *codeVersionManager = appDomain->GetCodeVersionManager();
+    CodeVersionManager *codeVersionManager = appDomain->GetCodeVersionManager();
 
-        MethodDescBackpatchInfoTracker::ConditionalLockHolderForGCCoop slotBackpatchLockHolder;
+    MethodDescBackpatchInfoTracker::ConditionalLockHolderForGCCoop slotBackpatchLockHolder;
+
+    // Backpatching entry point slots requires cooperative GC mode, see
+    // MethodDescBackpatchInfoTracker::Backpatch_Locked(). The code version manager's table lock is an unsafe lock that
+    // may be taken in any GC mode. The lock is taken in cooperative GC mode on some other paths, so the same ordering
+    // must be used here to prevent deadlock.
+    GCX_COOP();
+    CodeVersionManager::LockHolder codeVersioningLockHolder;
 
-        // Backpatching entry point slots requires cooperative GC mode, see
-        // MethodDescBackpatchInfoTracker::Backpatch_Locked(). The code version manager's table lock is an unsafe lock that
-        // may be taken in any GC mode. The lock is taken in cooperative GC mode on some other paths, so the same ordering
-        // must be used here to prevent deadlock.
-        GCX_COOP();
-        CodeVersionManager::LockHolder codeVersioningLockHolder;
+    for (auto itEnd = s_callCountingManagers->End(), it = s_callCountingManagers->Begin(); it != itEnd; ++it)
+    {
+        CallCountingManager *callCountingManager = *it;
+        SArray<CallCountingInfo *> &callCountingInfosPendingCompletion =
+            callCountingManager->m_callCountingInfosPendingCompletion;
+        COUNT_T callCountingInfoCount = callCountingInfosPendingCompletion.GetCount();
+        if (callCountingInfoCount == 0)
+        {
+            continue;
+        }
 
-        for (auto itEnd = s_callCountingManagers->End(), it = s_callCountingManagers->Begin(); it != itEnd; ++it)
+        CallCountingInfo **callCountingInfos = callCountingInfosPendingCompletion.GetElements();
+        for (COUNT_T i = 0; i < callCountingInfoCount; ++i)
         {
-            CallCountingManager *callCountingManager = *it;
-            SArray<CallCountingInfo *> &callCountingInfosPendingCompletion =
-                callCountingManager->m_callCountingInfosPendingCompletion;
-            COUNT_T callCountingInfoCount = callCountingInfosPendingCompletion.GetCount();
-            if (callCountingInfoCount == 0)
+            CallCountingInfo *callCountingInfo = callCountingInfos[i];
+            CallCountingInfo::Stage callCountingStage = callCountingInfo->GetStage();
+            if (callCountingStage != CallCountingInfo::Stage::PendingCompletion)
             {
                 continue;
             }
 
-            CallCountingInfo **callCountingInfos = callCountingInfosPendingCompletion.GetElements();
-            for (COUNT_T i = 0; i < callCountingInfoCount; ++i)
+            NativeCodeVersion codeVersion = callCountingInfo->GetCodeVersion();
+            MethodDesc *methodDesc = codeVersion.GetMethodDesc();
+            _ASSERTE(codeVersionManager == methodDesc->GetCodeVersionManager());
+            EX_TRY
             {
-                CallCountingInfo *callCountingInfo = callCountingInfos[i];
-                CallCountingInfo::Stage callCountingStage = callCountingInfo->GetStage();
-                if (callCountingStage != CallCountingInfo::Stage::PendingCompletion)
+                if (!codeVersion.GetILCodeVersion().HasAnyOptimizedNativeCodeVersion(codeVersion))
                 {
-                    continue;
+                    bool createTieringBackgroundWorker = false;
+                    tieredCompilationManager->AsyncPromoteToTier1(codeVersion, &createTieringBackgroundWorker);
+                    _ASSERTE(!createTieringBackgroundWorker); // the current thread is the background worker thread
                 }
 
-                NativeCodeVersion codeVersion = callCountingInfo->GetCodeVersion();
-                MethodDesc *methodDesc = codeVersion.GetMethodDesc();
-                _ASSERTE(codeVersionManager == methodDesc->GetCodeVersionManager());
-                EX_TRY
+                // The active code version may have changed externally after the call counting stub was activated,
+                // deactivating the call counting stub without our knowledge. Check the active code version and determine
+                // what needs to be done.
+                NativeCodeVersion activeCodeVersion =
+                    codeVersionManager->GetActiveILCodeVersion(methodDesc).GetActiveNativeCodeVersion(methodDesc);
+                do
                 {
-                    if (!codeVersion.GetILCodeVersion().HasAnyOptimizedNativeCodeVersion(codeVersion))
+                    if (activeCodeVersion == codeVersion)
                     {
-                        tieredCompilationManager->AsyncPromoteToTier1(codeVersion, &scheduleTieringBackgroundWork);
+                        methodDesc->SetCodeEntryPoint(activeCodeVersion.GetNativeCode());
+                        break;
                     }
 
-                    // The active code version may have changed externally after the call counting stub was activated,
-                    // deactivating the call counting stub without our knowledge. Check the active code version and determine
-                    // what needs to be done.
-                    NativeCodeVersion activeCodeVersion =
-                        codeVersionManager->GetActiveILCodeVersion(methodDesc).GetActiveNativeCodeVersion(methodDesc);
-                    do
+                    // There is at least one case where the IL code version is changed inside the code versioning lock, the
+                    // lock is released and reacquired, then the method's code entry point is reset. So if this path is
+                    // reached between those locks, the method would still be pointing to the call counting stub. Once the
+                    // stub is marked as complete, it may be deleted, so in all cases update the method's code entry point
+                    // to ensure that the method is no longer pointing to the call counting stub.
+
+                    if (!activeCodeVersion.IsNull())
                     {
-                        if (activeCodeVersion == codeVersion)
+                        PCODE activeNativeCode = activeCodeVersion.GetNativeCode();
+                        if (activeNativeCode != NULL)
                         {
-                            methodDesc->SetCodeEntryPoint(activeCodeVersion.GetNativeCode());
+                            methodDesc->SetCodeEntryPoint(activeNativeCode);
                             break;
                         }
+                    }
 
-                        // There is at least one case where the IL code version is changed inside the code versioning lock, the
-                        // lock is released and reacquired, then the method's code entry point is reset. So if this path is
-                        // reached between those locks, the method would still be pointing to the call counting stub. Once the
-                        // stub is marked as complete, it may be deleted, so in all cases update the method's code entry point
-                        // to ensure that the method is no longer pointing to the call counting stub.
-
-                        if (!activeCodeVersion.IsNull())
-                        {
-                            PCODE activeNativeCode = activeCodeVersion.GetNativeCode();
-                            if (activeNativeCode != NULL)
-                            {
-                                methodDesc->SetCodeEntryPoint(activeNativeCode);
-                                break;
-                            }
-                        }
-
-                        methodDesc->ResetCodeEntryPoint();
-                    } while (false);
+                    methodDesc->ResetCodeEntryPoint();
+                } while (false);
 
-                    callCountingInfo->SetStage(CallCountingInfo::Stage::Complete);
-                }
-                EX_CATCH
-                {
-                    // Avoid abandoning call counting completion for all recorded call counting infos on exception. Since this
-                    // is happening on a background thread, following the general policy so far, the exception will be caught,
-                    // logged, and ignored anyway, so make an attempt to complete call counting for each item. Individual items
-                    // that fail will result in those code versions not getting promoted (similar to elsewhere).
-                    STRESS_LOG1(LF_TIEREDCOMPILATION, LL_WARNING, "CallCountingManager::CompleteCallCounting: "
-                        "Exception, hr=0x%x\n",
-                        GET_EXCEPTION()->GetHR());
-                }
-                EX_END_CATCH(RethrowTerminalExceptions);
+                callCountingInfo->SetStage(CallCountingInfo::Stage::Complete);
             }
-
-            callCountingInfosPendingCompletion.Clear();
-            if (callCountingInfosPendingCompletion.GetAllocation() > 64)
+            EX_CATCH
             {
-                callCountingInfosPendingCompletion.Trim();
-                EX_TRY
-                {
-                    callCountingInfosPendingCompletion.Preallocate(64);
-                }
-                EX_CATCH
-                {
-                }
-                EX_END_CATCH(RethrowTerminalExceptions);
+                // Avoid abandoning call counting completion for all recorded call counting infos on exception. Since this
+                // is happening on a background thread, following the general policy so far, the exception will be caught,
+                // logged, and ignored anyway, so make an attempt to complete call counting for each item. Individual items
+                // that fail will result in those code versions not getting promoted (similar to elsewhere).
+                STRESS_LOG1(LF_TIEREDCOMPILATION, LL_WARNING, "CallCountingManager::CompleteCallCounting: "
+                    "Exception, hr=0x%x\n",
+                    GET_EXCEPTION()->GetHR());
             }
+            EX_END_CATCH(RethrowTerminalExceptions);
         }
-    }
 
-    if (scheduleTieringBackgroundWork)
-    {
-        tieredCompilationManager->ScheduleBackgroundWork(); // requires GC_TRIGGERS
+        callCountingInfosPendingCompletion.Clear();
+        if (callCountingInfosPendingCompletion.GetAllocation() > 64)
+        {
+            callCountingInfosPendingCompletion.Trim();
+            EX_TRY
+            {
+                callCountingInfosPendingCompletion.Preallocate(64);
+            }
+            EX_CATCH
+            {
+            }
+            EX_END_CATCH(RethrowTerminalExceptions);
+        }
     }
 }
 
@@ -947,6 +943,8 @@ void CallCountingManager::StopAndDeleteAllCallCountingStubs()
     }
     CONTRACTL_END;
 
+    _ASSERTE(GetThread() == TieredCompilationManager::GetBackgroundWorkerThread());
+
     // If a number of call counting stubs have completed, we can try to delete them to reclaim some memory. Deleting
     // involves suspending the runtime and will delete all call counting stubs, and after that some call counting stubs may
     // be recreated in the foreground. The threshold is to decrease the impact of both of those overheads.
@@ -957,52 +955,43 @@ void CallCountingManager::StopAndDeleteAllCallCountingStubs()
     }
 
     TieredCompilationManager *tieredCompilationManager = GetAppDomain()->GetTieredCompilationManager();
-    bool scheduleTieringBackgroundWork = false;
-    {
-        MethodDescBackpatchInfoTracker::ConditionalLockHolderForGCCoop slotBackpatchLockHolder;
 
-        ThreadSuspend::SuspendEE(ThreadSuspend::SUSPEND_OTHER);
-        struct AutoRestartEE
-        {
-            ~AutoRestartEE()
-            {
-                WRAPPER_NO_CONTRACT;
-                ThreadSuspend::RestartEE(false, true);
-            }
-        } autoRestartEE;
-
-        // Backpatching entry point slots requires cooperative GC mode, see
-        // MethodDescBackpatchInfoTracker::Backpatch_Locked(). The code version manager's table lock is an unsafe lock that
-        // may be taken in any GC mode. The lock is taken in cooperative GC mode on some other paths, so the same ordering
-        // must be used here to prevent deadlock.
-        GCX_COOP();
-        CodeVersionManager::LockHolder codeVersioningLockHolder;
-
-        // After the following, no method's entry point would be pointing to a call counting stub
-        StopAllCallCounting(tieredCompilationManager, &scheduleTieringBackgroundWork);
-
-        // Call counting has been stopped above and call counting stubs will soon be deleted. Ensure that call counting stubs
-        // will not be used after resuming the runtime. The following ensures that other threads will not use an old cached
-        // entry point value that will not be valid. Do this here in case of exception later.
-        MemoryBarrier(); // flush writes from this thread first to guarantee ordering
-        FlushProcessWriteBuffers();
-
-        // At this point, allocated call counting stubs won't be used anymore. Call counting stubs and corresponding infos may
-        // now be safely deleted. Note that call counting infos may not be deleted prior to this point because call counting
-        // stubs refer to the remaining call count in the info, and the call counting info is necessary to get a code version
-        // from a call counting stub address.
-        DeleteAllCallCountingStubs();
-    }
+    MethodDescBackpatchInfoTracker::ConditionalLockHolderForGCCoop slotBackpatchLockHolder;
 
-    if (scheduleTieringBackgroundWork)
+    ThreadSuspend::SuspendEE(ThreadSuspend::SUSPEND_OTHER);
+    struct AutoRestartEE
     {
-        tieredCompilationManager->ScheduleBackgroundWork(); // requires GC_TRIGGERS
-    }
+        ~AutoRestartEE()
+        {
+            WRAPPER_NO_CONTRACT;
+            ThreadSuspend::RestartEE(false, true);
+        }
+    } autoRestartEE;
+
+    // Backpatching entry point slots requires cooperative GC mode, see
+    // MethodDescBackpatchInfoTracker::Backpatch_Locked(). The code version manager's table lock is an unsafe lock that
+    // may be taken in any GC mode. The lock is taken in cooperative GC mode on some other paths, so the same ordering
+    // must be used here to prevent deadlock.
+    GCX_COOP();
+    CodeVersionManager::LockHolder codeVersioningLockHolder;
+
+    // After the following, no method's entry point would be pointing to a call counting stub
+    StopAllCallCounting(tieredCompilationManager);
+
+    // Call counting has been stopped above and call counting stubs will soon be deleted. Ensure that call counting stubs
+    // will not be used after resuming the runtime. The following ensures that other threads will not use an old cached
+    // entry point value that will not be valid. Do this here in case of exception later.
+    MemoryBarrier(); // flush writes from this thread first to guarantee ordering
+    FlushProcessWriteBuffers();
+
+    // At this point, allocated call counting stubs won't be used anymore. Call counting stubs and corresponding infos may
+    // now be safely deleted. Note that call counting infos may not be deleted prior to this point because call counting
+    // stubs refer to the remaining call count in the info, and the call counting info is necessary to get a code version
+    // from a call counting stub address.
+    DeleteAllCallCountingStubs();
 }
 
-void CallCountingManager::StopAllCallCounting(
-    TieredCompilationManager *tieredCompilationManager,
-    bool *scheduleTieringBackgroundWorkRef)
+void CallCountingManager::StopAllCallCounting(TieredCompilationManager *tieredCompilationManager)
 {
     CONTRACTL
     {
@@ -1012,11 +1001,10 @@ void CallCountingManager::StopAllCallCounting(
     }
     CONTRACTL_END;
 
+    _ASSERTE(GetThread() == TieredCompilationManager::GetBackgroundWorkerThread());
     _ASSERTE(MethodDescBackpatchInfoTracker::IsLockOwnedByCurrentThread());
     _ASSERTE(CodeVersionManager::IsLockOwnedByCurrentThread());
     _ASSERTE(tieredCompilationManager != nullptr);
-    _ASSERTE(scheduleTieringBackgroundWorkRef != nullptr);
-    _ASSERTE(!*scheduleTieringBackgroundWorkRef);
 
     for (auto itEnd = s_callCountingManagers->End(), it = s_callCountingManagers->Begin(); it != itEnd; ++it)
     {
@@ -1047,7 +1035,9 @@ void CallCountingManager::StopAllCallCounting(
                 _ASSERTE(callCountingStage == CallCountingInfo::Stage::PendingCompletion);
                 if (!codeVersion.GetILCodeVersion().HasAnyOptimizedNativeCodeVersion(codeVersion))
                 {
-                    tieredCompilationManager->AsyncPromoteToTier1(codeVersion, scheduleTieringBackgroundWorkRef);
+                    bool createTieringBackgroundWorker = false;
+                    tieredCompilationManager->AsyncPromoteToTier1(codeVersion, &createTieringBackgroundWorker);
+                    _ASSERTE(!createTieringBackgroundWorker); // the current thread is the background worker thread
                 }
 
                 newCallCountingStage = CallCountingInfo::Stage::Complete;
index bdc5b42..fa03452 100644 (file)
@@ -270,7 +270,7 @@ public:
         NativeCodeVersion activeCodeVersion,
         PCODE codeEntryPoint,
         bool wasMethodCalled,
-        bool *scheduleTieringBackgroundWorkRef);
+        bool *createTieringBackgroundWorker);
     static PCODE OnCallCountThresholdReached(TransitionBlock *transitionBlock, TADDR stubIdentifyingToken);
     static COUNT_T GetCountOfCodeVersionsPendingCompletion();
     static void CompleteCallCounting();
@@ -278,7 +278,7 @@ public:
 public:
     static void StopAndDeleteAllCallCountingStubs();
 private:
-    static void StopAllCallCounting(TieredCompilationManager *tieredCompilationManager, bool *scheduleTieringBackgroundWorkRef);
+    static void StopAllCallCounting(TieredCompilationManager *tieredCompilationManager);
     static void DeleteAllCallCountingStubs();
     void TrimCollections();
 #endif // !DACCESS_COMPILE
index 78f2ede..8fa82da 100644 (file)
@@ -1754,7 +1754,7 @@ PCODE CodeVersionManager::PublishVersionableCodeIfNecessary(
     #endif
 
         bool done = false;
-        bool scheduleTieringBackgroundWork = false;
+        bool createTieringBackgroundWorker = false;
         NativeCodeVersion newActiveVersion;
         do
         {
@@ -1816,10 +1816,10 @@ PCODE CodeVersionManager::PublishVersionableCodeIfNecessary(
                     }
                 #ifdef FEATURE_TIERED_COMPILATION
                     else if (
-                        !CallCountingManager::SetCodeEntryPoint(activeVersion, pCode, true, &scheduleTieringBackgroundWork))
+                        !CallCountingManager::SetCodeEntryPoint(activeVersion, pCode, true, &createTieringBackgroundWorker))
                     {
                         _ASSERTE(!g_pConfig->TieredCompilation_UseCallCountingStubs());
-                        _ASSERTE(!scheduleTieringBackgroundWork);
+                        _ASSERTE(!createTieringBackgroundWorker);
                         *doBackpatchRef = doPublish = false;
                     }
                 #endif
@@ -1842,19 +1842,19 @@ PCODE CodeVersionManager::PublishVersionableCodeIfNecessary(
             {
                 _ASSERTE(doPublish);
                 _ASSERTE(!handleCallCounting);
-                _ASSERTE(!scheduleTieringBackgroundWork);
+                _ASSERTE(!createTieringBackgroundWorker);
 
                 // The code entry point is set before recording the method for call counting to avoid a race. Otherwise, the
                 // tiering delay may expire and enable call counting for the method before the entry point is set here, in which
                 // case calls to the method would not be counted anymore.
                 GetAppDomain()->GetTieredCompilationManager()->HandleCallCountingForFirstCall(pMethodDesc);
             }
-            else if (scheduleTieringBackgroundWork)
+            else if (createTieringBackgroundWorker)
             {
                 _ASSERTE(doPublish);
                 _ASSERTE(handleCallCounting);
                 _ASSERTE(!handleCallCountingForFirstCall);
-                GetAppDomain()->GetTieredCompilationManager()->ScheduleBackgroundWork(); // requires GC_TRIGGERS
+                TieredCompilationManager::CreateBackgroundWorker(); // requires GC_TRIGGERS
             }
         #endif
 
index d963d92..dd89bf5 100644 (file)
@@ -244,6 +244,7 @@ HRESULT EEConfig::Init()
     fTieredCompilation_CallCounting = false;
     fTieredCompilation_UseCallCountingStubs = false;
     tieredCompilation_CallCountThreshold = 1;
+    tieredCompilation_BackgroundWorkerTimeoutMs = 0;
     tieredCompilation_CallCountingDelayMs = 0;
     tieredCompilation_DeleteCallCountingStubsAfter = 0;
 #endif
@@ -880,6 +881,9 @@ fTrackDynamicMethodDebugInfo = CLRConfig::GetConfigValue(CLRConfig::UNSUPPORTED_
                     CLRConfig::UNSUPPORTED_TC_QuickJitForLoops);
         }
 
+        tieredCompilation_BackgroundWorkerTimeoutMs =
+            CLRConfig::GetConfigValue(CLRConfig::INTERNAL_TC_BackgroundWorkerTimeoutMs);
+
         fTieredCompilation_CallCounting = CLRConfig::GetConfigValue(CLRConfig::INTERNAL_TC_CallCounting) != 0;
 
         DWORD tieredCompilation_ConfiguredCallCountThreshold =
index a068e44..5c5babf 100644 (file)
@@ -82,6 +82,7 @@ public:
     bool          TieredCompilation(void)           const { LIMITED_METHOD_CONTRACT;  return fTieredCompilation; }
     bool          TieredCompilation_QuickJit() const { LIMITED_METHOD_CONTRACT; return fTieredCompilation_QuickJit; }
     bool          TieredCompilation_QuickJitForLoops() const { LIMITED_METHOD_CONTRACT; return fTieredCompilation_QuickJitForLoops; }
+    DWORD         TieredCompilation_BackgroundWorkerTimeoutMs() const { LIMITED_METHOD_CONTRACT; return tieredCompilation_BackgroundWorkerTimeoutMs; }
     bool          TieredCompilation_CallCounting()  const { LIMITED_METHOD_CONTRACT; return fTieredCompilation_CallCounting; }
     UINT16        TieredCompilation_CallCountThreshold() const { LIMITED_METHOD_CONTRACT; return tieredCompilation_CallCountThreshold; }
     DWORD         TieredCompilation_CallCountingDelayMs() const { LIMITED_METHOD_CONTRACT; return tieredCompilation_CallCountingDelayMs; }
@@ -733,6 +734,7 @@ private: //----------------------------------------------------------------
     bool fTieredCompilation_CallCounting;
     bool fTieredCompilation_UseCallCountingStubs;
     UINT16 tieredCompilation_CallCountThreshold;
+    DWORD tieredCompilation_BackgroundWorkerTimeoutMs;
     DWORD tieredCompilation_CallCountingDelayMs;
     DWORD tieredCompilation_DeleteCallCountingStubsAfter;
 #endif
index 9d51bef..bc60f8c 100644 (file)
@@ -359,10 +359,6 @@ BOOL CLREventBase::Reset()
 
     _ASSERTE(Thread::Debug_AllowCallout());
 
-    // We do not allow Reset on AutoEvent
-    _ASSERTE (!IsAutoEvent() ||
-              !"Can not call Reset on AutoEvent");
-
     {
         return ResetEvent(m_handle);
     }
index 9dcad74..b595aa8 100644 (file)
 
 #if defined(FEATURE_TIERED_COMPILATION) && !defined(DACCESS_COMPILE)
 
-class TieredCompilationManager::AutoResetIsBackgroundWorkScheduled
-{
-private:
-    TieredCompilationManager *m_tieredCompilationManager;
-
-public:
-    AutoResetIsBackgroundWorkScheduled(TieredCompilationManager *tieredCompilationManager)
-        : m_tieredCompilationManager(tieredCompilationManager)
-    {
-        LIMITED_METHOD_CONTRACT;
-        _ASSERTE(tieredCompilationManager == nullptr || tieredCompilationManager->m_isBackgroundWorkScheduled);
-    }
-
-    ~AutoResetIsBackgroundWorkScheduled()
-    {
-        WRAPPER_NO_CONTRACT;
-
-        if (m_tieredCompilationManager == nullptr)
-        {
-            return;
-        }
-
-        LockHolder tieredCompilationLockHolder;
-
-        _ASSERTE(m_tieredCompilationManager->m_isBackgroundWorkScheduled);
-        m_tieredCompilationManager->m_isBackgroundWorkScheduled = false;
-    }
-
-    void Cancel()
-    {
-        LIMITED_METHOD_CONTRACT;
-        m_tieredCompilationManager = nullptr;
-    }
-};
+CrstStatic TieredCompilationManager::s_lock;
+#ifdef _DEBUG
+Thread *TieredCompilationManager::s_backgroundWorkerThread = nullptr;
+#endif
+CLREvent TieredCompilationManager::s_backgroundWorkAvailableEvent;
+bool TieredCompilationManager::s_isBackgroundWorkerRunning = false;
+bool TieredCompilationManager::s_isBackgroundWorkerProcessingWork = false;
 
 // Called at AppDomain construction
 TieredCompilationManager::TieredCompilationManager() :
     m_countOfMethodsToOptimize(0),
     m_countOfNewMethodsCalledDuringDelay(0),
     m_methodsPendingCountingForTier1(nullptr),
-    m_tieringDelayTimerHandle(nullptr),
-    m_doBackgroundWorkTimerHandle(nullptr),
-    m_isBackgroundWorkScheduled(false),
     m_tier1CallCountingCandidateMethodRecentlyRecorded(false),
     m_isPendingCallCountingCompletion(false),
-    m_recentlyRequestedCallCountingCompletionAgain(false)
+    m_recentlyRequestedCallCountingCompletion(false)
 {
     WRAPPER_NO_CONTRACT;
     // On Unix, we can reach here before EEConfig is initialized, so defer config-based initialization to Init()
@@ -152,7 +122,14 @@ NativeCodeVersion::OptimizationTier TieredCompilationManager::GetInitialOptimiza
 
 void TieredCompilationManager::HandleCallCountingForFirstCall(MethodDesc* pMethodDesc)
 {
-    WRAPPER_NO_CONTRACT;
+    CONTRACTL
+    {
+        THROWS;
+        GC_TRIGGERS;
+        MODE_PREEMPTIVE;
+    }
+    CONTRACTL_END;
+
     _ASSERTE(pMethodDesc != nullptr);
     _ASSERTE(pMethodDesc->IsEligibleForTieredCompilation());
     _ASSERTE(g_pConfig->TieredCompilation_CallCountingDelayMs() != 0);
@@ -160,6 +137,7 @@ void TieredCompilationManager::HandleCallCountingForFirstCall(MethodDesc* pMetho
     // An exception here (OOM) would mean that the method's calls would not be counted and it would not be promoted. A
     // consideration is that an attempt can be made to reset the code entry point on exception (which can also OOM). Doesn't
     // seem worth it, the exception is propagated and there are other cases where a method may not be promoted due to OOM.
+    bool createBackgroundWorker;
     {
         LockHolder tieredCompilationLockHolder;
 
@@ -187,54 +165,43 @@ void TieredCompilationManager::HandleCallCountingForFirstCall(MethodDesc* pMetho
         m_methodsPendingCountingForTier1 = methodsPendingCountingHolder.Extract();
         _ASSERTE(!m_tier1CallCountingCandidateMethodRecentlyRecorded);
         _ASSERTE(IsTieringDelayActive());
+
+        // The thread is in a GC_NOTRIGGER scope here. If the background worker is already running, we can schedule it inside
+        // the same lock without triggering a GC.
+        createBackgroundWorker = !TryScheduleBackgroundWorkerWithoutGCTrigger_Locked();
     }
 
-    // Elsewhere, the tiered compilation lock is taken inside the code versioning lock. The code versioning lock is an unsafe
-    // any-GC-mode lock, so the tiering lock is also that type of lock. Inside that type of lock, there is an implicit
-    // GC_NOTRIGGER contract. So, the timer cannot be created inside the tiering lock since it may GC_TRIGGERS. At this point,
-    // this is the only thread that may attempt creating the timer. If creating the timer fails, let the exception propagate,
-    // but because the tiering lock was released above, first reset any recorded methods' code entry points and deactivate the
-    // tiering delay so that timer creation may be attempted again.
-    EX_TRY
+    if (createBackgroundWorker)
     {
-        NewHolder<ThreadpoolMgr::TimerInfoContext> timerContextHolder = new ThreadpoolMgr::TimerInfoContext();
-        timerContextHolder->TimerId = 0;
-
-        _ASSERTE(m_tieringDelayTimerHandle == nullptr);
-        if (!ThreadpoolMgr::CreateTimerQueueTimer(
-                &m_tieringDelayTimerHandle,
-                TieringDelayTimerCallback,
-                timerContextHolder,
-                g_pConfig->TieredCompilation_CallCountingDelayMs(),
-                (DWORD)-1 /* Period, non-repeating */,
-                0 /* flags */))
+        // Elsewhere, the tiered compilation lock is taken inside the code versioning lock. The code versioning lock is an
+        // unsafe any-GC-mode lock, so the tiering lock is also that type of lock. Inside that type of lock, there is an
+        // implicit GC_NOTRIGGER contract. So, a thread cannot be created inside the tiering lock since it may GC_TRIGGERS. At
+        // this point, this is the only thread that may attempt creating the background worker thread.
+        EX_TRY
         {
-            _ASSERTE(m_tieringDelayTimerHandle == nullptr);
-            ThrowOutOfMemory();
+            CreateBackgroundWorker();
         }
-
-        timerContextHolder.SuppressRelease(); // the timer context is automatically deleted by the timer infrastructure
-    }
-    EX_CATCH
-    {
-        // Since the tiering lock was released and reacquired, other methods may have been recorded in-between. Just deactivate
-        // the tiering delay. Any methods that have been recorded would not have their calls be counted and would not be
-        // promoted (due to the small window, there shouldn't be many of those). See consideration above in a similar exception
-        // case.
+        EX_CATCH
         {
-            LockHolder tieredCompilationLockHolder;
+            // Since the tiering lock was released and reacquired, other methods may have been recorded in-between. Just
+            // deactivate the tiering delay. Any methods that have been recorded would not have their calls be counted and
+            // would not be promoted (due to the small window, there shouldn't be many of those). See consideration above in a
+            // similar exception case.
+            {
+                LockHolder tieredCompilationLockHolder;
 
-            _ASSERTE(IsTieringDelayActive());
-            m_tier1CallCountingCandidateMethodRecentlyRecorded = false;
-            _ASSERTE(m_methodsPendingCountingForTier1 != nullptr);
-            delete m_methodsPendingCountingForTier1;
-            m_methodsPendingCountingForTier1 = nullptr;
-            _ASSERTE(!IsTieringDelayActive());
-        }
+                _ASSERTE(IsTieringDelayActive());
+                m_tier1CallCountingCandidateMethodRecentlyRecorded = false;
+                _ASSERTE(m_methodsPendingCountingForTier1 != nullptr);
+                delete m_methodsPendingCountingForTier1;
+                m_methodsPendingCountingForTier1 = nullptr;
+                _ASSERTE(!IsTieringDelayActive());
+            }
 
-        EX_RETHROW;
+            EX_RETHROW;
+        }
+        EX_END_CATCH(RethrowTerminalExceptions);
     }
-    EX_END_CATCH(RethrowTerminalExceptions);
 
     if (ETW::CompilationLog::TieredCompilation::Runtime::IsEnabled())
     {
@@ -272,7 +239,7 @@ bool TieredCompilationManager::TrySetCodeEntryPointAndRecordMethodForCallCountin
 
 void TieredCompilationManager::AsyncPromoteToTier1(
     NativeCodeVersion tier0NativeCodeVersion,
-    bool *scheduleTieringBackgroundWorkRef)
+    bool *createTieringBackgroundWorkerRef)
 {
     CONTRACTL
     {
@@ -285,7 +252,7 @@ void TieredCompilationManager::AsyncPromoteToTier1(
     _ASSERTE(CodeVersionManager::IsLockOwnedByCurrentThread());
     _ASSERTE(!tier0NativeCodeVersion.IsNull());
     _ASSERTE(tier0NativeCodeVersion.GetOptimizationTier() == NativeCodeVersion::OptimizationTier0);
-    _ASSERTE(scheduleTieringBackgroundWorkRef != nullptr);
+    _ASSERTE(createTieringBackgroundWorkerRef != nullptr);
 
     NativeCodeVersion t1NativeCodeVersion;
     HRESULT hr;
@@ -306,12 +273,6 @@ void TieredCompilationManager::AsyncPromoteToTier1(
 
     // Insert the method into the optimization queue and trigger a thread to service
     // the queue if needed.
-    //
-    // Note an error here could affect concurrent threads running this
-    // code. Those threads will observe m_isBackgroundWorkScheduled == true and return,
-    // then QueueUserWorkItem fails on this thread resetting the field to false and leaves them
-    // unserviced. Synchronous retries appear unlikely to offer any material improvement
-    // and complicating the code to narrow an already rare error case isn't desirable.
     SListElem<NativeCodeVersion>* pMethodListItem = new SListElem<NativeCodeVersion>(t1NativeCodeVersion);
     {
         LockHolder tieredCompilationLockHolder;
@@ -323,29 +284,107 @@ void TieredCompilationManager::AsyncPromoteToTier1(
             pMethodDesc, pMethodDesc->m_pszDebugClassName, pMethodDesc->m_pszDebugMethodName,
             t1NativeCodeVersion.GetVersionId()));
 
-        if (m_isBackgroundWorkScheduled || IsTieringDelayActive())
+        // The thread is in a GC_NOTRIGGER scope here. If the background worker is already running, we can schedule it inside
+        // the same lock without triggering a GC.
+        if (TryScheduleBackgroundWorkerWithoutGCTrigger_Locked())
         {
             return;
         }
     }
 
-    // This function is called from a GC_NOTRIGGER scope and scheduling background work (creating a thread) may GC_TRIGGERS.
-    // The caller needs to schedule background work after leaving the GC_NOTRIGGER scope. The contract is that the caller must
-    // make an attempt to schedule background work in any normal path. In the event of an atypical exception (eg. OOM),
-    // background work may not be scheduled and would have to be tried again the next time some background work is queued.
-    if (!*scheduleTieringBackgroundWorkRef)
+    // This function is called from a GC_NOTRIGGER scope and creating the background worker (creating a thread) may GC_TRIGGERS.
+    // The caller needs to create the background worker after leaving the GC_NOTRIGGER scope. The contract is that the caller
+    // must make an attempt to create the background worker in any normal path. In the event of an atypical exception (eg. OOM),
+    // the background worker may not be created and would have to be tried again the next time some background work is queued.
+    *createTieringBackgroundWorkerRef = true;
+}
+
+bool TieredCompilationManager::TryScheduleBackgroundWorkerWithoutGCTrigger_Locked()
+{
+    CONTRACTL
+    {
+        NOTHROW;
+        GC_NOTRIGGER;
+        MODE_ANY;
+    }
+    CONTRACTL_END;
+
+    _ASSERTE(IsLockOwnedByCurrentThread());
+
+    if (s_isBackgroundWorkerProcessingWork)
     {
-        *scheduleTieringBackgroundWorkRef = true;
+        _ASSERTE(s_isBackgroundWorkerRunning);
+        return true;
     }
+
+    if (s_isBackgroundWorkerRunning)
+    {
+        s_isBackgroundWorkerProcessingWork = true;
+        s_backgroundWorkAvailableEvent.Set();
+        return true;
+    }
+
+    s_isBackgroundWorkerRunning = true;
+    s_isBackgroundWorkerProcessingWork = true;
+    return false; // it's the caller's responsibility to call CreateBackgroundWorker() after leaving the GC_NOTRIGGER region
 }
 
-bool TieredCompilationManager::IsTieringDelayActive()
+void TieredCompilationManager::CreateBackgroundWorker()
 {
-    LIMITED_METHOD_CONTRACT;
-    return m_methodsPendingCountingForTier1 != nullptr;
+    CONTRACTL
+    {
+        THROWS;
+        GC_TRIGGERS;
+        MODE_PREEMPTIVE;
+    }
+    CONTRACTL_END;
+
+    _ASSERTE(!IsLockOwnedByCurrentThread());
+    _ASSERTE(s_isBackgroundWorkerRunning);
+    _ASSERTE(s_isBackgroundWorkerProcessingWork);
+    _ASSERTE(s_backgroundWorkerThread == nullptr);
+
+    EX_TRY
+    {
+        if (!s_backgroundWorkAvailableEvent.IsValid())
+        {
+            // An auto-reset event is used since it's a bit easier to manage and felt more natural in this case. It is also
+            // possible to use a manual-reset event instead, though there doesn't appear to be anything to gain from doing so.
+            s_backgroundWorkAvailableEvent.CreateAutoEvent(false);
+        }
+
+        Thread *newThread = SetupUnstartedThread();
+        _ASSERTE(newThread != nullptr);
+        INDEBUG(s_backgroundWorkerThread = newThread);
+    #ifdef FEATURE_COMINTEROP
+        newThread->SetApartment(Thread::AS_InMTA);
+    #endif
+        newThread->SetBackground(true);
+
+        if (!newThread->CreateNewThread(0, BackgroundWorkerBootstrapper0, newThread, W(".NET Tiered Compilation Worker")))
+        {
+            newThread->DecExternalCount(false);
+            ThrowOutOfMemory();
+        }
+
+        newThread->StartThread();
+    }
+    EX_CATCH
+    {
+        {
+            LockHolder tieredCompilationLockHolder;
+
+            s_isBackgroundWorkerProcessingWork = false;
+            s_isBackgroundWorkerRunning = false;
+            INDEBUG(s_backgroundWorkerThread = nullptr);
+        }
+
+        EX_RETHROW;
+    }
+    EX_END_CATCH(RethrowTerminalExceptions);
 }
 
-void WINAPI TieredCompilationManager::TieringDelayTimerCallback(PVOID parameter, BOOLEAN timerFired)
+DWORD WINAPI TieredCompilationManager::BackgroundWorkerBootstrapper0(LPVOID args)
 {
     CONTRACTL
     {
@@ -355,88 +394,190 @@ void WINAPI TieredCompilationManager::TieringDelayTimerCallback(PVOID parameter,
     }
     CONTRACTL_END;
 
-    _ASSERTE(timerFired);
+    _ASSERTE(args != nullptr);
+    Thread *thread = (Thread *)args;
+    _ASSERTE(s_backgroundWorkerThread == thread);
 
-    GetAppDomain()->GetTieredCompilationManager()->DeactivateTieringDelay();
+    if (!thread->HasStarted())
+    {
+        LockHolder tieredCompilationLockHolder;
+
+        s_isBackgroundWorkerProcessingWork = false;
+        s_isBackgroundWorkerRunning = false;
+        INDEBUG(s_backgroundWorkerThread = nullptr);
+        return 0;
+    }
+
+    _ASSERTE(GetThread() == thread);
+    ManagedThreadBase::KickOff(BackgroundWorkerBootstrapper1, nullptr);
+
+    GCX_PREEMP_NO_DTOR();
+
+    DestroyThread(thread);
+    return 0;
 }
 
-void TieredCompilationManager::DeactivateTieringDelay()
+void TieredCompilationManager::BackgroundWorkerBootstrapper1(LPVOID)
 {
     CONTRACTL
     {
         THROWS;
         GC_TRIGGERS;
-        MODE_PREEMPTIVE;
+        MODE_COOPERATIVE;
     }
     CONTRACTL_END;
 
-    HANDLE tieringDelayTimerHandle = nullptr;
-    SArray<MethodDesc *> *methodsPendingCounting = nullptr;
-    UINT32 countOfNewMethodsCalledDuringDelay = 0;
-    bool doBackgroundWork = false;
-    while (true)
+    GCX_PREEMP();
+    GetAppDomain()->GetTieredCompilationManager()->BackgroundWorkerStart();
+}
+
+void TieredCompilationManager::BackgroundWorkerStart()
+{
+    CONTRACTL
     {
-        {
-            // It's possible for the timer to tick before it is recorded that the delay is in effect. This lock guarantees that
-            // the delay is in effect.
-            LockHolder tieredCompilationLockHolder;
-            _ASSERTE(IsTieringDelayActive());
+        THROWS;
+        GC_TRIGGERS;
+        MODE_PREEMPTIVE;
+    }
+    CONTRACTL_END;
 
-            tieringDelayTimerHandle = m_tieringDelayTimerHandle;
-            if (m_tier1CallCountingCandidateMethodRecentlyRecorded)
-            {
-                m_tier1CallCountingCandidateMethodRecentlyRecorded = false;
-            }
-            else
-            {
-                // Exchange information into locals inside the lock
+    _ASSERTE(s_backgroundWorkAvailableEvent.IsValid());
 
-                methodsPendingCounting = m_methodsPendingCountingForTier1;
-                _ASSERTE(methodsPendingCounting != nullptr);
-                m_methodsPendingCountingForTier1 = nullptr;
+    DWORD timeoutMs = g_pConfig->TieredCompilation_BackgroundWorkerTimeoutMs();
+    DWORD delayMs = g_pConfig->TieredCompilation_CallCountingDelayMs();
 
-                _ASSERTE(tieringDelayTimerHandle == m_tieringDelayTimerHandle);
-                m_tieringDelayTimerHandle = nullptr;
+    int processorCount;
+#ifndef TARGET_UNIX
+    CPUGroupInfo::EnsureInitialized();
+    if (CPUGroupInfo::CanEnableGCCPUGroups() && CPUGroupInfo::CanEnableThreadUseAllCpuGroups())
+    {
+        processorCount = CPUGroupInfo::GetNumActiveProcessors();
+    }
+    else
+#endif
+    {
+        processorCount = GetCurrentProcessCpuCount();
+    }
+    _ASSERTE(processorCount > 0);
 
-                countOfNewMethodsCalledDuringDelay = m_countOfNewMethodsCalledDuringDelay;
-                m_countOfNewMethodsCalledDuringDelay = 0;
+    LARGE_INTEGER li;
+    QueryPerformanceFrequency(&li);
+    UINT64 ticksPerS = li.QuadPart;
+    UINT64 maxWorkDurationTicks = ticksPerS * 50 / 1000; // 50 ms
+    UINT64 minWorkDurationTicks = min(ticksPerS * processorCount / 1000, maxWorkDurationTicks); // <proc count> ms (capped)
+    UINT64 workDurationTicks = minWorkDurationTicks;
 
-                _ASSERTE(!IsTieringDelayActive());
+    while (true)
+    {
+        _ASSERTE(s_isBackgroundWorkerRunning);
+        _ASSERTE(s_isBackgroundWorkerProcessingWork);
 
-                if (!m_isBackgroundWorkScheduled && (m_isPendingCallCountingCompletion || m_countOfMethodsToOptimize != 0))
-                {
-                    m_isBackgroundWorkScheduled = true;
-                    doBackgroundWork = true;
-                }
+        if (IsTieringDelayActive())
+        {
+            do
+            {
+                ClrSleepEx(delayMs, false);
+            } while (!TryDeactivateTieringDelay());
+        }
 
-                break;
-            }
+        // Don't want to perform background work as soon as it is scheduled if there is possibly more important work that could
+        // be done. Some operating systems may also give a thread woken by a signal higher priority temporarily, which on a
+        // CPU-limited environment may lead to rejitting a method as soon as it's promoted, effectively in the foreground.
+        ClrSleepEx(0, false);
+
+        if (IsTieringDelayActive())
+        {
+            continue;
+        }
+
+        if ((m_isPendingCallCountingCompletion || m_countOfMethodsToOptimize != 0) &&
+            !DoBackgroundWork(&workDurationTicks, minWorkDurationTicks, maxWorkDurationTicks))
+        {
+            // Background work was interrupted due to the tiering delay being activated
+            _ASSERTE(IsTieringDelayActive());
+            continue;
         }
 
-        // Reschedule the timer if there has been recent tier 0 activity (when a new eligible method is called the first
-        // time) to further delay call counting
-        bool success = false;
-        EX_TRY
         {
-            if (ThreadpoolMgr::ChangeTimerQueueTimer(
-                    tieringDelayTimerHandle,
-                    g_pConfig->TieredCompilation_CallCountingDelayMs(),
-                    (DWORD)-1 /* Period, non-repeating */))
+            LockHolder tieredCompilationLockHolder;
+
+            if (IsTieringDelayActive() || m_isPendingCallCountingCompletion || m_countOfMethodsToOptimize != 0)
             {
-                success = true;
+                continue;
             }
+
+            s_isBackgroundWorkerProcessingWork = false;
         }
-        EX_CATCH
+
+        // Wait for the worker to be scheduled again
+        DWORD waitResult = s_backgroundWorkAvailableEvent.Wait(timeoutMs, false);
+        if (waitResult == WAIT_OBJECT_0)
         {
+            continue;
         }
-        EX_END_CATCH(RethrowTerminalExceptions);
-        if (success)
+        _ASSERTE(waitResult == WAIT_TIMEOUT);
+
+        // The wait timed out, see if the worker can exit
+
+        LockHolder tieredCompilationLockHolder;
+
+        if (s_isBackgroundWorkerProcessingWork)
         {
-            return;
+            // The background worker got scheduled again just as the wait timed out. The event would have been signaled just
+            // after the wait had timed out, so reset it and continue processing work.
+            s_backgroundWorkAvailableEvent.Reset();
+            continue;
         }
+
+        s_isBackgroundWorkerRunning = false;
+        INDEBUG(s_backgroundWorkerThread = nullptr);
+        return;
+    }
+}
+
+bool TieredCompilationManager::IsTieringDelayActive()
+{
+    LIMITED_METHOD_CONTRACT;
+    return m_methodsPendingCountingForTier1 != nullptr;
+}
+
+bool TieredCompilationManager::TryDeactivateTieringDelay()
+{
+    CONTRACTL
+    {
+        THROWS;
+        GC_TRIGGERS;
+        MODE_PREEMPTIVE;
     }
+    CONTRACTL_END;
+
+    _ASSERTE(GetThread() == s_backgroundWorkerThread);
+
+    SArray<MethodDesc *> *methodsPendingCounting = nullptr;
+    UINT32 countOfNewMethodsCalledDuringDelay = 0;
+    {
+        // It's possible for the timer to tick before it is recorded that the delay is in effect. This lock guarantees that
+        // the delay is in effect.
+        LockHolder tieredCompilationLockHolder;
+        _ASSERTE(IsTieringDelayActive());
+
+        if (m_tier1CallCountingCandidateMethodRecentlyRecorded)
+        {
+            m_tier1CallCountingCandidateMethodRecentlyRecorded = false;
+            return false;
+        }
+
+        // Exchange information into locals inside the lock
+
+        methodsPendingCounting = m_methodsPendingCountingForTier1;
+        _ASSERTE(methodsPendingCounting != nullptr);
+        m_methodsPendingCountingForTier1 = nullptr;
 
-    AutoResetIsBackgroundWorkScheduled autoResetIsBackgroundWorkScheduled(doBackgroundWork ? this : nullptr);
+        countOfNewMethodsCalledDuringDelay = m_countOfNewMethodsCalledDuringDelay;
+        m_countOfNewMethodsCalledDuringDelay = 0;
+
+        _ASSERTE(!IsTieringDelayActive());
+    }
 
     if (ETW::CompilationLog::TieredCompilation::Runtime::IsEnabled())
     {
@@ -486,13 +627,7 @@ void TieredCompilationManager::DeactivateTieringDelay()
     }
 
     delete methodsPendingCounting;
-    ThreadpoolMgr::DeleteTimerQueueTimer(tieringDelayTimerHandle, nullptr);
-
-    if (doBackgroundWork)
-    {
-        autoResetIsBackgroundWorkScheduled.Cancel(); // the call below will take care of it
-        DoBackgroundWork();
-    }
+    return true;
 }
 
 void TieredCompilationManager::AsyncCompleteCallCounting()
@@ -501,152 +636,55 @@ void TieredCompilationManager::AsyncCompleteCallCounting()
     {
         THROWS;
         GC_TRIGGERS;
-        MODE_ANY;
+        MODE_PREEMPTIVE;
     }
     CONTRACTL_END;
 
     {
         LockHolder tieredCompilationLockHolder;
 
-        if (m_recentlyRequestedCallCountingCompletionAgain)
+        if (m_recentlyRequestedCallCountingCompletion)
         {
             _ASSERTE(m_isPendingCallCountingCompletion);
         }
-        else if (m_isPendingCallCountingCompletion)
-        {
-            // A potentially large number of methods may reach the call count threshold at about the same time or in bursts.
-            // This field is used to coalesce a burst of pending completions, see the background work.
-            m_recentlyRequestedCallCountingCompletionAgain = true;
-        }
         else
         {
             m_isPendingCallCountingCompletion = true;
-        }
 
-        if (m_isBackgroundWorkScheduled || IsTieringDelayActive())
-        {
-            return;
+            // A potentially large number of methods may reach the call count threshold at about the same time or in bursts.
+            // This field is used to coalesce a burst of pending completions, see the background work.
+            m_recentlyRequestedCallCountingCompletion = true;
         }
-        m_isBackgroundWorkScheduled = true;
-    }
 
-    AutoResetIsBackgroundWorkScheduled autoResetIsBackgroundWorkScheduled(this);
-    RequestBackgroundWork();
-    autoResetIsBackgroundWorkScheduled.Cancel();
-}
-
-void TieredCompilationManager::ScheduleBackgroundWork()
-{
-    CONTRACTL
-    {
-        THROWS;
-        GC_TRIGGERS;
-        MODE_ANY;
-    }
-    CONTRACTL_END;
-
-    {
-        LockHolder tieredCompilationLockHolder;
-
-        if (m_isBackgroundWorkScheduled ||
-            (!m_isPendingCallCountingCompletion && m_countOfMethodsToOptimize == 0) ||
-            IsTieringDelayActive())
+        // The thread is in a GC_NOTRIGGER scope here. If the background worker is already running, we can schedule it inside
+        // the same lock without triggering a GC.
+        if (TryScheduleBackgroundWorkerWithoutGCTrigger_Locked())
         {
             return;
         }
-        m_isBackgroundWorkScheduled = true;
-    }
-
-    AutoResetIsBackgroundWorkScheduled autoResetIsBackgroundWorkScheduled(this);
-    RequestBackgroundWork();
-    autoResetIsBackgroundWorkScheduled.Cancel();
-}
-
-void TieredCompilationManager::RequestBackgroundWork()
-{
-    WRAPPER_NO_CONTRACT;
-    _ASSERTE(m_isBackgroundWorkScheduled);
-
-    if (ThreadpoolMgr::UsePortableThreadPool())
-    {
-        // QueueUserWorkItem is not intended to be supported in this mode, and there are call sites of this function where
-        // managed code cannot be called instead to queue a work item. Use a timer with zero due time instead, which would on
-        // the timer thread call into managed code to queue a work item.
-
-        NewHolder<ThreadpoolMgr::TimerInfoContext> timerContextHolder = new ThreadpoolMgr::TimerInfoContext();
-        timerContextHolder->TimerId = 0;
-
-        _ASSERTE(m_doBackgroundWorkTimerHandle == nullptr);
-        if (!ThreadpoolMgr::CreateTimerQueueTimer(
-                &m_doBackgroundWorkTimerHandle,
-                DoBackgroundWorkTimerCallback,
-                timerContextHolder,
-                0 /* DueTime */,
-                (DWORD)-1 /* Period, non-repeating */,
-                0 /* Flags */))
-        {
-            _ASSERTE(m_doBackgroundWorkTimerHandle == nullptr);
-            ThrowOutOfMemory();
-        }
-
-        timerContextHolder.SuppressRelease(); // the timer context is automatically deleted by the timer infrastructure
-        return;
-    }
-
-    if (!ThreadpoolMgr::QueueUserWorkItem(StaticBackgroundWorkCallback, this, QUEUE_ONLY, TRUE))
-    {
-        ThrowOutOfMemory();
     }
-}
 
-void WINAPI TieredCompilationManager::DoBackgroundWorkTimerCallback(PVOID parameter, BOOLEAN timerFired)
-{
-    CONTRACTL
-    {
-        THROWS;
-        GC_TRIGGERS;
-        MODE_PREEMPTIVE;
-    }
-    CONTRACTL_END;
-
-    _ASSERTE(ThreadpoolMgr::UsePortableThreadPool());
-    _ASSERTE(timerFired);
-
-    TieredCompilationManager *pTieredCompilationManager = GetAppDomain()->GetTieredCompilationManager();
-    _ASSERTE(pTieredCompilationManager->m_doBackgroundWorkTimerHandle != nullptr);
-    ThreadpoolMgr::DeleteTimerQueueTimer(pTieredCompilationManager->m_doBackgroundWorkTimerHandle, nullptr);
-    pTieredCompilationManager->m_doBackgroundWorkTimerHandle = nullptr;
-
-    pTieredCompilationManager->DoBackgroundWork();
-}
-
-// This is the initial entrypoint for the background thread, called by
-// the threadpool.
-DWORD WINAPI TieredCompilationManager::StaticBackgroundWorkCallback(void *args)
-{
-    STANDARD_VM_CONTRACT;
-    _ASSERTE(!ThreadpoolMgr::UsePortableThreadPool());
-
-    TieredCompilationManager * pTieredCompilationManager = (TieredCompilationManager *)args;
-    pTieredCompilationManager->DoBackgroundWork();
-    return 0;
+    CreateBackgroundWorker(); // requires GC_TRIGGERS
 }
 
 // This method will process one or more methods from the optimization queue
 // on a background thread. Each such method will be jitted with code
 // optimizations enabled and then installed as the active implementation
 // of the method entrypoint.
-void TieredCompilationManager::DoBackgroundWork()
+bool TieredCompilationManager::DoBackgroundWork(
+    UINT64 *workDurationTicksRef,
+    UINT64 minWorkDurationTicks,
+    UINT64 maxWorkDurationTicks)
 {
     WRAPPER_NO_CONTRACT;
-    _ASSERTE(m_doBackgroundWorkTimerHandle == nullptr);
+    _ASSERTE(GetThread() == s_backgroundWorkerThread);
+    _ASSERTE(m_isPendingCallCountingCompletion || m_countOfMethodsToOptimize != 0);
+    _ASSERTE(workDurationTicksRef != nullptr);
+    _ASSERTE(minWorkDurationTicks <= maxWorkDurationTicks);
 
-    AutoResetIsBackgroundWorkScheduled autoResetIsBackgroundWorkScheduled(this);
-
-    // We need to be careful not to work for too long in a single invocation of this method or we could starve the thread pool
-    // and force it to create unnecessary additional threads. We will JIT for a minimum of this quantum, then schedule another
-    // work item to the thread pool and return this thread back to the pool.
-    const DWORD OptimizationQuantumMs = 50;
+    UINT64 workDurationTicks = *workDurationTicksRef;
+    _ASSERTE(workDurationTicks >= minWorkDurationTicks);
+    _ASSERTE(workDurationTicks <= maxWorkDurationTicks);
 
     if (ETW::CompilationLog::TieredCompilation::Runtime::IsEnabled())
     {
@@ -658,10 +696,15 @@ void TieredCompilationManager::DoBackgroundWork()
         ETW::CompilationLog::TieredCompilation::Runtime::SendBackgroundJitStart(countOfMethodsToOptimize);
     }
 
+    bool sendStopEvent = true;
     bool allMethodsJitted = false;
     UINT32 jittedMethodCount = 0;
-    DWORD startTickCount = GetTickCount();
-    while (true)
+    LARGE_INTEGER li;
+    QueryPerformanceCounter(&li);
+    UINT64 startTicks = li.QuadPart;
+    UINT64 previousTicks = startTicks;
+
+    do
     {
         bool completeCallCounting = false;
         NativeCodeVersion nativeCodeVersionToOptimize;
@@ -670,22 +713,20 @@ void TieredCompilationManager::DoBackgroundWork()
 
             if (IsTieringDelayActive())
             {
-                m_isBackgroundWorkScheduled = false;
-                autoResetIsBackgroundWorkScheduled.Cancel();
                 break;
             }
 
             bool wasPendingCallCountingCompletion = m_isPendingCallCountingCompletion;
             if (wasPendingCallCountingCompletion)
             {
-                if (m_recentlyRequestedCallCountingCompletionAgain)
+                if (m_recentlyRequestedCallCountingCompletion)
                 {
                     // A potentially large number of methods may reach the call count threshold at about the same time or in
                     // bursts. To coalesce a burst of pending completions a bit, if another method has reached the call count
                     // threshold since the last time it was checked here, don't complete call counting yet. Coalescing
                     // call counting completions a bit helps to avoid blocking foreground threads due to lock contention as
                     // methods are continuing to reach the call count threshold.
-                    m_recentlyRequestedCallCountingCompletionAgain = false;
+                    m_recentlyRequestedCallCountingCompletion = false;
                 }
                 else
                 {
@@ -705,13 +746,11 @@ void TieredCompilationManager::DoBackgroundWork()
                         // If call counting completions are pending and delayed above for coalescing, complete call counting
                         // now, as that will add more methods to be rejitted
                         m_isPendingCallCountingCompletion = false;
-                        _ASSERTE(!m_recentlyRequestedCallCountingCompletionAgain);
+                        _ASSERTE(!m_recentlyRequestedCallCountingCompletion);
                         completeCallCounting = true;
                     }
                     else
                     {
-                        m_isBackgroundWorkScheduled = false;
-                        autoResetIsBackgroundWorkScheduled.Cancel();
                         allMethodsJitted = true;
                         break;
                     }
@@ -733,43 +772,90 @@ void TieredCompilationManager::DoBackgroundWork()
                     GET_EXCEPTION()->GetHR());
             }
             EX_END_CATCH(RethrowTerminalExceptions);
+
+            continue;
         }
-        else
+
+        OptimizeMethod(nativeCodeVersionToOptimize);
+        ++jittedMethodCount;
+
+        // Yield the thread periodically to give preference to possibly more important work
+
+        QueryPerformanceCounter(&li);
+        UINT64 currentTicks = li.QuadPart;
+        if (currentTicks - startTicks < workDurationTicks)
+        {
+            previousTicks = currentTicks;
+            continue;
+        }
+        if (currentTicks - previousTicks >= maxWorkDurationTicks)
         {
-            OptimizeMethod(nativeCodeVersionToOptimize);
-            ++jittedMethodCount;
+            // It's unlikely that a single iteration above would have taken that long; more likely, this thread got scheduled
+            // out for a while, in which case there is no need to yield again. Discount the time taken by the previous
+            // iteration and continue processing work.
+            startTicks += currentTicks - previousTicks;
+            previousTicks = currentTicks;
+            continue;
         }
 
-        // If we have been running for too long return the thread to the threadpool and queue another event
-        // This gives the threadpool a chance to service other requests on this thread before returning to
-        // this work.
-        DWORD currentTickCount = GetTickCount();
-        if (currentTickCount - startTickCount >= OptimizationQuantumMs)
+        if (ETW::CompilationLog::TieredCompilation::Runtime::IsEnabled())
         {
-            bool success = false;
-            EX_TRY
+            UINT32 countOfMethodsToOptimize = m_countOfMethodsToOptimize;
+            if (m_isPendingCallCountingCompletion)
             {
-                RequestBackgroundWork();
-                success = true;
+                countOfMethodsToOptimize += CallCountingManager::GetCountOfCodeVersionsPendingCompletion();
             }
-            EX_CATCH
-            {
-                STRESS_LOG1(LF_TIEREDCOMPILATION, LL_WARNING, "TieredCompilationManager::DoBackgroundWork: "
-                    "Exception in RequestBackgroundWork, hr=0x%x\n",
-                    GET_EXCEPTION()->GetHR());
-            }
-            EX_END_CATCH(RethrowTerminalExceptions);
-            if (success)
+            ETW::CompilationLog::TieredCompilation::Runtime::SendBackgroundJitStop(countOfMethodsToOptimize, jittedMethodCount);
+        }
+
+        UINT64 beforeSleepTicks = currentTicks;
+        ClrSleepEx(0, false);
+
+        QueryPerformanceCounter(&li);
+        currentTicks = li.QuadPart;
+
+        // Depending on how oversubscribed the system's CPUs are, the sleep may have kept this thread from being scheduled for
+        // a long time. Yielding the thread too frequently can significantly slow down the background work, which in turn can
+        // significantly delay how long it takes to reach steady-state performance. On the other hand, yielding the thread too
+        // infrequently can let the background work monopolize the available CPU resources and starve more important foreground
+        // work. So the sleep duration is measured, and a portion of it is used as the work duration for the next batch of
+        // background work (clamped to the min and max to keep things sensible). Since the work duration is capped to a maximum
+        // and a long sleep delay is likely to repeat, the work duration is not dropped back as soon as a short sleep is
+        // observed; instead it decays gradually toward the minimum, reaching it only if the sleep duration stays consistently
+        // short. This avoids returning to too-frequent yielding too quickly.
+        UINT64 newWorkDurationTicks = (currentTicks - beforeSleepTicks) / 4;
+        UINT64 decayedWorkDurationTicks = (workDurationTicks + workDurationTicks / 2) / 2;
+        workDurationTicks = newWorkDurationTicks < decayedWorkDurationTicks ? decayedWorkDurationTicks : newWorkDurationTicks;
+        if (workDurationTicks < minWorkDurationTicks)
+        {
+            workDurationTicks = minWorkDurationTicks;
+        }
+        else if (workDurationTicks > maxWorkDurationTicks)
+        {
+            workDurationTicks = maxWorkDurationTicks;
+        }
+
+        if (IsTieringDelayActive())
+        {
+            sendStopEvent = false;
+            break;
+        }
+
+        if (ETW::CompilationLog::TieredCompilation::Runtime::IsEnabled())
+        {
+            UINT32 countOfMethodsToOptimize = m_countOfMethodsToOptimize;
+            if (m_isPendingCallCountingCompletion)
             {
-                autoResetIsBackgroundWorkScheduled.Cancel();
-                break;
+                countOfMethodsToOptimize += CallCountingManager::GetCountOfCodeVersionsPendingCompletion();
             }
-
-            startTickCount = currentTickCount;
+            ETW::CompilationLog::TieredCompilation::Runtime::SendBackgroundJitStart(countOfMethodsToOptimize);
         }
-    }
 
-    if (ETW::CompilationLog::TieredCompilation::Runtime::IsEnabled())
+        jittedMethodCount = 0;
+        startTicks = previousTicks = currentTicks;
+    } while (!IsTieringDelayActive());
+
+    if (ETW::CompilationLog::TieredCompilation::Runtime::IsEnabled() && sendStopEvent)
     {
         UINT32 countOfMethodsToOptimize = m_countOfMethodsToOptimize;
         if (m_isPendingCallCountingCompletion)
@@ -793,6 +879,9 @@ void TieredCompilationManager::DoBackgroundWork()
         }
         EX_END_CATCH(RethrowTerminalExceptions);
     }
+
+    *workDurationTicksRef = workDurationTicks;
+    return allMethodsJitted;
 }
 
 // Jit compiles and installs new optimized code for a method.
@@ -1009,8 +1098,6 @@ CORJIT_FLAGS TieredCompilationManager::GetJitFlags(PrepareCodeConfig *config)
     return flags;
 }
 
-CrstStatic TieredCompilationManager::s_lock;
-
 #ifdef _DEBUG
 bool TieredCompilationManager::IsLockOwnedByCurrentThread()
 {
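
The adaptive quantum computed in DoBackgroundWork above reduces to a small rule: take a quarter of the measured sleep duration, compare it against a 3/4 decay of the current work duration, keep the larger of the two, and clamp the result to the min/max range. A minimal standalone sketch of that rule follows; the NextWorkDuration name, the plain uint64_t signature, and the numbers in main are illustrative only and are not part of the patch.

#include <algorithm>
#include <cstdint>
#include <cstdio>

// Returns the duration (in QPC ticks) to spend on background work before the next yield.
// A quarter of the measured sleep pulls the duration up when the system is oversubscribed
// (long sleeps => yield less often); the 3/4 decay pulls it back toward the minimum when
// sleeps become consistently short.
static uint64_t NextWorkDuration(
    uint64_t currentWorkTicks, uint64_t measuredSleepTicks,
    uint64_t minWorkTicks, uint64_t maxWorkTicks)
{
    uint64_t fromSleep = measuredSleepTicks / 4;
    uint64_t decayed = (currentWorkTicks + currentWorkTicks / 2) / 2; // 0.75 * current
    uint64_t next = fromSleep > decayed ? fromSleep : decayed;
    return std::min(std::max(next, minWorkTicks), maxWorkTicks);
}

int main()
{
    // Illustrative numbers only: pretend 1 tick == 1 microsecond.
    uint64_t minTicks = 8000, maxTicks = 50000, work = minTicks;

    // A long measured sleep (oversubscribed CPUs) grows the work duration: 120000 / 4 = 30000.
    work = NextWorkDuration(work, 120000, minTicks, maxTicks);
    std::printf("after a long sleep: %llu ticks\n", (unsigned long long)work);

    // Consistently short sleeps decay it back toward the minimum (x0.75 per yield, clamped).
    for (int i = 0; i < 6; ++i)
    {
        work = NextWorkDuration(work, 1000, minTicks, maxTicks);
    }
    std::printf("after short sleeps: %llu ticks\n", (unsigned long long)work);
    return 0;
}
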
index 003a9e5..3b61be3 100644 (file)
@@ -42,24 +42,36 @@ public:
 public:
     void HandleCallCountingForFirstCall(MethodDesc* pMethodDesc);
     bool TrySetCodeEntryPointAndRecordMethodForCallCounting(MethodDesc* pMethodDesc, PCODE codeEntryPoint);
-    void AsyncPromoteToTier1(NativeCodeVersion tier0NativeCodeVersion, bool *scheduleTieringBackgroundWorkRef);
+    void AsyncPromoteToTier1(NativeCodeVersion tier0NativeCodeVersion, bool *createTieringBackgroundWorkerRef);
     static CORJIT_FLAGS GetJitFlags(PrepareCodeConfig *config);
 
+#if !defined(DACCESS_COMPILE) && defined(_DEBUG)
+public:
+    static Thread *GetBackgroundWorkerThread()
+    {
+        LIMITED_METHOD_CONTRACT;
+        return s_backgroundWorkerThread;
+    }
+#endif
+
+public:
+    static bool TryScheduleBackgroundWorkerWithoutGCTrigger_Locked();
+    static void CreateBackgroundWorker();
+private:
+    static DWORD WINAPI BackgroundWorkerBootstrapper0(LPVOID args);
+    static void BackgroundWorkerBootstrapper1(LPVOID args);
+    void BackgroundWorkerStart();
+
 private:
     bool IsTieringDelayActive();
-    static void WINAPI TieringDelayTimerCallback(PVOID parameter, BOOLEAN timerFired);
-    void DeactivateTieringDelay();
+    bool TryDeactivateTieringDelay();
 
 public:
     void AsyncCompleteCallCounting();
 
-public:
-    void ScheduleBackgroundWork();
 private:
-    void RequestBackgroundWork();
-    static void WINAPI DoBackgroundWorkTimerCallback(PVOID parameter, BOOLEAN timerFired);
     static DWORD StaticBackgroundWorkCallback(void* args);
-    void DoBackgroundWork();
+    bool DoBackgroundWork(UINT64 *workDurationTicksRef, UINT64 minWorkDurationTicks, UINT64 maxWorkDurationTicks);
 
 private:
     void OptimizeMethod(NativeCodeVersion nativeCodeVersion);
@@ -68,9 +80,6 @@ private:
     void ActivateCodeVersion(NativeCodeVersion nativeCodeVersion);
 
 #ifndef DACCESS_COMPILE
-private:
-    static CrstStatic s_lock;
-
 public:
     static void StaticInitialize()
     {
@@ -99,9 +108,17 @@ public:
         LockHolder(const LockHolder &) = delete;
         LockHolder &operator =(const LockHolder &) = delete;
     };
+#endif // !DACCESS_COMPILE
 
+#ifndef DACCESS_COMPILE
 private:
-    class AutoResetIsBackgroundWorkScheduled;
+    static CrstStatic s_lock;
+#ifdef _DEBUG
+    static Thread *s_backgroundWorkerThread;
+#endif
+    static CLREvent s_backgroundWorkAvailableEvent;
+    static bool s_isBackgroundWorkerRunning;
+    static bool s_isBackgroundWorkerProcessingWork;
 #endif // !DACCESS_COMPILE
 
 private:
@@ -109,12 +126,9 @@ private:
     UINT32 m_countOfMethodsToOptimize;
     UINT32 m_countOfNewMethodsCalledDuringDelay;
     SArray<MethodDesc*>* m_methodsPendingCountingForTier1;
-    HANDLE m_tieringDelayTimerHandle;
-    HANDLE m_doBackgroundWorkTimerHandle;
-    bool m_isBackgroundWorkScheduled;
     bool m_tier1CallCountingCandidateMethodRecentlyRecorded;
     bool m_isPendingCallCountingCompletion;
-    bool m_recentlyRequestedCallCountingCompletionAgain;
+    bool m_recentlyRequestedCallCountingCompletion;
 
 #endif // FEATURE_TIERED_COMPILATION
 };
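
The new statics above, together with the call sites in the .cpp diff (TryScheduleBackgroundWorkerWithoutGCTrigger_Locked under the lock, then CreateBackgroundWorker outside it, marked "requires GC_TRIGGERS"), suggest a simple schedule/wake protocol: a producer records pending work under s_lock and signals s_backgroundWorkAvailableEvent if the worker is already running, falling back to creating the worker thread outside the lock otherwise, while the worker parks on the event between batches. The bodies of those functions are not part of this diff, so the following is only a rough sketch of that pattern under those assumptions, with standard C++ primitives standing in for CrstStatic/CLREvent and a sketch-only shutdown flag so the example terminates.

#include <condition_variable>
#include <cstdio>
#include <mutex>
#include <thread>

static std::mutex s_lock;                          // stands in for CrstStatic s_lock
static std::condition_variable s_workAvailable;    // stands in for s_backgroundWorkAvailableEvent
static bool s_isBackgroundWorkerRunning = false;
static bool s_isBackgroundWorkerProcessingWork = false;
static bool s_shutdown = false;                    // sketch-only, not part of the patch
static int  s_pendingWorkItems = 0;
static std::thread s_workerThread;

// Worker: parks until work is available, then drains it with the lock released around each batch.
static void BackgroundWorkerStart()
{
    std::unique_lock<std::mutex> lock(s_lock);
    for (;;)
    {
        s_workAvailable.wait(lock, [] { return s_pendingWorkItems != 0 || s_shutdown; });
        if (s_pendingWorkItems == 0)
        {
            return; // shutdown requested and no work left
        }

        s_isBackgroundWorkerProcessingWork = true;
        while (s_pendingWorkItems != 0)
        {
            --s_pendingWorkItems;
            lock.unlock();
            std::puts("processing one batch of background work"); // e.g. complete call counting, rejit
            lock.lock();
        }
        s_isBackgroundWorkerProcessingWork = false;
    }
}

// Producer: the cheap path only flips state under the lock; thread creation happens outside it,
// mirroring the TryScheduleBackgroundWorkerWithoutGCTrigger_Locked / CreateBackgroundWorker split.
static void ScheduleBackgroundWork()
{
    bool createWorker = false;
    {
        std::lock_guard<std::mutex> lock(s_lock);
        ++s_pendingWorkItems;
        if (s_isBackgroundWorkerProcessingWork)
        {
            return; // the worker will see the new item before it parks again
        }
        if (!s_isBackgroundWorkerRunning)
        {
            s_isBackgroundWorkerRunning = true;
            createWorker = true;
        }
    }

    if (createWorker)
    {
        s_workerThread = std::thread(BackgroundWorkerStart);
    }
    s_workAvailable.notify_one();
}

int main()
{
    for (int i = 0; i < 3; ++i)
    {
        ScheduleBackgroundWork();
    }

    // Sketch-only shutdown so the example exits cleanly after the queued work is drained.
    {
        std::lock_guard<std::mutex> lock(s_lock);
        s_shutdown = true;
    }
    s_workAvailable.notify_one();
    if (s_workerThread.joinable())
    {
        s_workerThread.join();
    }
    return 0;
}
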