}
// This is done lazily because the first call to the function below in the process triggers a measurement that
- // takes a nontrivial amount of time. See Thread::InitializeYieldProcessorNormalized(), which describes and
- // calculates this value.
+ // takes a nontrivial amount of time if the measurement has not already been done in the background.
+ // See Thread::InitializeYieldProcessorNormalized(), which describes and calculates this value.
s_optimalMaxSpinWaitsPerSpinIteration = GetOptimalMaxSpinWaitsPerSpinIterationInternal();
Debug.Assert(s_optimalMaxSpinWaitsPerSpinIteration > 0);
return s_optimalMaxSpinWaitsPerSpinIteration;
BEGIN_QCALL;
- Thread::EnsureYieldProcessorNormalizedInitialized();
- optimalMaxNormalizedYieldsPerSpinIteration = Thread::GetOptimalMaxNormalizedYieldsPerSpinIteration();
+ // RuntimeThread calls this function only once lazily and caches the result, so ensure initialization
+ EnsureYieldProcessorNormalizedInitialized();
+ optimalMaxNormalizedYieldsPerSpinIteration = g_optimalMaxNormalizedYieldsPerSpinIteration;
END_QCALL;
// spinning for less than that number of cycles, then switching to preemptive
// mode won't help a GC start any faster.
//
- if (iterations <= 100000 && Thread::IsYieldProcessorNormalizedInitialized())
+ if (iterations <= 100000)
{
+ YieldProcessorNormalizationInfo normalizationInfo;
for (int i = 0; i < iterations; i++)
- Thread::YieldProcessorNormalized();
+ YieldProcessorNormalized(normalizationInfo);
return;
}
HELPER_METHOD_FRAME_BEGIN_NOPOLL();
GCX_PREEMP();
- Thread::EnsureYieldProcessorNormalizedInitialized();
+ YieldProcessorNormalizationInfo normalizationInfo;
for (int i = 0; i < iterations; i++)
- Thread::YieldProcessorNormalized();
+ YieldProcessorNormalized(normalizationInfo);
HELPER_METHOD_FRAME_END();
}
#endif
GetFinalizerThread()->SetBackground(TRUE);
+ EnsureYieldProcessorNormalizedInitialized();
+
#ifdef FEATURE_PROFAPI_ATTACH_DETACH
// Add the Profiler Attach Event to the array of event handles that the
// finalizer thread waits on. If the process is not enabled for profiler
CONTEXT *ThreadStore::s_pOSContext = NULL;
CLREvent *ThreadStore::s_pWaitForStackCrawlEvent;
+static CrstStatic s_initializeYieldProcessorNormalizedCrst;
+
#ifndef DACCESS_COMPILE
}
CONTRACTL_END;
- Thread::s_initializeYieldProcessorNormalizedCrst.Init(CrstLeafLock);
+ s_initializeYieldProcessorNormalizedCrst.Init(CrstLeafLock);
// All patched helpers should fit into one page.
// If you hit this assert on retail build, there is most likely problem with BBT script.
}
#endif // FEATURE_APPDOMAIN_RESOURCE_MONITORING
-CrstStatic Thread::s_initializeYieldProcessorNormalizedCrst;
-int Thread::s_yieldsPerNormalizedYield = 0;
-int Thread::s_optimalMaxNormalizedYieldsPerSpinIteration = 0;
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// YieldProcessorNormalized
+
+// Defaults are for when InitializeYieldProcessorNormalized has not yet been called or when no measurement is done, and are
+// tuned for Skylake processors
+int g_yieldsPerNormalizedYield = 1; // current value is for Skylake processors, this would be 9 for pre-Skylake
+int g_optimalMaxNormalizedYieldsPerSpinIteration = 7;
+
+static Volatile<bool> s_isYieldProcessorNormalizedInitialized = false;
-void Thread::InitializeYieldProcessorNormalized()
+void InitializeYieldProcessorNormalized()
{
LIMITED_METHOD_CONTRACT;
CrstHolder lock(&s_initializeYieldProcessorNormalizedCrst);
- if (IsYieldProcessorNormalizedInitialized())
+ if (s_isYieldProcessorNormalizedInitialized)
{
return;
}
// Intel pre-Skylake processor: measured typically 14-17 cycles per yield
// Intel post-Skylake processor: measured typically 125-150 cycles per yield
- const int DefaultYieldsPerNormalizedYield = 1; // defaults are for when no measurement is done
- const int DefaultOptimalMaxNormalizedYieldsPerSpinIteration = 64; // tuned for pre-Skylake processors, for post-Skylake it should be 7
const int MeasureDurationMs = 10;
const int MaxYieldsPerNormalizedYield = 10; // measured typically 8-9 on pre-Skylake
const int MinNsPerNormalizedYield = 37; // measured typically 37-46 on post-Skylake
if (!QueryPerformanceFrequency(&li) || (ULONGLONG)li.QuadPart < 1000 / MeasureDurationMs)
{
// High precision clock not available or clock resolution is too low, resort to defaults
- s_yieldsPerNormalizedYield = DefaultYieldsPerNormalizedYield;
- s_optimalMaxNormalizedYieldsPerSpinIteration = DefaultOptimalMaxNormalizedYieldsPerSpinIteration;
+ s_isYieldProcessorNormalizedInitialized = true;
return;
}
ULONGLONG ticksPerSecond = li.QuadPart;
ULONGLONG elapsedTicks;
do
{
- for (int i = 0; i < 10; ++i)
+ // On some systems, querying the high performance counter has relatively significant overhead. Do enough yields to mask
+ // the timing overhead. Assuming one yield has a delay of MinNsPerNormalizedYield, 1000 yields would have a delay in the
+ // low microsecond range.
+ for (int i = 0; i < 1000; ++i)
{
YieldProcessor();
}
- yieldCount += 10;
+ yieldCount += 1000;
QueryPerformanceCounter(&li);
ULONGLONG nowTicks = li.QuadPart;
optimalMaxNormalizedYieldsPerSpinIteration = 1;
}
- s_yieldsPerNormalizedYield = yieldsPerNormalizedYield;
- s_optimalMaxNormalizedYieldsPerSpinIteration = optimalMaxNormalizedYieldsPerSpinIteration;
+ g_yieldsPerNormalizedYield = yieldsPerNormalizedYield;
+ g_optimalMaxNormalizedYieldsPerSpinIteration = optimalMaxNormalizedYieldsPerSpinIteration;
+ s_isYieldProcessorNormalizedInitialized = true;
+}
+
+// Runs InitializeYieldProcessorNormalized() if s_isYieldProcessorNormalizedInitialized is not yet set. The flag is read
+// here without taking s_initializeYieldProcessorNormalizedCrst; InitializeYieldProcessorNormalized() takes that lock and
+// checks the flag again before doing any work, so a caller that races past this check returns from there without
+// repeating the measurement.
+void EnsureYieldProcessorNormalizedInitialized()
+{
+ WRAPPER_NO_CONTRACT;
+
+ if (!s_isYieldProcessorNormalizedInitialized)
+ {
+ InitializeYieldProcessorNormalized();
+ }
}
m_HijackReturnKind = returnKind;
}
#endif // FEATURE_HIJACK
-
-private:
- static CrstStatic s_initializeYieldProcessorNormalizedCrst;
- static int s_yieldsPerNormalizedYield;
- static int s_optimalMaxNormalizedYieldsPerSpinIteration;
-
-private:
- static void InitializeYieldProcessorNormalized();
-
-public:
- static bool IsYieldProcessorNormalizedInitialized()
- {
- LIMITED_METHOD_CONTRACT;
- return s_yieldsPerNormalizedYield != 0 && s_optimalMaxNormalizedYieldsPerSpinIteration != 0;
- }
-
-public:
- static void EnsureYieldProcessorNormalizedInitialized()
- {
- LIMITED_METHOD_CONTRACT;
-
- if (!IsYieldProcessorNormalizedInitialized())
- {
- InitializeYieldProcessorNormalized();
- }
- }
-
-public:
- static int GetOptimalMaxNormalizedYieldsPerSpinIteration()
- {
- WRAPPER_NO_CONTRACT;
- _ASSERTE(IsYieldProcessorNormalizedInitialized());
-
- return s_optimalMaxNormalizedYieldsPerSpinIteration;
- }
-
-public:
- static void YieldProcessorNormalized()
- {
- WRAPPER_NO_CONTRACT;
- _ASSERTE(IsYieldProcessorNormalizedInitialized());
-
- int n = s_yieldsPerNormalizedYield;
- while (--n >= 0)
- {
- YieldProcessor();
- }
- }
-
- static void YieldProcessorNormalizedWithBackOff(unsigned int spinIteration)
- {
- WRAPPER_NO_CONTRACT;
- _ASSERTE(IsYieldProcessorNormalizedInitialized());
-
- int n = s_optimalMaxNormalizedYieldsPerSpinIteration;
- if (spinIteration <= 30 && (1 << spinIteration) < n)
- {
- n = 1 << spinIteration;
- }
- n *= s_yieldsPerNormalizedYield;
- while (--n >= 0)
- {
- YieldProcessor();
- }
- }
};
// End of class Thread
BOOL Debug_IsLockedViaThreadSuspension();
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+// YieldProcessorNormalized
+
+extern int g_yieldsPerNormalizedYield;
+extern int g_optimalMaxNormalizedYieldsPerSpinIteration;
+
+void InitializeYieldProcessorNormalized();
+void EnsureYieldProcessorNormalizedInitialized();
+
+// Holds a copy of g_yieldsPerNormalizedYield taken when the object is constructed. A spin loop constructs one instance
+// before the loop and passes it to every YieldProcessorNormalized() call, so each call reads the copied value from this
+// object instead of the global.
+class YieldProcessorNormalizationInfo
+{
+private:
+ // Number of YieldProcessor() calls issued per normalized yield, as of construction time
+ int yieldsPerNormalizedYield;
+
+public:
+ // Copies the current value of g_yieldsPerNormalizedYield; this does not trigger or wait for initialization
+ YieldProcessorNormalizationInfo() : yieldsPerNormalizedYield(g_yieldsPerNormalizedYield)
+ {
+ }
+
+ // The only reader of the private field
+ friend void YieldProcessorNormalized(const YieldProcessorNormalizationInfo &);
+};
+
+// Performs one normalized yield: calls YieldProcessor() normalizationInfo.yieldsPerNormalizedYield times. If that count is
+// zero or negative, no yield is issued.
+FORCEINLINE void YieldProcessorNormalized(const YieldProcessorNormalizationInfo &normalizationInfo)
+{
+ LIMITED_METHOD_CONTRACT;
+
+ int n = normalizationInfo.yieldsPerNormalizedYield;
+ while (--n >= 0)
+ {
+ YieldProcessor();
+ }
+}
+
+// Holds copies of g_yieldsPerNormalizedYield and g_optimalMaxNormalizedYieldsPerSpinIteration taken at construction, plus
+// their product, for use by YieldProcessorWithBackOffNormalized().
+class YieldProcessorWithBackOffNormalizationInfo
+{
+private:
+ // Number of YieldProcessor() calls issued per normalized yield, as of construction time
+ int yieldsPerNormalizedYield;
+ // Upper bound on normalized yields in one spin iteration, as of construction time
+ int optimalMaxNormalizedYieldsPerSpinIteration;
+ // Product of the two fields above: the upper bound expressed in raw YieldProcessor() calls
+ int optimalMaxYieldsPerSpinIteration;
+
+public:
+ // The initializer for optimalMaxYieldsPerSpinIteration reads the two members above it. Members are initialized in
+ // declaration order, so the field order in this class must keep optimalMaxYieldsPerSpinIteration last.
+ YieldProcessorWithBackOffNormalizationInfo()
+ : yieldsPerNormalizedYield(g_yieldsPerNormalizedYield),
+ optimalMaxNormalizedYieldsPerSpinIteration(g_optimalMaxNormalizedYieldsPerSpinIteration),
+ optimalMaxYieldsPerSpinIteration(yieldsPerNormalizedYield * optimalMaxNormalizedYieldsPerSpinIteration)
+ {
+ }
+
+ // The only reader of the private fields
+ friend void YieldProcessorWithBackOffNormalized(const YieldProcessorWithBackOffNormalizationInfo &, unsigned int);
+};
+
+// Yields for a duration that doubles with each spin iteration up to a cap.
+//
+// normalizationInfo - values captured from the globals when the caller constructed the info object
+// spinIteration     - zero-based index of the caller's current spin iteration
+//
+// While (1 << spinIteration) is below optimalMaxNormalizedYieldsPerSpinIteration, YieldProcessor() is called
+// (1 << spinIteration) * yieldsPerNormalizedYield times; otherwise it is called optimalMaxYieldsPerSpinIteration times.
+FORCEINLINE void YieldProcessorWithBackOffNormalized(
+ const YieldProcessorWithBackOffNormalizationInfo &normalizationInfo,
+ unsigned int spinIteration)
+{
+ LIMITED_METHOD_CONTRACT;
+
+ int n;
+ // The <= 30 check is evaluated first so that 1 << spinIteration always fits in a positive int
+ if (spinIteration <= 30 && (1 << spinIteration) < normalizationInfo.optimalMaxNormalizedYieldsPerSpinIteration)
+ {
+ n = (1 << spinIteration) * normalizationInfo.yieldsPerNormalizedYield;
+ }
+ else
+ {
+ // Cap reached (or shift count too large): use the product precomputed by the info object's constructor
+ n = normalizationInfo.optimalMaxYieldsPerSpinIteration;
+ }
+ while (--n >= 0)
+ {
+ YieldProcessor();
+ }
+}
+
#endif //__threads_h__