From b8220e45b14ea4174e323b1ca7b74ac2fd128e0e Mon Sep 17 00:00:00 2001 From: Ben Adams Date: Fri, 29 Jul 2016 18:01:33 +0100 Subject: [PATCH] Adjust fences and add padding --- src/vm/comthreadpool.cpp | 7 ++++--- src/vm/threadpoolrequest.cpp | 24 +++++++++++++++--------- src/vm/threadpoolrequest.h | 28 ++++++++++++++++------------ src/vm/win32threadpool.cpp | 16 ++++++++++------ src/vm/win32threadpool.h | 15 ++++++++++----- 5 files changed, 55 insertions(+), 35 deletions(-) diff --git a/src/vm/comthreadpool.cpp b/src/vm/comthreadpool.cpp index e9ffbaf..8231214 100644 --- a/src/vm/comthreadpool.cpp +++ b/src/vm/comthreadpool.cpp @@ -251,6 +251,7 @@ FCIMPL0(FC_BOOL_RET, ThreadPoolNative::NotifyRequestComplete) pThread->HasCriticalRegion() || pThread->HasThreadAffinity(); + // Read fenced call bool shouldAdjustWorkers = ThreadpoolMgr::ShouldAdjustMaxWorkersActive(); // @@ -266,15 +267,15 @@ FCIMPL0(FC_BOOL_RET, ThreadPoolNative::NotifyRequestComplete) { HELPER_METHOD_FRAME_BEGIN_RET_0(); + if (needReset) + pThread->InternalReset(FALSE, TRUE, TRUE, FALSE); + if (shouldAdjustWorkers) { ThreadpoolMgr::AdjustMaxWorkersActive(); tal.Release(); } - if (needReset) - pThread->InternalReset (FALSE, TRUE, TRUE, FALSE); - HELPER_METHOD_FRAME_END(); } diff --git a/src/vm/threadpoolrequest.cpp b/src/vm/threadpoolrequest.cpp index 5b970c8..3b5cb37 100644 --- a/src/vm/threadpoolrequest.cpp +++ b/src/vm/threadpoolrequest.cpp @@ -362,7 +362,7 @@ void UnManagedPerAppDomainTPCount::SetAppDomainRequestsActive() { WRAPPER_NO_CONTRACT; #ifndef DACCESS_COMPILE - LONG count = m_outstandingThreadRequestCount; + LONG count = VolatileLoad(&m_outstandingThreadRequestCount); while (count < (LONG)ThreadpoolMgr::NumberOfProcessors) { LONG prevCount = FastInterlockCompareExchange(&m_outstandingThreadRequestCount, count+1, count); @@ -383,7 +383,7 @@ void UnManagedPerAppDomainTPCount::SetAppDomainRequestsActive() bool FORCEINLINE UnManagedPerAppDomainTPCount::TakeActiveRequest() { LIMITED_METHOD_CONTRACT; - LONG count = m_outstandingThreadRequestCount; + LONG count = VolatileLoad(&m_outstandingThreadRequestCount); while (count > 0) { @@ -602,7 +602,7 @@ void ManagedPerAppDomainTPCount::SetAppDomainRequestsActive() _ASSERTE(m_id.m_dwId != 0); #ifndef DACCESS_COMPILE - LONG count = m_numRequestsPending; + LONG count = VolatileLoad(&m_numRequestsPending); while (count != ADUnloading) { LONG prev = FastInterlockCompareExchange(&m_numRequestsPending, count+1, count); @@ -631,12 +631,15 @@ void ManagedPerAppDomainTPCount::ClearAppDomainRequestsActive(BOOL bADU) if (bADU) { - m_numRequestsPending = ADUnloading; + VolatileStore(&m_numRequestsPending, ADUnloading); } else { - LONG count = m_numRequestsPending; - while (count != ADUnloading && count > 0) + LONG count = VolatileLoad(&m_numRequestsPending); + // Test is: count > 0 && count != ADUnloading + // Since: const ADUnloading == -1 + // Both are tested: (count > 0) means following also true (count != ADUnloading) + while (count > 0) { LONG prev = FastInterlockCompareExchange(&m_numRequestsPending, 0, count); if (prev == count) @@ -649,8 +652,11 @@ void ManagedPerAppDomainTPCount::ClearAppDomainRequestsActive(BOOL bADU) bool ManagedPerAppDomainTPCount::TakeActiveRequest() { LIMITED_METHOD_CONTRACT; - LONG count = m_numRequestsPending; - while (count != ADUnloading && count > 0) + LONG count = VolatileLoad(&m_numRequestsPending); + // Test is: count > 0 && count != ADUnloading + // Since: const ADUnloading == -1 + // Both are tested: (count > 0) means following also true (count != ADUnloading) + while (count > 0) { LONG prev = FastInterlockCompareExchange(&m_numRequestsPending, count-1, count); if (prev == count) @@ -681,7 +687,7 @@ void ManagedPerAppDomainTPCount::ClearAppDomainUnloading() // it should be, but if it's smaller then we will be permanently out of sync with the // AD. // - m_numRequestsPending = ThreadpoolMgr::NumberOfProcessors; + VolatileStore(&m_numRequestsPending, (LONG)ThreadpoolMgr::NumberOfProcessors); if (!CLRThreadpoolHosted() && ThreadpoolMgr::IsInitialized()) { ThreadpoolMgr::MaybeAddWorkingWorker(); diff --git a/src/vm/threadpoolrequest.h b/src/vm/threadpoolrequest.h index 3c332e7..c1ee9e3 100644 --- a/src/vm/threadpoolrequest.h +++ b/src/vm/threadpoolrequest.h @@ -85,14 +85,16 @@ public: inline void ResetState() { LIMITED_METHOD_CONTRACT; - m_numRequestsPending = 0; + VolatileStore(&m_numRequestsPending, (LONG)0); m_id.m_dwId = 0; } inline BOOL IsRequestPending() { LIMITED_METHOD_CONTRACT; - return m_numRequestsPending != ADUnloading && m_numRequestsPending > 0; + + LONG count = VolatileLoad(&m_numRequestsPending); + return count != ADUnloading && count > 0; } void SetAppDomainRequestsActive(); @@ -106,7 +108,7 @@ public: //has started running yet. That implies, no requests should be pending //or dispatched to this structure yet. - _ASSERTE(m_numRequestsPending != ADUnloading); + _ASSERTE(VolatileLoad(&m_numRequestsPending) != ADUnloading); _ASSERTE(m_id.m_dwId == 0); m_id = id; @@ -119,7 +121,7 @@ public: //has started running yet. That implies, no requests should be pending //or dispatched to this structure yet. - _ASSERTE(m_numRequestsPending != ADUnloading); + _ASSERTE(VolatileLoad(&m_numRequestsPending) != ADUnloading); _ASSERTE(m_id.m_dwId == 0); _ASSERTE(m_index.m_dwIndex == UNUSED_THREADPOOL_INDEX); @@ -135,7 +137,7 @@ public: //added removed at this time. So, make sure that the per-appdomain structures that //have been cleared(reclaimed) don't have any pending requests to them. - _ASSERTE(m_numRequestsPending != ADUnloading); + _ASSERTE(VolatileLoad(&m_numRequestsPending) != ADUnloading); _ASSERTE(m_id.m_dwId == 0); return TRUE; @@ -159,14 +161,14 @@ public: inline void SetAppDomainUnloading() { LIMITED_METHOD_CONTRACT; - m_numRequestsPending = ADUnloading; + VolatileStore(&m_numRequestsPending, ADUnloading); } inline void ClearAppDomainUnloading(); inline BOOL IsAppDomainUnloading() { - return m_numRequestsPending.Load() == ADUnloading; + return VolatileLoad(&m_numRequestsPending) == ADUnloading; } void DispatchWorkItem(bool* foundWork, bool* wasNotRecalled); @@ -174,7 +176,8 @@ public: private: struct { BYTE padding1[64]; // padding to ensure own cache line - Volatile m_numRequestsPending; + // Only use with VolatileLoad+VolatileStore+FastInterlockCompareExchange + LONG m_numRequestsPending; BYTE padding2[64]; // padding to ensure own cache line ADID m_id; BYTE padding3[64]; // padding to ensure own cache line @@ -217,13 +220,13 @@ public: { LIMITED_METHOD_CONTRACT; m_NumRequests = 0; - m_outstandingThreadRequestCount = 0; + VolatileStore(&m_outstandingThreadRequestCount, (LONG)0); } inline BOOL IsRequestPending() { LIMITED_METHOD_CONTRACT; - return m_outstandingThreadRequestCount != 0 ? TRUE : FALSE; + return VolatileLoad(&m_outstandingThreadRequestCount) != (LONG)0 ? TRUE : FALSE; } void SetAppDomainRequestsActive(); @@ -231,7 +234,7 @@ public: inline void ClearAppDomainRequestsActive(BOOL bADU) { LIMITED_METHOD_CONTRACT; - m_outstandingThreadRequestCount = 0; + VolatileStore(&m_outstandingThreadRequestCount, (LONG)0); } bool TakeActiveRequest(); @@ -281,7 +284,8 @@ private: BYTE padding1[64]; // padding to ensure own cache line ULONG m_NumRequests; BYTE padding2[64]; // padding to ensure own cache line - Volatile m_outstandingThreadRequestCount; + // Only use with VolatileLoad+VolatileStore+FastInterlockCompareExchange + LONG m_outstandingThreadRequestCount; BYTE padding3[64]; // padding to ensure own cache line SpinLock m_lock; }; diff --git a/src/vm/win32threadpool.cpp b/src/vm/win32threadpool.cpp index 3696ad5..0df0f1e 100644 --- a/src/vm/win32threadpool.cpp +++ b/src/vm/win32threadpool.cpp @@ -95,9 +95,12 @@ LONG ThreadpoolMgr::cpuUtilizationAverage = 0; HillClimbing ThreadpoolMgr::HillClimbingInstance; -Volatile ThreadpoolMgr::PriorCompletedWorkRequests = 0; -Volatile ThreadpoolMgr::PriorCompletedWorkRequestsTime; -Volatile ThreadpoolMgr::NextCompletedWorkRequestsTime; +BYTE ThreadpoolMgr::padding1[64]; +LONG ThreadpoolMgr::PriorCompletedWorkRequests = 0; +DWORD ThreadpoolMgr::PriorCompletedWorkRequestsTime; +DWORD ThreadpoolMgr::NextCompletedWorkRequestsTime; +BYTE ThreadpoolMgr::padding2[64]; + LARGE_INTEGER ThreadpoolMgr::CurrentSampleStartTime; int ThreadpoolMgr::ThreadAdjustmentInterval; @@ -1189,7 +1192,7 @@ void ThreadpoolMgr::AdjustMaxWorkersActive() DWORD currentTicks = GetTickCount(); LONG totalNumCompletions = Thread::GetTotalThreadPoolCompletionCount(); - LONG numCompletions = totalNumCompletions - PriorCompletedWorkRequests; + LONG numCompletions = totalNumCompletions - VolatileLoad(&PriorCompletedWorkRequests); LARGE_INTEGER startTime = CurrentSampleStartTime; LARGE_INTEGER endTime; @@ -1251,9 +1254,10 @@ void ThreadpoolMgr::AdjustMaxWorkersActive() } } - PriorCompletedWorkRequests = totalNumCompletions; + // Memory fences below writes + VolatileStore(&PriorCompletedWorkRequests, totalNumCompletions); PriorCompletedWorkRequestsTime = currentTicks; - NextCompletedWorkRequestsTime = PriorCompletedWorkRequestsTime + ThreadAdjustmentInterval; + NextCompletedWorkRequestsTime = currentTicks + ThreadAdjustmentInterval; CurrentSampleStartTime = endTime; } } diff --git a/src/vm/win32threadpool.h b/src/vm/win32threadpool.h index 5574512..39c53a1 100644 --- a/src/vm/win32threadpool.h +++ b/src/vm/win32threadpool.h @@ -1139,8 +1139,9 @@ public: if (CLRThreadpoolHosted()) return false; - DWORD requiredInterval = NextCompletedWorkRequestsTime - PriorCompletedWorkRequestsTime; - DWORD elapsedInterval = GetTickCount() - PriorCompletedWorkRequestsTime; + DWORD priorTime = PriorCompletedWorkRequestsTime; + DWORD requiredInterval = VolatileLoad(&NextCompletedWorkRequestsTime) - priorTime; // fences above read + DWORD elapsedInterval = GetTickCount() - priorTime; if (elapsedInterval >= requiredInterval) { ThreadCounter::Counts counts = WorkerCounter.GetCleanCounts(); @@ -1304,9 +1305,13 @@ private: static HillClimbing HillClimbingInstance; - static Volatile PriorCompletedWorkRequests; - static Volatile PriorCompletedWorkRequestsTime; - static Volatile NextCompletedWorkRequestsTime; + static BYTE padding1[64]; // padding to ensure own cache line + + static LONG PriorCompletedWorkRequests; + static DWORD PriorCompletedWorkRequestsTime; + static DWORD NextCompletedWorkRequestsTime; + + static BYTE padding2[64]; // padding to ensure own cache line static LARGE_INTEGER CurrentSampleStartTime; -- 2.7.4