Adjust fences and add padding
authorBen Adams <thundercat@illyriad.co.uk>
Fri, 29 Jul 2016 17:01:33 +0000 (18:01 +0100)
committerBen Adams <thundercat@illyriad.co.uk>
Tue, 9 Aug 2016 20:00:07 +0000 (21:00 +0100)
Commit migrated from https://github.com/dotnet/coreclr/commit/b8220e45b14ea4174e323b1ca7b74ac2fd128e0e

src/coreclr/src/vm/comthreadpool.cpp
src/coreclr/src/vm/threadpoolrequest.cpp
src/coreclr/src/vm/threadpoolrequest.h
src/coreclr/src/vm/win32threadpool.cpp
src/coreclr/src/vm/win32threadpool.h

index e9ffbaf..8231214 100644 (file)
@@ -251,6 +251,7 @@ FCIMPL0(FC_BOOL_RET, ThreadPoolNative::NotifyRequestComplete)
         pThread->HasCriticalRegion() ||
         pThread->HasThreadAffinity();
 
+    // Read fenced call
     bool shouldAdjustWorkers = ThreadpoolMgr::ShouldAdjustMaxWorkersActive();
 
     // 
@@ -266,15 +267,15 @@ FCIMPL0(FC_BOOL_RET, ThreadPoolNative::NotifyRequestComplete)
     {
         HELPER_METHOD_FRAME_BEGIN_RET_0();
 
+        if (needReset)
+            pThread->InternalReset(FALSE, TRUE, TRUE, FALSE);
+
         if (shouldAdjustWorkers)
         {
             ThreadpoolMgr::AdjustMaxWorkersActive();
             tal.Release();
         }
 
-        if (needReset)
-            pThread->InternalReset (FALSE, TRUE, TRUE, FALSE);
-
         HELPER_METHOD_FRAME_END();    
     }
 
index 5b970c8..3b5cb37 100644 (file)
@@ -362,7 +362,7 @@ void UnManagedPerAppDomainTPCount::SetAppDomainRequestsActive()
 {
     WRAPPER_NO_CONTRACT;
 #ifndef DACCESS_COMPILE
-    LONG count = m_outstandingThreadRequestCount;
+    LONG count = VolatileLoad(&m_outstandingThreadRequestCount);
     while (count < (LONG)ThreadpoolMgr::NumberOfProcessors)
     {
         LONG prevCount = FastInterlockCompareExchange(&m_outstandingThreadRequestCount, count+1, count);
@@ -383,7 +383,7 @@ void UnManagedPerAppDomainTPCount::SetAppDomainRequestsActive()
 bool FORCEINLINE UnManagedPerAppDomainTPCount::TakeActiveRequest()
 {
     LIMITED_METHOD_CONTRACT;
-    LONG count = m_outstandingThreadRequestCount;
+    LONG count = VolatileLoad(&m_outstandingThreadRequestCount);
 
     while (count > 0)
     {
@@ -602,7 +602,7 @@ void ManagedPerAppDomainTPCount::SetAppDomainRequestsActive()
     _ASSERTE(m_id.m_dwId != 0);
 
 #ifndef DACCESS_COMPILE
-        LONG count = m_numRequestsPending;
+        LONG count = VolatileLoad(&m_numRequestsPending);
         while (count != ADUnloading)
         {
             LONG prev = FastInterlockCompareExchange(&m_numRequestsPending, count+1, count);
@@ -631,12 +631,15 @@ void ManagedPerAppDomainTPCount::ClearAppDomainRequestsActive(BOOL bADU)
 
     if (bADU)
     {
-        m_numRequestsPending = ADUnloading;
+        VolatileStore(&m_numRequestsPending, ADUnloading);
     }
     else
     {
-        LONG count = m_numRequestsPending;
-        while (count != ADUnloading && count > 0)
+        LONG count = VolatileLoad(&m_numRequestsPending);
+        // Test is: count > 0 && count != ADUnloading
+        // Since:   const ADUnloading == -1
+        // Both are tested: (count > 0) means following also true  (count != ADUnloading)
+        while (count > 0)
         {
             LONG prev = FastInterlockCompareExchange(&m_numRequestsPending, 0, count);
             if (prev == count)
@@ -649,8 +652,11 @@ void ManagedPerAppDomainTPCount::ClearAppDomainRequestsActive(BOOL bADU)
 bool ManagedPerAppDomainTPCount::TakeActiveRequest()
 {
     LIMITED_METHOD_CONTRACT;
-    LONG count = m_numRequestsPending;
-    while (count != ADUnloading && count > 0)
+    LONG count = VolatileLoad(&m_numRequestsPending);
+    // Test is: count > 0 && count != ADUnloading
+    // Since:   const ADUnloading == -1
+    // Both are tested: (count > 0) means following also true (count != ADUnloading)
+    while (count > 0)
     {
         LONG prev = FastInterlockCompareExchange(&m_numRequestsPending, count-1, count);
         if (prev == count)
@@ -681,7 +687,7 @@ void ManagedPerAppDomainTPCount::ClearAppDomainUnloading()
     // it should be, but if it's smaller then we will be permanently out of sync with the
     // AD.
     //
-    m_numRequestsPending = ThreadpoolMgr::NumberOfProcessors;
+    VolatileStore(&m_numRequestsPending, (LONG)ThreadpoolMgr::NumberOfProcessors);
     if (!CLRThreadpoolHosted() && ThreadpoolMgr::IsInitialized())
     {
         ThreadpoolMgr::MaybeAddWorkingWorker();
index 3c332e7..c1ee9e3 100644 (file)
@@ -85,14 +85,16 @@ public:
     inline void ResetState()
     {
         LIMITED_METHOD_CONTRACT;
-        m_numRequestsPending = 0;
+        VolatileStore(&m_numRequestsPending, (LONG)0);
         m_id.m_dwId = 0;
     }
     
     inline BOOL IsRequestPending()
     {
         LIMITED_METHOD_CONTRACT;
-        return m_numRequestsPending != ADUnloading && m_numRequestsPending > 0;
+
+        LONG count = VolatileLoad(&m_numRequestsPending);
+        return count != ADUnloading && count > 0;
     }
 
     void SetAppDomainRequestsActive();
@@ -106,7 +108,7 @@ public:
         //has started running yet. That implies, no requests should be pending
         //or dispatched to this structure yet.
 
-        _ASSERTE(m_numRequestsPending != ADUnloading);
+        _ASSERTE(VolatileLoad(&m_numRequestsPending) != ADUnloading);
         _ASSERTE(m_id.m_dwId == 0);
 
         m_id = id;
@@ -119,7 +121,7 @@ public:
         //has started running yet. That implies, no requests should be pending
         //or dispatched to this structure yet.
 
-        _ASSERTE(m_numRequestsPending != ADUnloading);
+        _ASSERTE(VolatileLoad(&m_numRequestsPending) != ADUnloading);
         _ASSERTE(m_id.m_dwId == 0);
         _ASSERTE(m_index.m_dwIndex == UNUSED_THREADPOOL_INDEX);
 
@@ -135,7 +137,7 @@ public:
             //added removed at this time. So, make sure that the per-appdomain structures that 
             //have been cleared(reclaimed) don't have any pending requests to them.
 
-            _ASSERTE(m_numRequestsPending != ADUnloading);
+            _ASSERTE(VolatileLoad(&m_numRequestsPending) != ADUnloading);
             _ASSERTE(m_id.m_dwId == 0);
 
             return TRUE;
@@ -159,14 +161,14 @@ public:
     inline void SetAppDomainUnloading()
     {
         LIMITED_METHOD_CONTRACT;
-        m_numRequestsPending = ADUnloading;
+        VolatileStore(&m_numRequestsPending, ADUnloading);
     }
 
     inline void ClearAppDomainUnloading();
 
     inline BOOL IsAppDomainUnloading()
     {
-        return m_numRequestsPending.Load() == ADUnloading;
+        return VolatileLoad(&m_numRequestsPending) == ADUnloading;
     }
 
     void DispatchWorkItem(bool* foundWork, bool* wasNotRecalled);
@@ -174,7 +176,8 @@ public:
 private:
     struct {
         BYTE padding1[64]; // padding to ensure own cache line
-        Volatile<LONG> m_numRequestsPending;
+        // Only use with VolatileLoad+VolatileStore+FastInterlockCompareExchange
+        LONG m_numRequestsPending;
         BYTE padding2[64]; // padding to ensure own cache line
         ADID m_id;
         BYTE padding3[64]; // padding to ensure own cache line
@@ -217,13 +220,13 @@ public:
     {
         LIMITED_METHOD_CONTRACT;
         m_NumRequests = 0;
-        m_outstandingThreadRequestCount = 0;
+        VolatileStore(&m_outstandingThreadRequestCount, (LONG)0);
     }
 
     inline BOOL IsRequestPending()
     {
         LIMITED_METHOD_CONTRACT;
-        return m_outstandingThreadRequestCount != 0 ? TRUE : FALSE;
+        return VolatileLoad(&m_outstandingThreadRequestCount) != (LONG)0 ? TRUE : FALSE;
     }
 
     void SetAppDomainRequestsActive();
@@ -231,7 +234,7 @@ public:
     inline void ClearAppDomainRequestsActive(BOOL bADU)
     {
         LIMITED_METHOD_CONTRACT;
-        m_outstandingThreadRequestCount = 0;
+        VolatileStore(&m_outstandingThreadRequestCount, (LONG)0);
     }
 
     bool TakeActiveRequest();
@@ -281,7 +284,8 @@ private:
         BYTE padding1[64]; // padding to ensure own cache line
         ULONG m_NumRequests;
         BYTE padding2[64]; // padding to ensure own cache line
-        Volatile<LONG> m_outstandingThreadRequestCount;
+        // Only use with VolatileLoad+VolatileStore+FastInterlockCompareExchange
+        LONG m_outstandingThreadRequestCount;
         BYTE padding3[64]; // padding to ensure own cache line
         SpinLock m_lock;
     };
index 3696ad5..0df0f1e 100644 (file)
@@ -95,9 +95,12 @@ LONG    ThreadpoolMgr::cpuUtilizationAverage = 0;
 
 HillClimbing ThreadpoolMgr::HillClimbingInstance;
 
-Volatile<LONG> ThreadpoolMgr::PriorCompletedWorkRequests = 0;
-Volatile<DWORD> ThreadpoolMgr::PriorCompletedWorkRequestsTime;
-Volatile<DWORD> ThreadpoolMgr::NextCompletedWorkRequestsTime;
+BYTE ThreadpoolMgr::padding1[64];
+LONG ThreadpoolMgr::PriorCompletedWorkRequests = 0;
+DWORD ThreadpoolMgr::PriorCompletedWorkRequestsTime;
+DWORD ThreadpoolMgr::NextCompletedWorkRequestsTime;
+BYTE ThreadpoolMgr::padding2[64];
+
 LARGE_INTEGER ThreadpoolMgr::CurrentSampleStartTime;
 
 int ThreadpoolMgr::ThreadAdjustmentInterval;
@@ -1189,7 +1192,7 @@ void ThreadpoolMgr::AdjustMaxWorkersActive()
 
     DWORD currentTicks = GetTickCount();
     LONG totalNumCompletions = Thread::GetTotalThreadPoolCompletionCount();
-    LONG numCompletions = totalNumCompletions - PriorCompletedWorkRequests;
+    LONG numCompletions = totalNumCompletions - VolatileLoad(&PriorCompletedWorkRequests);
 
     LARGE_INTEGER startTime = CurrentSampleStartTime;
     LARGE_INTEGER endTime;
@@ -1251,9 +1254,10 @@ void ThreadpoolMgr::AdjustMaxWorkersActive()
             }
         }
 
-        PriorCompletedWorkRequests = totalNumCompletions;
+        // Memory fences below writes
+        VolatileStore(&PriorCompletedWorkRequests, totalNumCompletions);
         PriorCompletedWorkRequestsTime = currentTicks;
-        NextCompletedWorkRequestsTime = PriorCompletedWorkRequestsTime + ThreadAdjustmentInterval;
+        NextCompletedWorkRequestsTime = currentTicks + ThreadAdjustmentInterval;
         CurrentSampleStartTime = endTime;
     }
 }
index 5574512..39c53a1 100644 (file)
@@ -1139,8 +1139,9 @@ public:
         if (CLRThreadpoolHosted())
             return false;
 
-        DWORD requiredInterval = NextCompletedWorkRequestsTime - PriorCompletedWorkRequestsTime;
-        DWORD elapsedInterval = GetTickCount() - PriorCompletedWorkRequestsTime;
+        DWORD priorTime = PriorCompletedWorkRequestsTime; 
+        DWORD requiredInterval = VolatileLoad(&NextCompletedWorkRequestsTime) - priorTime; // fences above read
+        DWORD elapsedInterval = GetTickCount() - priorTime;
         if (elapsedInterval >= requiredInterval)
         {
             ThreadCounter::Counts counts = WorkerCounter.GetCleanCounts();
@@ -1304,9 +1305,13 @@ private:
     
     static HillClimbing HillClimbingInstance;
 
-    static Volatile<LONG> PriorCompletedWorkRequests;
-    static Volatile<DWORD> PriorCompletedWorkRequestsTime;
-    static Volatile<DWORD> NextCompletedWorkRequestsTime;
+    static BYTE padding1[64]; // padding to ensure own cache line
+
+    static LONG PriorCompletedWorkRequests;
+    static DWORD PriorCompletedWorkRequestsTime;
+    static DWORD NextCompletedWorkRequestsTime;
+
+    static BYTE padding2[64]; // padding to ensure own cache line
 
     static LARGE_INTEGER CurrentSampleStartTime;