[release/6.0] Fix stress issues around multiple threads throwing the same exceptions...
authorgithub-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Mon, 23 Aug 2021 22:55:48 +0000 (15:55 -0700)
committerGitHub <noreply@github.com>
Mon, 23 Aug 2021 22:55:48 +0000 (15:55 -0700)
* Fix stress issues around multiple threads throwing the same exceptions - The watson codebase manipulates the state of the following fields on Exception in a lock-free manner, even if there are multiple threads throwing the same exception - _stackTrace - _stackTraceString - _remoteStackTraceString - _watsonBuckets - _ipForWatsonBuckets - The designed behavior is that these apis should "mostly" be correct, but as they are only used for fatal shutdown scenarios, exact correctness is not required for correct program execution - However, there are some race conditions that have been seen recently in testing 1. In some circumstances, the value will be explicitly read multiple times, where the first read is to check for NULL, and then a second read is to read the actual value and use it in some way. In the presence of a race which sets the value to NULL, the runtime can crash. To fix this, the code is refactored, in the cases which could lead to crashes, to perform a single read and carry the read value to where it needs to go. 2. Since the C++ memory model generally allows a single read written in C++ to be converted into multiple reads if the compiler can prove that the read does not cross a lock/memory barrier, it is possible for the C++ compiler to inject multiple reads where the logic naturally only has one. The fix for this is to utilize the VolatileLoadWithoutBarrier api to specify that a read should happen once in cases where it might cause a problem.

Finally, test45929 tended to fail under GC stress, as it would take a very long time to run under GC stress or on some hardware. Adjust it so that it shuts down after about 2.5 minutes.
- Do this instead of disabling running under gcstress as there is evidence that there may have been bugs seen during runs under gcstress.

Fixes #46803

* Rename as per suggestion

Co-authored-by: David Wrighton <davidwr@microsoft.com>
src/coreclr/debug/daccess/request.cpp
src/coreclr/vm/excep.cpp
src/coreclr/vm/exstatecommon.h
src/coreclr/vm/object.cpp
src/coreclr/vm/object.h
src/coreclr/vm/vars.hpp
src/tests/Regressions/coreclr/GitHub_45929/test45929.cs

index f075dbe..6ffcc76 100644 (file)
@@ -3842,11 +3842,11 @@ HRESULT ClrDataAccess::GetClrWatsonBucketsWorker(Thread * pThread, GenericModeBl
         if (oThrowable != NULL)
         {
             // Does the throwable have buckets?
-            if (((EXCEPTIONREF)oThrowable)->AreWatsonBucketsPresent())
+            U1ARRAYREF refWatsonBucketArray = ((EXCEPTIONREF)oThrowable)->GetWatsonBucketReference();
+            if (refWatsonBucketArray != NULL)
             {
                 // Get the watson buckets from the throwable for non-preallocated
                 // exceptions
-                U1ARRAYREF refWatsonBucketArray = ((EXCEPTIONREF)oThrowable)->GetWatsonBucketReference();
                 pBuckets = dac_cast<PTR_VOID>(refWatsonBucketArray->GetDataPtr());
             }
             else
index 74a1f9f..5249ea7 100644 (file)
@@ -9096,6 +9096,7 @@ void SetupWatsonBucketsForUEF(BOOL fUseLastThrownObject)
     struct
     {
         OBJECTREF oThrowable;
+        U1ARRAYREF oBuckets;
     } gc;
     ZeroMemory(&gc, sizeof(gc));
     GCPROTECT_BEGIN(gc);
@@ -9197,9 +9198,10 @@ void SetupWatsonBucketsForUEF(BOOL fUseLastThrownObject)
                 SetupWatsonBucketsForNonPreallocatedExceptions(gc.oThrowable);
             }
 
-            if (((EXCEPTIONREF)gc.oThrowable)->AreWatsonBucketsPresent())
+            gc.oBuckets = ((EXCEPTIONREF)gc.oThrowable)->GetWatsonBucketReference();
+            if (gc.oBuckets != NULL)
             {
-                pUEWatsonBucketTracker->CopyBucketsFromThrowable(gc.oThrowable);
+                pUEWatsonBucketTracker->CopyBuckets(gc.oBuckets);
             }
 
             if (pUEWatsonBucketTracker->RetrieveWatsonBuckets() == NULL)
@@ -9519,6 +9521,7 @@ BOOL SetupWatsonBucketsForFailFast(EXCEPTIONREF refException)
     {
         OBJECTREF refException;
         OBJECTREF oInnerMostExceptionThrowable;
+        U1ARRAYREF oBuckets;
     } gc;
     ZeroMemory(&gc, sizeof(gc));
     GCPROTECT_BEGIN(gc);
@@ -9669,10 +9672,11 @@ BOOL SetupWatsonBucketsForFailFast(EXCEPTIONREF refException)
                 }
 
                 // If it has the buckets, copy them over to the current Watson bucket tracker
-                if (((EXCEPTIONREF)gc.oInnerMostExceptionThrowable)->AreWatsonBucketsPresent())
+                gc.oBuckets = ((EXCEPTIONREF)gc.oInnerMostExceptionThrowable)->GetWatsonBucketReference();
+                if (gc.oBuckets != NULL)
                 {
                     pUEWatsonBucketTracker->ClearWatsonBucketDetails();
-                    pUEWatsonBucketTracker->CopyBucketsFromThrowable(gc.oInnerMostExceptionThrowable);
+                    pUEWatsonBucketTracker->CopyBuckets(gc.oBuckets);
                     if (pUEWatsonBucketTracker->RetrieveWatsonBuckets() != NULL)
                     {
                         LOG((LF_EH, LL_INFO1000, "SetupWatsonBucketsForFailFast - Got watson buckets from regular innermost exception.\n"));
@@ -9711,11 +9715,12 @@ BOOL SetupWatsonBucketsForFailFast(EXCEPTIONREF refException)
                 SetupWatsonBucketsForNonPreallocatedExceptions(gc.refException);
             }
 
-            if (((EXCEPTIONREF)gc.refException)->AreWatsonBucketsPresent())
+            gc.oBuckets = ((EXCEPTIONREF)gc.refException)->GetWatsonBucketReference();
+            if (gc.oBuckets != NULL)
             {
                 // Copy the buckets to the current watson bucket tracker
                 pUEWatsonBucketTracker->ClearWatsonBucketDetails();
-                pUEWatsonBucketTracker->CopyBucketsFromThrowable(gc.refException);
+                pUEWatsonBucketTracker->CopyBuckets(gc.oBuckets);
                 if (pUEWatsonBucketTracker->RetrieveWatsonBuckets() != NULL)
                 {
                     LOG((LF_EH, LL_INFO1000, "SetupWatsonBucketsForFailFast - Watson buckets copied from the exception object.\n"));
@@ -9950,6 +9955,9 @@ void SetupInitialThrowBucketDetails(UINT_PTR adjustedIp)
                     EX_TRY
                     {
                         CopyWatsonBucketsToThrowable(pUEWatsonBucketTracker->RetrieveWatsonBuckets());
+
+                        // Technically this assert can fail, as another thread could clear the buckets after
+                        // CopyWatsonBucketsToThrowable but before the assert runs, but it is very unlikely.
                         _ASSERTE(((EXCEPTIONREF)gc.oCurrentThrowable)->AreWatsonBucketsPresent());
                     }
                     EX_CATCH
@@ -10686,7 +10694,7 @@ void EHWatsonBucketTracker::Init()
 
 // This method copies the bucketing details from the specified throwable
 // to the current Watson Bucket tracker.
-void EHWatsonBucketTracker::CopyBucketsFromThrowable(OBJECTREF oThrowable)
+void EHWatsonBucketTracker::CopyBuckets(U1ARRAYREF oBuckets)
 {
 #ifndef DACCESS_COMPILE
     CONTRACTL
@@ -10694,8 +10702,7 @@ void EHWatsonBucketTracker::CopyBucketsFromThrowable(OBJECTREF oThrowable)
         NOTHROW;
         GC_NOTRIGGER;
         MODE_ANY;
-        PRECONDITION(oThrowable != NULL);
-        PRECONDITION(((EXCEPTIONREF)oThrowable)->AreWatsonBucketsPresent());
+        PRECONDITION(oBuckets != NULL);
         PRECONDITION(IsWatsonEnabled());
     }
     CONTRACTL_END;
@@ -10704,16 +10711,16 @@ void EHWatsonBucketTracker::CopyBucketsFromThrowable(OBJECTREF oThrowable)
 
     struct
     {
-        OBJECTREF oFrom;
+        U1ARRAYREF oFromBuckets;
     } _gc;
 
     ZeroMemory(&_gc, sizeof(_gc));
     GCPROTECT_BEGIN(_gc);
 
-    _gc.oFrom = oThrowable;
+    _gc.oFromBuckets = oBuckets;
 
-    LOG((LF_EH, LL_INFO1000, "EHWatsonBucketTracker::CopyEHWatsonBucketTracker - Copying bucketing details from throwable (%p) to tracker (%p)\n",
-                            OBJECTREFToObject(_gc.oFrom), this));
+    LOG((LF_EH, LL_INFO1000, "EHWatsonBucketTracker::CopyEHWatsonBucketTracker - Copying bucketing details from bucket (%p) to tracker (%p)\n",
+                            OBJECTREFToObject(_gc.oFromBuckets), this));
 
     // Watson bucket is a "GenericModeBlock" type. Set up an empty GenericModeBlock
     // to hold the bucket parameters.
@@ -10728,8 +10735,7 @@ void EHWatsonBucketTracker::CopyBucketsFromThrowable(OBJECTREF oThrowable)
     else
     {
         // Get the raw array data pointer
-        U1ARRAYREF refWatsonBucketArray = ((EXCEPTIONREF)_gc.oFrom)->GetWatsonBucketReference();
-        PTR_VOID pRawWatsonBucketArray = dac_cast<PTR_VOID>(refWatsonBucketArray->GetDataPtr());
+        PTR_VOID pRawWatsonBucketArray = dac_cast<PTR_VOID>(_gc.oFromBuckets->GetDataPtr());
 
         // Copy over the details to our new allocation
         memcpyNoGCRefs(pgmb, pRawWatsonBucketArray, sizeof(GenericModeBlock));
index ba8086a..b2abb9d 100644 (file)
@@ -483,7 +483,7 @@ public:
    EHWatsonBucketTracker();
    void Init();
    void CopyEHWatsonBucketTracker(const EHWatsonBucketTracker& srcTracker);
-   void CopyBucketsFromThrowable(OBJECTREF oThrowable);
+   void CopyBuckets(U1ARRAYREF oBuckets);
    void SaveIpForWatsonBucket(UINT_PTR ip);
    UINT_PTR RetrieveWatsonBucketIp();
    PTR_VOID RetrieveWatsonBuckets();
index 36b100d..af2201c 100644 (file)
@@ -1034,6 +1034,41 @@ OBJECTREF::OBJECTREF(const OBJECTREF & objref)
 
 
 //-------------------------------------------------------------
+// VolatileLoadWithoutBarrier constructor
+//-------------------------------------------------------------
+OBJECTREF::OBJECTREF(const OBJECTREF *pObjref, tagVolatileLoadWithoutBarrier tag)
+{
+    STATIC_CONTRACT_NOTHROW;
+    STATIC_CONTRACT_GC_NOTRIGGER;
+    STATIC_CONTRACT_MODE_COOPERATIVE;
+    STATIC_CONTRACT_FORBID_FAULT;
+
+    Object* objrefAsObj = VolatileLoadWithoutBarrier(&pObjref->m_asObj);
+    VALIDATEOBJECT(objrefAsObj);
+
+    // !!! If this assert is fired, there are two possibilities:
+    // !!! 1.  You are doing a type cast, e.g.  *(OBJECTREF*)pObj
+    // !!!     Instead, you should use ObjectToOBJECTREF(*(Object**)pObj),
+    // !!!                          or ObjectToSTRINGREF(*(StringObject**)pObj)
+    // !!! 2.  There is a real GC hole here.
+    // !!! Either way you need to fix the code.
+    _ASSERTE(Thread::IsObjRefValid(pObjref));
+    if ((objrefAsObj != 0) &&
+        ((IGCHeap*)GCHeapUtilities::GetGCHeap())->IsHeapPointer( (BYTE*)this ))
+    {
+        _ASSERTE(!"Write Barrier violation. Must use SetObjectReference() to assign OBJECTREF's into the GC heap!");
+    }
+    m_asObj = objrefAsObj;
+
+    if (m_asObj != 0) {
+        ENABLESTRESSHEAP();
+    }
+
+    Thread::ObjectRefNew(this);
+}
+
+
+//-------------------------------------------------------------
 // To allow NULL to be used as an OBJECTREF.
 //-------------------------------------------------------------
 OBJECTREF::OBJECTREF(TADDR nul)
index e35bd90..398b4f4 100644 (file)
@@ -2418,7 +2418,7 @@ public:
     OBJECTREF GetInnerException()
     {
         LIMITED_METHOD_DAC_CONTRACT;
-        return _innerException;
+        return VolatileLoadWithoutBarrierOBJECTREF(&_innerException);
     }
 
     // Returns the innermost exception object - equivalent of the
@@ -2431,7 +2431,7 @@ public:
         OBJECTREF oInnerMostException = NULL;
         OBJECTREF oCurrent = NULL;
 
-        oCurrent = _innerException;
+        oCurrent = GetInnerException();
         while(oCurrent != NULL)
         {
             oInnerMostException = oCurrent;
@@ -2469,7 +2469,7 @@ public:
     STRINGREF GetRemoteStackTraceString()
     {
         LIMITED_METHOD_DAC_CONTRACT;
-        return _remoteStackTraceString;
+        return (STRINGREF)VolatileLoadWithoutBarrierOBJECTREF(&_remoteStackTraceString);
     }
 
     void SetHelpURL(STRINGREF helpURL)
@@ -2512,7 +2512,7 @@ public:
     U1ARRAYREF GetWatsonBucketReference()
     {
         LIMITED_METHOD_CONTRACT;
-        return _watsonBuckets;
+        return (U1ARRAYREF)VolatileLoadWithoutBarrierOBJECTREF(&_watsonBuckets);
     }
 
     // This method will return a BOOL to indicate if the
@@ -2520,7 +2520,7 @@ public:
     BOOL AreWatsonBucketsPresent()
     {
         LIMITED_METHOD_CONTRACT;
-        return (_watsonBuckets != NULL)?TRUE:FALSE;
+        return (GetWatsonBucketReference() != NULL)?TRUE:FALSE;
     }
 
     // This method will save the IP to be used for watson bucketing.
@@ -2545,7 +2545,7 @@ public:
     {
         LIMITED_METHOD_CONTRACT;
 
-        return _ipForWatsonBuckets;
+        return VolatileLoadWithoutBarrier(&_ipForWatsonBuckets);
     }
 
     // README:
index 9400fd1..ac85794 100644 (file)
@@ -157,6 +157,8 @@ class OBJECTREF {
     };
 
     public:
+        enum class tagVolatileLoadWithoutBarrier { tag };
+
         //-------------------------------------------------------------
         // Default constructor, for non-initializing declarations:
         //
@@ -170,6 +172,12 @@ class OBJECTREF {
         OBJECTREF(const OBJECTREF & objref);
 
         //-------------------------------------------------------------
+        // Copy constructor, for passing OBJECTREF's as function arguments
+        // using a volatile without barrier load
+        //-------------------------------------------------------------
+        OBJECTREF(const OBJECTREF * pObjref, tagVolatileLoadWithoutBarrier tag);
+
+        //-------------------------------------------------------------
         // To allow NULL to be used as an OBJECTREF.
         //-------------------------------------------------------------
         OBJECTREF(TADDR nul);
@@ -302,6 +310,7 @@ class REF : public OBJECTREF
 #define OBJECTREFToObject(objref)  ((objref).operator-> ())
 #define ObjectToSTRINGREF(obj)     (STRINGREF(obj))
 #define STRINGREFToObject(objref)  (*( (StringObject**) &(objref) ))
+#define VolatileLoadWithoutBarrierOBJECTREF(pObj) (OBJECTREF(pObj, OBJECTREF::tagVolatileLoadWithoutBarrier::tag))
 
 // the while (0) syntax below is to force a trailing semicolon on users of the macro
 #define VALIDATEOBJECT(obj) do {if ((obj) != NULL) (obj)->Validate();} while (0)
@@ -316,6 +325,7 @@ class REF : public OBJECTREF
 #define OBJECTREFToObject(objref) ((PTR_Object) (objref))
 #define ObjectToSTRINGREF(obj)    ((PTR_StringObject) (obj))
 #define STRINGREFToObject(objref) ((PTR_StringObject) (objref))
+#define VolatileLoadWithoutBarrierOBJECTREF(pObj) VolatileLoadWithoutBarrier(pObj)
 
 #endif // _DEBUG_IMPL
 
index 72632a5..ba783a4 100644 (file)
@@ -2,6 +2,7 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 
 using System;
+using System.Diagnostics;
 using System.Reflection;
 using System.Runtime.ExceptionServices;
 using System.Threading;
@@ -46,18 +47,40 @@ namespace test45929
                 long progress = 0;
                 var test = new Test();
                 const int MaxCount = 1000000;
-                Parallel.For(
-                    0,
-                    MaxCount,
-                    new ParallelOptions() { MaxDegreeOfParallelism = Environment.ProcessorCount },
-                    i =>
+                int increment = 100;
+                bool done = false;
+                Stopwatch stopwatch = new Stopwatch();
+                stopwatch.Start();
+                Console.WriteLine($"{DateTime.Now} : {progress * 100D / MaxCount:000.0}% : {stopwatch.ElapsedMilliseconds}");
+
+                Action<int> makeProgress = i =>
                     {
-                        if (Interlocked.Increment(ref progress) % 10000 == 0)
+                        if (done) return;
+                        long newProgress = Interlocked.Increment(ref progress);
+                        if (newProgress % increment == 0)
                         {
-                            Console.WriteLine($"{DateTime.Now} : {progress * 100D / MaxCount:000.0}%");
+                            int newIncrement = (increment * 3) / 2;
+                            if (newIncrement > 10000)
+                                newIncrement = 10000;
+                            increment = newIncrement;
+
+                            Console.WriteLine($"{DateTime.Now} : {newProgress * 100D / MaxCount:000.0}% : {stopwatch.ElapsedMilliseconds}");
+                            if (stopwatch.ElapsedMilliseconds > 150000)
+                            {
+                                Console.WriteLine($"Attempting to finish early");
+                                done = true;
+                            }
                         }
                         test.Invoke();
-                    });
+                    };
+                
+                makeProgress(0);
+
+                Parallel.For(
+                    1,
+                    MaxCount,
+                    new ParallelOptions() { MaxDegreeOfParallelism = Environment.ProcessorCount },
+                    makeProgress);
             }
 
             public void Invoke()