Fix watson buckets setting race condition (#46636)
authorJan Vorlicek <janvorli@microsoft.com>
Thu, 7 Jan 2021 16:44:04 +0000 (17:44 +0100)
committerGitHub <noreply@github.com>
Thu, 7 Jan 2021 16:44:04 +0000 (17:44 +0100)
* Fix watson buckets setting race condition

The code that updates watson buckets in an exception from its inner
exception has a race that can cause runtime crashes when many threads
are rethrowing the same exception using ExceptionDispatchInfo.Throw.
At several places, we check if the inner exception contains watson
buckets and if that is true, we copy them to the outer exception.
However, the ExceptionDispatchInfo.Throw on another thread can reset
the buckets to the captured value, which might be null. So the
copying then crashes with access violation.

This change fixes the issue by reading the buckets reference once and
then performing the checks / copying using that extracted value.

It also adds a regression test that asserts quickly in debug / checked
builds.

* Reflect feedback

* Make the VALIDATEOBJECTREF race-resilient
* Add license header to the test
* Fix indentation

* Fix wrong VALIDATEOBJECTREF change

src/coreclr/vm/excep.cpp
src/coreclr/vm/exstatecommon.h
src/coreclr/vm/vars.hpp
src/tests/Regressions/coreclr/GitHub_45929/test45929.cs [new file with mode: 0644]
src/tests/Regressions/coreclr/GitHub_45929/test45929.csproj [new file with mode: 0644]

index a38438d..34afa5b 100644 (file)
@@ -9713,6 +9713,7 @@ void SetupInitialThrowBucketDetails(UINT_PTR adjustedIp)
     {
         OBJECTREF oCurrentThrowable;
         OBJECTREF oInnerMostExceptionThrowable;
+        U1ARRAYREF refSourceWatsonBucketArray;
     } gc;
     ZeroMemory(&gc, sizeof(gc));
 
@@ -9907,6 +9908,10 @@ void SetupInitialThrowBucketDetails(UINT_PTR adjustedIp)
     }
 
 phase1:
+    UINT_PTR ipForWatsonBuckets = 0;
+    gc.refSourceWatsonBucketArray = NULL;
+    bool gotWatsonBucketDetails = true;
+
     if (fAreBucketingDetailsPresent)
     {
         // Since we already have the buckets, simply bail out
@@ -9940,9 +9945,12 @@ phase1:
         }
         else
         {
+            ipForWatsonBuckets = ((EXCEPTIONREF)gc.oInnerMostExceptionThrowable)->GetIPForWatsonBuckets();
+            gc.refSourceWatsonBucketArray = ((EXCEPTIONREF)gc.oInnerMostExceptionThrowable)->GetWatsonBucketReference();
+
             // Do we have either the IP for Watson buckets or the buckets themselves?
-            fAreBucketingDetailsPresent = (((EXCEPTIONREF)gc.oInnerMostExceptionThrowable)->AreWatsonBucketsPresent() ||
-                                            ((EXCEPTIONREF)gc.oInnerMostExceptionThrowable)->IsIPForWatsonBucketsPresent());
+            fAreBucketingDetailsPresent = (gc.refSourceWatsonBucketArray != NULL ||
+                                           ipForWatsonBuckets != 0);
         }
     }
 
@@ -10028,16 +10036,12 @@ phase1:
     {
         // The inner exception object should be having either the IP for watson bucketing or the buckets themselves.
         // We shall copy over, whatever is available, to the current exception object.
-        _ASSERTE(gc.oInnerMostExceptionThrowable != NULL);
-        _ASSERTE(((EXCEPTIONREF)gc.oInnerMostExceptionThrowable)->AreWatsonBucketsPresent() ||
-                  ((EXCEPTIONREF)gc.oInnerMostExceptionThrowable)->IsIPForWatsonBucketsPresent());
-
-        if (((EXCEPTIONREF)gc.oInnerMostExceptionThrowable)->AreWatsonBucketsPresent())
+        if (gc.refSourceWatsonBucketArray != NULL)
         {
             EX_TRY
             {
                 // Copy the bucket details from innermost exception to the current exception object.
-                CopyWatsonBucketsFromThrowableToCurrentThrowable(gc.oInnerMostExceptionThrowable);
+                CopyWatsonBucketsFromThrowableToCurrentThrowable(gc.refSourceWatsonBucketArray);
             }
             EX_CATCH
             {
@@ -10046,17 +10050,24 @@ phase1:
 
             LOG((LF_EH, LL_INFO100, "SetupInitialThrowBucketDetails - Copied watson bucket details from the innermost exception\n"));
         }
-        else
+        else if (ipForWatsonBuckets != NULL)
         {
             // Copy the IP to the current exception object
-            ((EXCEPTIONREF)gc.oCurrentThrowable)->SetIPForWatsonBuckets(((EXCEPTIONREF)gc.oInnerMostExceptionThrowable)->GetIPForWatsonBuckets());
+            ((EXCEPTIONREF)gc.oCurrentThrowable)->SetIPForWatsonBuckets(ipForWatsonBuckets);
             LOG((LF_EH, LL_INFO100, "SetupInitialThrowBucketDetails - Copied watson bucket IP from the innermost exception\n"));
         }
+        else
+        {
+            gotWatsonBucketDetails = false;
+        }
     }
 
 done:
     // Set the flag that we have got the bucketing details
-    pExState->GetFlags()->SetGotWatsonBucketDetails();
+    if (gotWatsonBucketDetails)
+    {
+        pExState->GetFlags()->SetGotWatsonBucketDetails();
+    }
 
     GCPROTECT_END();
 
@@ -10065,7 +10076,7 @@ done:
 
 // This function is a wrapper to copy the watson bucket byte[] from the specified
 // throwable to the current throwable.
-void CopyWatsonBucketsFromThrowableToCurrentThrowable(OBJECTREF oThrowableFrom)
+void CopyWatsonBucketsFromThrowableToCurrentThrowable(U1ARRAYREF oManagedWatsonBuckets)
 {
 #ifndef DACCESS_COMPILE
 
@@ -10074,21 +10085,19 @@ void CopyWatsonBucketsFromThrowableToCurrentThrowable(OBJECTREF oThrowableFrom)
         GC_TRIGGERS;
         MODE_COOPERATIVE;
         THROWS;
-        PRECONDITION(oThrowableFrom != NULL);
-        PRECONDITION(!CLRException::IsPreallocatedExceptionObject(oThrowableFrom));
-        PRECONDITION(((EXCEPTIONREF)oThrowableFrom)->AreWatsonBucketsPresent());
+        PRECONDITION(oManagedWatsonBuckets != NULL);
         PRECONDITION(IsWatsonEnabled());
     }
     CONTRACTL_END;
 
     struct
     {
-        OBJECTREF oThrowableFrom;
+        U1ARRAYREF oSourceWatsonBuckets;
     } _gc;
 
     ZeroMemory(&_gc, sizeof(_gc));
     GCPROTECT_BEGIN(_gc);
-    _gc.oThrowableFrom = oThrowableFrom;
+    _gc.oSourceWatsonBuckets = oManagedWatsonBuckets;
 
     // Copy the watson buckets to the current throwable by NOT passing
     // the second argument that will default to NULL.
@@ -10096,7 +10105,7 @@ void CopyWatsonBucketsFromThrowableToCurrentThrowable(OBJECTREF oThrowableFrom)
     // CopyWatsonBucketsBetweenThrowables will pass that NULL to
     // CopyWatsonBucketsToThrowables that will make it copy the buckets
     // to the current throwable.
-    CopyWatsonBucketsBetweenThrowables(_gc.oThrowableFrom);
+    CopyWatsonBucketsBetweenThrowables(_gc.oSourceWatsonBuckets);
 
     GCPROTECT_END();
 
@@ -10108,7 +10117,7 @@ void CopyWatsonBucketsFromThrowableToCurrentThrowable(OBJECTREF oThrowableFrom)
 //
 // If the destination throwable is NULL, it will result in the buckets
 // being copied to the current throwable.
-void CopyWatsonBucketsBetweenThrowables(OBJECTREF oThrowableFrom, OBJECTREF oThrowableTo /*=NULL*/)
+void CopyWatsonBucketsBetweenThrowables(U1ARRAYREF oManagedWatsonBuckets, OBJECTREF oThrowableTo /*=NULL*/)
 {
 #ifndef DACCESS_COMPILE
 
@@ -10117,9 +10126,7 @@ void CopyWatsonBucketsBetweenThrowables(OBJECTREF oThrowableFrom, OBJECTREF oThr
         GC_TRIGGERS;
         MODE_COOPERATIVE;
         THROWS;
-        PRECONDITION(oThrowableFrom != NULL);
-        PRECONDITION(!CLRException::IsPreallocatedExceptionObject(oThrowableFrom));
-        PRECONDITION(((EXCEPTIONREF)oThrowableFrom)->AreWatsonBucketsPresent());
+        PRECONDITION(oManagedWatsonBuckets != NULL);
         PRECONDITION(IsWatsonEnabled());
     }
     CONTRACTL_END;
@@ -10128,7 +10135,7 @@ void CopyWatsonBucketsBetweenThrowables(OBJECTREF oThrowableFrom, OBJECTREF oThr
 
     struct
     {
-        OBJECTREF oFrom;
+        U1ARRAYREF oSourceWatsonBuckets;
         OBJECTREF oTo;
         OBJECTREF oWatsonBuckets;
     } _gc;
@@ -10136,7 +10143,7 @@ void CopyWatsonBucketsBetweenThrowables(OBJECTREF oThrowableFrom, OBJECTREF oThr
     ZeroMemory(&_gc, sizeof(_gc));
     GCPROTECT_BEGIN(_gc);
 
-    _gc.oFrom = oThrowableFrom;
+    _gc.oSourceWatsonBuckets = oManagedWatsonBuckets;
     _gc.oTo = (oThrowableTo == NULL)?GetThread()->GetThrowable():oThrowableTo;
     _ASSERTE(_gc.oTo != NULL);
 
@@ -10157,8 +10164,7 @@ void CopyWatsonBucketsBetweenThrowables(OBJECTREF oThrowableFrom, OBJECTREF oThr
     else
     {
         // Get the raw array data pointer of the source array
-        U1ARRAYREF refSourceWatsonBucketArray = ((EXCEPTIONREF)_gc.oFrom)->GetWatsonBucketReference();
-        PTR_VOID pRawSourceWatsonBucketArray = dac_cast<PTR_VOID>(refSourceWatsonBucketArray->GetDataPtr());
+        PTR_VOID pRawSourceWatsonBucketArray = dac_cast<PTR_VOID>(_gc.oSourceWatsonBuckets->GetDataPtr());
 
         // Get the raw array data pointer to the destination array
         U1ARRAYREF refDestWatsonBucketArray = (U1ARRAYREF)_gc.oWatsonBuckets;
@@ -10310,6 +10316,7 @@ void SetStateForWatsonBucketing(BOOL fIsRethrownException, OBJECTHANDLE ohOrigin
     {
         OBJECTREF oCurrentThrowable;
         OBJECTREF oInnerMostExceptionThrowable;
+        U1ARRAYREF refSourceWatsonBucketArray;
     } gc;
     ZeroMemory(&gc, sizeof(gc));
     GCPROTECT_BEGIN(gc);
@@ -10484,6 +10491,8 @@ void SetStateForWatsonBucketing(BOOL fIsRethrownException, OBJECTHANDLE ohOrigin
         // when we get the IP for bucketing.
         if (!fCreateBucketsForExceptionBeingThrown)
         {
+            bool gotWatsonBucketDetails = true;
+
             // Preallocated exception objects do not have inner exception objects.
             // Thus, if we are here, then the current throwable cannot be
             // a preallocated exception object.
@@ -10533,15 +10542,16 @@ void SetStateForWatsonBucketing(BOOL fIsRethrownException, OBJECTHANDLE ohOrigin
             {
                 // Assert that the inner exception has the Watson buckets
                 _ASSERTE(gc.oInnerMostExceptionThrowable != NULL);
-                _ASSERTE(((EXCEPTIONREF)gc.oInnerMostExceptionThrowable)->AreWatsonBucketsPresent() ||
-                         ((EXCEPTIONREF)gc.oInnerMostExceptionThrowable)->IsIPForWatsonBucketsPresent());
 
-                if (((EXCEPTIONREF)gc.oInnerMostExceptionThrowable)->AreWatsonBucketsPresent())
+                gc.refSourceWatsonBucketArray = ((EXCEPTIONREF)gc.oInnerMostExceptionThrowable)->GetWatsonBucketReference();
+                UINT_PTR ipForWatsonBuckets = ((EXCEPTIONREF)gc.oInnerMostExceptionThrowable)->GetIPForWatsonBuckets();
+
+                if (gc.refSourceWatsonBucketArray != NULL)
                 {
                     // Copy the bucket information from the inner exception object to the current throwable
                     EX_TRY
                     {
-                        CopyWatsonBucketsFromThrowableToCurrentThrowable(gc.oInnerMostExceptionThrowable);
+                        CopyWatsonBucketsFromThrowableToCurrentThrowable(gc.refSourceWatsonBucketArray);
                     }
                     EX_CATCH
                     {
@@ -10550,16 +10560,23 @@ void SetStateForWatsonBucketing(BOOL fIsRethrownException, OBJECTHANDLE ohOrigin
                     }
                     EX_END_CATCH(SwallowAllExceptions);
                 }
-                else
+                else if (ipForWatsonBuckets != NULL)
                 {
                     // Copy the IP for Watson bucketing to the exception object
-                    ((EXCEPTIONREF)gc.oCurrentThrowable)->SetIPForWatsonBuckets(((EXCEPTIONREF)gc.oInnerMostExceptionThrowable)->GetIPForWatsonBuckets());
+                    ((EXCEPTIONREF)gc.oCurrentThrowable)->SetIPForWatsonBuckets(ipForWatsonBuckets);
+                }
+                else
+                {
+                    gotWatsonBucketDetails = false;
                 }
             }
 
-            // Set the flag that we got bucketing details for the exception
-            pCurExState->GetFlags()->SetGotWatsonBucketDetails();
-            LOG((LF_EH, LL_INFO1000, "SetStateForWatsonBucketing - Using innermost exception details for Watson bucketing for thrown exception.\n"));
+            if (gotWatsonBucketDetails)
+            {
+                // Set the flag that we got bucketing details for the exception
+                pCurExState->GetFlags()->SetGotWatsonBucketDetails();
+                LOG((LF_EH, LL_INFO1000, "SetStateForWatsonBucketing - Using innermost exception details for Watson bucketing for thrown exception.\n"));
+            }
         }
 done:;
     }
index 92a4c45..ba8086a 100644 (file)
@@ -508,8 +508,8 @@ public:
 
 void SetStateForWatsonBucketing(BOOL fIsRethrownException, OBJECTHANDLE ohOriginalException);
 BOOL CopyWatsonBucketsToThrowable(PTR_VOID pUnmanagedBuckets, OBJECTREF oTargetThrowable = NULL);
-void CopyWatsonBucketsFromThrowableToCurrentThrowable(OBJECTREF oThrowableFrom);
-void CopyWatsonBucketsBetweenThrowables(OBJECTREF oThrowableFrom, OBJECTREF oThrowableTo = NULL);
+void CopyWatsonBucketsFromThrowableToCurrentThrowable(U1ARRAYREF oManagedWatsonBuckets);
+void CopyWatsonBucketsBetweenThrowables(U1ARRAYREF oManagedWatsonBuckets, OBJECTREF oThrowableTo = NULL);
 void SetupInitialThrowBucketDetails(UINT_PTR adjustedIp);
 BOOL SetupWatsonBucketsForFailFast(EXCEPTIONREF refException);
 void SetupWatsonBucketsForUEF(BOOL fUseLastThrownObject);
index e39603f..d3c00a7 100644 (file)
@@ -298,15 +298,15 @@ class REF : public OBJECTREF
 
 };
 
-// the while (0) syntax below is to force a trailing semicolon on users of the macro
-#define VALIDATEOBJECTREF(objref) do {if ((objref) != NULL) (objref).Validate();} while (0)
-#define VALIDATEOBJECT(obj) do {if ((obj) != NULL) (obj)->Validate();} while (0)
-
 #define ObjectToOBJECTREF(obj)     (OBJECTREF(obj))
 #define OBJECTREFToObject(objref)  ((objref).operator-> ())
 #define ObjectToSTRINGREF(obj)     (STRINGREF(obj))
 #define STRINGREFToObject(objref)  (*( (StringObject**) &(objref) ))
 
+// the while (0) syntax below is to force a trailing semicolon on users of the macro
+#define VALIDATEOBJECT(obj) do {if ((obj) != NULL) (obj)->Validate();} while (0)
+#define VALIDATEOBJECTREF(objref) do { Object* validateObjectRefObj = OBJECTREFToObject(objref); VALIDATEOBJECT(validateObjectRefObj); } while (0)
+
 #else   // _DEBUG_IMPL
 
 #define VALIDATEOBJECTREF(objref)
diff --git a/src/tests/Regressions/coreclr/GitHub_45929/test45929.cs b/src/tests/Regressions/coreclr/GitHub_45929/test45929.cs
new file mode 100644 (file)
index 0000000..72632a5
--- /dev/null
@@ -0,0 +1,127 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Reflection;
+using System.Runtime.ExceptionServices;
+using System.Threading;
+using System.Threading.Tasks;
+
+namespace test45929
+{
+    public class Program
+    {
+        static int Main(string[] _)
+        {
+            Console.WriteLine("The test started.");
+            Console.WriteLine("Progress:");
+
+            ThreadPool.GetMinThreads(out int _, out int cptMin);
+            ThreadPool.SetMinThreads(3500, cptMin);
+
+            Test.Run();
+
+            Console.WriteLine("Finished successfully");
+
+            return 100;
+        }
+
+        class Test
+        {
+            readonly TestCore methods;
+            MethodInfo methodInfo;
+
+            public Test()
+            {
+                methods = new TestCore();
+                methodInfo = GetMethod("ExceptionDispatchInfoCaptureThrow");
+                if (methodInfo is null)
+                {
+                    throw new InvalidOperationException("The methodInfo object is missing or empty.");
+                }
+            }
+
+            public static void Run()
+            {
+                long progress = 0;
+                var test = new Test();
+                const int MaxCount = 1000000;
+                Parallel.For(
+                    0,
+                    MaxCount,
+                    new ParallelOptions() { MaxDegreeOfParallelism = Environment.ProcessorCount },
+                    i =>
+                    {
+                        if (Interlocked.Increment(ref progress) % 10000 == 0)
+                        {
+                            Console.WriteLine($"{DateTime.Now} : {progress * 100D / MaxCount:000.0}%");
+                        }
+                        test.Invoke();
+                    });
+            }
+
+            public void Invoke()
+            {
+                try
+                {
+                    methodInfo.Invoke(methods, null);
+                }
+                catch
+                {
+                    // Ignore
+                }
+            }
+
+            static MethodInfo GetMethod(string methodName)
+            {
+                foreach (MethodInfo method in typeof(TestCore).GetMethods(BindingFlags.Public | BindingFlags.Instance | BindingFlags.DeclaredOnly))
+                {
+                    if (methodName == method.Name)
+                    {
+                        return method;
+                    }
+                }
+                return null;
+            }
+
+            class TestCore
+            {
+                const int ExpirySeconds = 1000;
+
+                // An exception instance that gets refreshed every ExpirySeconds.
+                (ExceptionDispatchInfo Exception, DateTime CacheExpiryDateTime) exceptionCache;
+
+                /// <summary>
+                /// Captures and throws the a cached instance of an exception.
+                /// </summary>
+                public void ExceptionDispatchInfoCaptureThrow()
+                {
+                    var error = GetCachedError();
+                    error.Throw();
+                }
+
+                ExceptionDispatchInfo GetCachedError()
+                {
+                    try
+                    {
+                        var cache = exceptionCache;
+                        if (cache.Exception != null)
+                        {
+                            if (exceptionCache.CacheExpiryDateTime > DateTime.UtcNow)
+                            {
+                                return cache.Exception;
+                            }
+                        }
+                        throw new Exception("Test");
+                    }
+                    catch (Exception ex)
+                    {
+                        ExceptionDispatchInfo edi = ExceptionDispatchInfo.Capture(ex);
+                        exceptionCache = (edi, DateTime.UtcNow.AddSeconds(ExpirySeconds));
+                        return edi;
+                    }
+                }
+            }
+        }
+    }
+}
diff --git a/src/tests/Regressions/coreclr/GitHub_45929/test45929.csproj b/src/tests/Regressions/coreclr/GitHub_45929/test45929.csproj
new file mode 100644 (file)
index 0000000..4b4ce1f
--- /dev/null
@@ -0,0 +1,10 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <CLRTestKind>BuildAndRun</CLRTestKind>
+    <CLRTestPriority>1</CLRTestPriority>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="test45929.cs" />
+  </ItemGroup>
+</Project>