Fix deadlock in EventPipeEventDispatcher (#90053)
authorDavid Mason <davmason@microsoft.com>
Mon, 14 Aug 2023 18:19:13 +0000 (11:19 -0700)
committerGitHub <noreply@github.com>
Mon, 14 Aug 2023 18:19:13 +0000 (11:19 -0700)
src/libraries/System.Private.CoreLib/src/Resources/Strings.resx
src/libraries/System.Private.CoreLib/src/System/Diagnostics/Tracing/EventPipeEventDispatcher.cs
src/libraries/System.Private.CoreLib/src/System/Diagnostics/Tracing/EventSource.cs
src/native/eventpipe/ep.c

index 5f3d727..b35ba38 100644 (file)
   <data name="EventSource_ChannelTypeDoesNotMatchEventChannelValue" xml:space="preserve">
     <value>Channel {0} does not match event channel value {1}.</value>
   </data>
+  <data name="EventSource_CouldNotEnableEventPipe" xml:space="preserve">
+    <value>Failed to open an EventPipe session for NativeRuntimeEventSource.</value>
+  </data>
   <data name="EventSource_DataDescriptorsOutOfRange" xml:space="preserve">
     <value>Data descriptors are out of range.</value>
   </data>
index c9daf4e..efd4b8c 100644 (file)
@@ -27,12 +27,13 @@ namespace System.Diagnostics.Tracing
         private readonly IntPtr m_RuntimeProviderID;
 
         private ulong m_sessionID;
-        private DateTime m_syncTimeUtc;
-        private long m_syncTimeQPC;
-        private long m_timeQPCFrequency;
 
-        private bool m_stopDispatchTask;
+        private CancellationTokenSource? m_dispatchTaskCancellationSource;
         private Task? m_dispatchTask;
+
+        // We take this lock to synchronize access to the shared session state. It is important to never take the EventSource.EventListenersLock while
+        // holding this, or we can deadlock. Unfortunately calling in to EventSource at all can take the EventListenersLock in ways that are not obvious,
+        // so don't call in to EventSource or other EventListeners while holding this unless you are certain it can't take the EventListenersLock.
         private readonly object m_dispatchControlLock = new object();
         private readonly Dictionary<EventListener, EventListenerSubscription> m_subscriptions = new Dictionary<EventListener, EventListenerSubscription>();
 
@@ -46,32 +47,18 @@ namespace System.Diagnostics.Tracing
 
         internal void SendCommand(EventListener eventListener, EventCommand command, bool enable, EventLevel level, EventKeywords matchAnyKeywords)
         {
-            lock (EventListener.EventListenersLock)
+            lock (m_dispatchControlLock)
             {
                 if (command == EventCommand.Update && enable)
                 {
-                    lock (m_dispatchControlLock)
-                    {
-                        // Add the new subscription.  This will overwrite an existing subscription for the listener if one exists.
-                        m_subscriptions[eventListener] = new EventListenerSubscription(matchAnyKeywords, level);
-
-                        // Commit the configuration change.
-                        CommitDispatchConfiguration();
-                    }
+                    // Add the new subscription.  This will overwrite an existing subscription for the listener if one exists.
+                    m_subscriptions[eventListener] = new EventListenerSubscription(matchAnyKeywords, level);
                 }
                 else if (command == EventCommand.Update && !enable)
                 {
-                    RemoveEventListener(eventListener);
+                    // Remove the event listener from the list of subscribers.
+                    m_subscriptions.Remove(eventListener);
                 }
-            }
-        }
-
-        internal void RemoveEventListener(EventListener listener)
-        {
-            lock (m_dispatchControlLock)
-            {
-                // Remove the event listener from the list of subscribers.
-                m_subscriptions.Remove(listener);
 
                 // Commit the configuration change.
                 CommitDispatchConfiguration();
@@ -82,13 +69,8 @@ namespace System.Diagnostics.Tracing
         {
             Debug.Assert(Monitor.IsEntered(m_dispatchControlLock));
 
-            // Ensure that the dispatch task is stopped.
-            // This is a no-op if the task is already stopped.
-            StopDispatchTask();
-
-            // Stop tracing.
-            // This is a no-op if it's already disabled.
-            EventPipeInternal.Disable(m_sessionID);
+            // Signal that the thread should shut down
+            SetStopDispatchTask();
 
             // Check to see if tracing should be enabled.
             if (m_subscriptions.Count <= 0)
@@ -122,7 +104,10 @@ namespace System.Diagnostics.Tracing
             };
 
             m_sessionID = EventPipeInternal.Enable(null, EventPipeSerializationFormat.NetTrace, DefaultEventListenerCircularMBSize, providerConfiguration);
-            Debug.Assert(m_sessionID != 0);
+            if (m_sessionID == 0)
+            {
+                throw new EventSourceException(SR.EventSource_CouldNotEnableEventPipe);
+            }
 
             // Get the session information that is required to properly dispatch events.
             EventPipeSessionInfo sessionInfo;
@@ -134,48 +119,49 @@ namespace System.Diagnostics.Tracing
                 }
             }
 
-            m_syncTimeUtc = DateTime.FromFileTimeUtc(sessionInfo.StartTimeAsUTCFileTime);
-            m_syncTimeQPC = sessionInfo.StartTimeStamp;
-            m_timeQPCFrequency = sessionInfo.TimeStampFrequency;
+
+            DateTime syncTimeUtc = DateTime.FromFileTimeUtc(sessionInfo.StartTimeAsUTCFileTime);
+            long syncTimeQPC = sessionInfo.StartTimeStamp;
+            long timeQPCFrequency = sessionInfo.TimeStampFrequency;
 
             // Start the dispatch task.
-            StartDispatchTask();
+            StartDispatchTask(m_sessionID, syncTimeUtc, syncTimeQPC, timeQPCFrequency);
         }
 
-        private void StartDispatchTask()
+        private void StartDispatchTask(ulong sessionID, DateTime syncTimeUtc, long syncTimeQPC, long timeQPCFrequency)
         {
             Debug.Assert(Monitor.IsEntered(m_dispatchControlLock));
+            Debug.Assert(sessionID != 0);
 
-            if (m_dispatchTask == null)
-            {
-                m_stopDispatchTask = false;
-                m_dispatchTask = Task.Factory.StartNew(DispatchEventsToEventListeners, CancellationToken.None, TaskCreationOptions.LongRunning, TaskScheduler.Default);
-            }
+            m_dispatchTaskCancellationSource = new CancellationTokenSource();
+            Task? previousDispatchTask = m_dispatchTask;
+            m_dispatchTask = Task.Factory.StartNew(() => DispatchEventsToEventListeners(sessionID, syncTimeUtc, syncTimeQPC, timeQPCFrequency, previousDispatchTask, m_dispatchTaskCancellationSource.Token), CancellationToken.None, TaskCreationOptions.LongRunning, TaskScheduler.Default);
         }
 
-        private void StopDispatchTask()
+        private void SetStopDispatchTask()
         {
             Debug.Assert(Monitor.IsEntered(m_dispatchControlLock));
 
             if (m_dispatchTask != null)
             {
-                m_stopDispatchTask = true;
+                Debug.Assert(m_dispatchTaskCancellationSource != null);
+                m_dispatchTaskCancellationSource?.Cancel();
                 EventPipeInternal.SignalSession(m_sessionID);
-                m_dispatchTask.Wait();
-                m_dispatchTask = null;
             }
         }
 
-        private unsafe void DispatchEventsToEventListeners()
+        private unsafe void DispatchEventsToEventListeners(ulong sessionID, DateTime syncTimeUtc, long syncTimeQPC, long timeQPCFrequency, Task? previousDispatchTask, CancellationToken token)
         {
+            Debug.Assert(sessionID != 0);
+            previousDispatchTask?.Wait(CancellationToken.None);
+
             // Struct to fill with the call to GetNextEvent.
             EventPipeEventInstanceData instanceData;
-
-            while (!m_stopDispatchTask)
+            while (!token.IsCancellationRequested)
             {
                 bool eventsReceived = false;
                 // Get the next event.
-                while (!m_stopDispatchTask && EventPipeInternal.GetNextEvent(m_sessionID, &instanceData))
+                while (!token.IsCancellationRequested && EventPipeInternal.GetNextEvent(sessionID, &instanceData))
                 {
                     eventsReceived = true;
 
@@ -184,36 +170,39 @@ namespace System.Diagnostics.Tracing
                     {
                         // Dispatch the event.
                         ReadOnlySpan<byte> payload = new ReadOnlySpan<byte>((void*)instanceData.Payload, (int)instanceData.PayloadLength);
-                        DateTime dateTimeStamp = TimeStampToDateTime(instanceData.TimeStamp);
+                        DateTime dateTimeStamp = TimeStampToDateTime(instanceData.TimeStamp, syncTimeUtc, syncTimeQPC, timeQPCFrequency);
                         NativeRuntimeEventSource.Log.ProcessEvent(instanceData.EventID, instanceData.ThreadID, dateTimeStamp, instanceData.ActivityId, instanceData.ChildActivityId, payload);
                     }
                 }
 
                 // Wait for more events.
-                if (!m_stopDispatchTask)
+                if (!token.IsCancellationRequested)
                 {
                     if (!eventsReceived)
                     {
-                        EventPipeInternal.WaitForSessionSignal(m_sessionID, Timeout.Infinite);
+                        EventPipeInternal.WaitForSessionSignal(sessionID, Timeout.Infinite);
                     }
 
                     Thread.Sleep(10);
                 }
             }
+
+            // Disable the old session. This can happen asynchronously since we aren't using the old session anymore
+            EventPipeInternal.Disable(sessionID);
         }
 
         /// <summary>
         /// Converts a QueryPerformanceCounter (QPC) timestamp to a UTC DateTime.
         /// </summary>
-        private DateTime TimeStampToDateTime(long timeStamp)
+        private static DateTime TimeStampToDateTime(long timeStamp, DateTime syncTimeUtc, long syncTimeQPC, long timeQPCFrequency)
         {
             if (timeStamp == long.MaxValue)
             {
                 return DateTime.MaxValue;
             }
 
-            Debug.Assert((m_syncTimeUtc.Ticks != 0) && (m_syncTimeQPC != 0) && (m_timeQPCFrequency != 0));
-            long inTicks = (long)((timeStamp - m_syncTimeQPC) * 10000000.0 / m_timeQPCFrequency) + m_syncTimeUtc.Ticks;
+            Debug.Assert((syncTimeUtc.Ticks != 0) && (syncTimeQPC != 0) && (timeQPCFrequency != 0));
+            long inTicks = (long)((timeStamp - syncTimeQPC) * 10000000.0 / timeQPCFrequency) + syncTimeUtc.Ticks;
             if ((inTicks < 0) || (DateTime.MaxTicks < inTicks))
             {
                 inTicks = DateTime.MaxTicks;
index cf84520..d1b11dc 100644 (file)
@@ -4075,6 +4075,11 @@ namespace System.Diagnostics.Tracing
                 }
                 Validate();
             }
+
+#if FEATURE_PERFTRACING
+            // Remove the listener from the EventPipe dispatcher. EventCommand.Update with enable==false removes it.
+            EventPipeEventDispatcher.Instance.SendCommand(this, EventCommand.Update, false, EventLevel.LogAlways, (EventKeywords)0);
+#endif // FEATURE_PERFTRACING
         }
         // We don't expose a Dispose(bool), because the contract is that you don't have any non-syncronous
         // 'cleanup' associated with this object
@@ -4391,11 +4396,6 @@ namespace System.Diagnostics.Tracing
                     }
                 }
             }
-
-#if FEATURE_PERFTRACING
-            // Remove the listener from the EventPipe dispatcher.
-            EventPipeEventDispatcher.Instance.RemoveEventListener(listenerToRemove);
-#endif // FEATURE_PERFTRACING
         }
 
 
index dd327ef..d007702 100644 (file)
@@ -665,7 +665,7 @@ disable_helper (EventPipeSessionID id)
                ep_provider_callback_data_queue_fini (provider_callback_data_queue);
 
 #ifdef EP_CHECKED_BUILD
-               if (ep_volatile_load_number_of_sessions () == 0)
+               if (ep_volatile_load_number_of_sessions () == 0 && ep_volatile_load_eventpipe_state () != EP_STATE_SHUTTING_DOWN)
                        EP_ASSERT (ep_rt_providers_validate_all_disabled ());
 #endif
 
@@ -1432,7 +1432,9 @@ ep_shutdown (void)
 
        for (uint32_t i = 0; i < EP_MAX_NUMBER_OF_SESSIONS; ++i) {
                EventPipeSession *session = ep_volatile_load_session (i);
-               if (session)
+               // Do not shut down listener sessions on shutdown, the processing thread will
+               // still be trying to process events in the background until the process is torn down
+               if (session && session->session_type != EP_SESSION_TYPE_LISTENER)
                        ep_disable ((EventPipeSessionID)session);
        }