Fix log scope construction (#4323)
authorJoe Schmitt <1146681+schmittjoseph@users.noreply.github.com>
Thu, 19 Oct 2023 18:32:17 +0000 (11:32 -0700)
committerGitHub <noreply@github.com>
Thu, 19 Oct 2023 18:32:17 +0000 (11:32 -0700)
This PR reworks how log scopes are reconstructed. The current
implementation has a several issues that results in it only working in
single-threaded contexts. Once multiple threads are involved, scopes
will be incorrectly associated to log messages in addition to scopes
being deleted early while they are still active.

I've described how the new approach works in the source code so it
doesn't get lost after this PR is merged. I've also left a comment with
a visual representation of edge cases with concurrent traces.

/cc @dotnet/dotnet-monitor

---------

Co-authored-by: Wiktor Kopec <wiktork@microsoft.com>
src/Microsoft.Diagnostics.Monitoring.EventPipe/Configuration/LoggingSourceConfiguration.cs
src/Microsoft.Diagnostics.Monitoring.EventPipe/Logs/EventLogsPipeline.cs
src/Microsoft.Diagnostics.Monitoring.EventPipe/Logs/EventLogsPipelineSettings.cs
src/tests/EventPipeTracee/Program.cs
src/tests/Microsoft.Diagnostics.Monitoring.EventPipe/EventLogsPipelineUnitTests.cs

index 82dae4f329f1c10f5df3e2c6e3342dbcb8abea19..d65acdee05f591164f889f1cca11cd0aaaaed8b4 100644 (file)
@@ -6,6 +6,7 @@ using System.Collections.Generic;
 using System.Diagnostics.Tracing;
 using System.Text;
 using Microsoft.Diagnostics.NETCore.Client;
+using Microsoft.Diagnostics.Tracing.Parsers;
 using Microsoft.Extensions.Logging;
 
 namespace Microsoft.Diagnostics.Monitoring.EventPipe
@@ -15,21 +16,24 @@ namespace Microsoft.Diagnostics.Monitoring.EventPipe
         private readonly string _filterSpecs;
         private readonly long _keywords;
         private readonly EventLevel _level;
+        private readonly bool _collectScopes;
 
         /// <summary>
         /// Creates a new logging source configuration.
         /// </summary>
-        public LoggingSourceConfiguration(LogLevel level, LogMessageType messageType, IDictionary<string, LogLevel?> filterSpecs, bool useAppFilters)
+        public LoggingSourceConfiguration(LogLevel level, LogMessageType messageType, IDictionary<string, LogLevel?> filterSpecs, bool useAppFilters,
+            bool collectScopes)
         {
             RequestRundown = false;
             _filterSpecs = ToFilterSpecsString(filterSpecs, useAppFilters);
             _keywords = (long)ToKeywords(messageType);
             _level = ToEventLevel(level);
+            _collectScopes = collectScopes;
         }
 
         public override IList<EventPipeProvider> GetProviders()
         {
-            return new List<EventPipeProvider>()
+            List<EventPipeProvider> providers = new()
             {
                 new EventPipeProvider(
                     MicrosoftExtensionsLoggingProviderName,
@@ -41,6 +45,17 @@ namespace Microsoft.Diagnostics.Monitoring.EventPipe
                         }
                 )
             };
+
+            if (_collectScopes)
+            {
+                // Activity correlation
+                providers.Add(new EventPipeProvider(
+                    TplEventSource,
+                    EventLevel.Informational,
+                    (long)TplEtwProviderTraceEventParser.Keywords.TasksFlowActivityIds));
+            }
+
+            return providers;
         }
 
         private static string ToFilterSpecsString(IDictionary<string, LogLevel?> filterSpecs, bool useAppFilters)
index 063bd342fbdc3c7b732f225c15609f0d361219a2..0b6f0824f6a90cb2bb7c934a024e418c3041db80 100644 (file)
@@ -30,7 +30,8 @@ namespace Microsoft.Diagnostics.Monitoring.EventPipe
                     Settings.LogLevel,
                     LogMessageType.FormattedMessage | LogMessageType.JsonMessage,
                     Settings.FilterSpecs,
-                    Settings.UseAppFilters);
+                    Settings.UseAppFilters,
+                    Settings.CollectScopes);
             }
             catch (NotSupportedException ex)
             {
@@ -42,45 +43,55 @@ namespace Microsoft.Diagnostics.Monitoring.EventPipe
         {
             string lastFormattedMessage = string.Empty;
 
-            Dictionary<Guid, LogActivityItem> logActivities = new();
-            Stack<Guid> stack = new();
+            //
+            // We enable TplEventSource's TasksFlowActivityIds as part of our configuration to enable activity correlation.
+            // This means that each time an event start occurs the current ActivityId will branch creating a new one with a RelatedActivityId equal to where it branched from.
+            // Combining this with the fact that scopes are handled as ActivityJson/{Start,Stop} means the ActivityId will branch each time a scope starts.
+            // When a log message occurs, it'll have an ActivityId equal to the latest applicable scope.
+            //
+            // By maintaining a tree with the branching data, we can construct the full scope for a log message:
+            // - Each time the ActivityId branches, create a node in the tree with it's parent being the node corresponding to the RelatedActivityId.
+            //   - Each node has corresponding scope data.
+            // - When a log message occurs, grab the node with the corresponding ActivityId and backtrack to the root of the tree. Each node visited is included as part of the log's scope.
+            //
+            // NOTE: There are edge cases with concurrent traces, this is described in greater detail above our backtracking code.
+            //
+            Dictionary<Guid, LogScopeItem> activityIdToScope = new();
+
+            if (Settings.CollectScopes)
+            {
+                eventSource.Dynamic.AddCallbackForProviderEvent(LoggingSourceConfiguration.MicrosoftExtensionsLoggingProviderName, "ActivityJson/Start", (traceEvent) => {
+                    if (traceEvent.ActivityID == Guid.Empty)
+                    {
+                        // Unexpected
+                        return;
+                    }
 
-            eventSource.Dynamic.AddCallbackForProviderEvent(LoggingSourceConfiguration.MicrosoftExtensionsLoggingProviderName, "ActivityJson/Start", (traceEvent) => {
-                int factoryId = (int)traceEvent.PayloadByName("FactoryID");
-                string categoryName = (string)traceEvent.PayloadByName("LoggerName");
-                string argsJson = (string)traceEvent.PayloadByName("ArgumentsJson");
+                    string argsJson = (string)traceEvent.PayloadByName("ArgumentsJson");
 
-                // TODO: Store this information by logger factory id
-                LogActivityItem item = new()
-                {
-                    ActivityID = traceEvent.ActivityID,
-                    ScopedObject = new LogObject(JsonDocument.Parse(argsJson).RootElement),
-                };
+                    // TODO: Store this information by logger factory id
+                    LogScopeItem item = new()
+                    {
+                        ActivityID = traceEvent.ActivityID,
+                        ScopedObject = new LogObject(JsonDocument.Parse(argsJson).RootElement),
+                    };
 
-                if (stack.Count > 0)
-                {
-                    Guid parentId = stack.Peek();
-                    if (logActivities.TryGetValue(parentId, out LogActivityItem parentItem))
+                    if (activityIdToScope.TryGetValue(traceEvent.RelatedActivityID, out LogScopeItem parentItem))
                     {
                         item.Parent = parentItem;
                     }
-                }
 
-                stack.Push(traceEvent.ActivityID);
-                logActivities[traceEvent.ActivityID] = item;
-            });
-
-            eventSource.Dynamic.AddCallbackForProviderEvent(LoggingSourceConfiguration.MicrosoftExtensionsLoggingProviderName, "ActivityJson/Stop", (traceEvent) => {
-                int factoryId = (int)traceEvent.PayloadByName("FactoryID");
-                string categoryName = (string)traceEvent.PayloadByName("LoggerName");
+                    if (activityIdToScope.Count < Settings.ScopeLimit || activityIdToScope.ContainsKey(traceEvent.ActivityID))
+                    {
+                        activityIdToScope[traceEvent.ActivityID] = item;
+                    }
+                });
 
-                //If we begin collection in the middle of a request, we can receive a stop without having a start.
-                if (stack.Count > 0)
-                {
-                    stack.Pop();
-                    logActivities.Remove(traceEvent.ActivityID);
-                }
-            });
+                eventSource.Dynamic.AddCallbackForProviderEvent(LoggingSourceConfiguration.MicrosoftExtensionsLoggingProviderName, "ActivityJson/Stop", (traceEvent) => {
+                    // Not all stopped event ActivityIds will exist in our tree since there may be scopes already active when we start the trace session.
+                    _ = activityIdToScope.Remove(traceEvent.ActivityID);
+                });
+            }
 
             eventSource.Dynamic.AddCallbackForProviderEvent(LoggingSourceConfiguration.MicrosoftExtensionsLoggingProviderName, "MessageJson", (traceEvent) => {
                 // Level, FactoryID, LoggerName, EventID, EventName, ExceptionJson, ArgumentsJson
@@ -110,14 +121,29 @@ namespace Microsoft.Diagnostics.Monitoring.EventPipe
                 ILogger logger = _factory.CreateLogger(categoryName);
                 List<IDisposable> scopes = new();
 
-                if (logActivities.TryGetValue(traceEvent.ActivityID, out LogActivityItem logActivityItem))
+                //
+                // The MessageJson event will occur with an ActivityId equal to the most relevant activity branch and we can backtrack to the root of the tree
+                // to grab all applicable scopes (where each node we visit is an applicable scope).
+                //
+                // Ideally the ActivityId will always exist in our tree, however if another trace is ongoing that is interested in an event start
+                // within the same async context as our log message then there will be nodes+edges that our tree is unaware of.
+                // This is because TplEventSource's TasksFlowActivityIds is a singleton implementation that is shared for all traces,
+                // regardless of if the other traces have TasksFlowActivityIds enabled.
+                //
+                // In this scenario there's still a chance that only a single branch has occurred and we're the first event logged with the newly branched ActivityId.
+                // In which case we can use the RelatedActivityId to find our way back onto the tree.
+                //
+                // If not then we will be operating on a subtree without a way of getting back to the root node and will only have a subset (if any) of the
+                // applicable scopes.
+                //
+                if (activityIdToScope.TryGetValue(traceEvent.ActivityID, out LogScopeItem scopeItem) ||
+                    activityIdToScope.TryGetValue(traceEvent.RelatedActivityID, out scopeItem))
                 {
-                    // REVIEW: Does order matter here? We're combining everything anyways.
-                    while (logActivityItem != null)
+                    while (scopeItem != null)
                     {
-                        scopes.Add(logger.BeginScope(logActivityItem.ScopedObject));
+                        scopes.Add(logger.BeginScope(scopeItem.ScopedObject));
 
-                        logActivityItem = logActivityItem.Parent;
+                        scopeItem = scopeItem.Parent;
                     }
                 }
 
@@ -190,13 +216,13 @@ namespace Microsoft.Diagnostics.Monitoring.EventPipe
             return state.ToString();
         }
 
-        private class LogActivityItem
+        private class LogScopeItem
         {
             public Guid ActivityID { get; set; }
 
             public LogObject ScopedObject { get; set; }
 
-            public LogActivityItem Parent { get; set; }
+            public LogScopeItem Parent { get; set; }
         }
     }
 }
index c505b819549561f879bcfde2d78fe6d3732193d1..f8133f53a1e7cc6d4adf4b456a51608aadef3056 100644 (file)
@@ -16,5 +16,9 @@ namespace Microsoft.Diagnostics.Monitoring.EventPipe
 
         // This setting will collect logs for the application-defined categories and levels.
         public bool UseAppFilters { get; set; } = true;
+
+        public bool CollectScopes { get; set; } = true;
+
+        public int ScopeLimit { get; set; } = 10_000;
     }
 }
index 0d379b97d71e3a88d5a5bf809cc8cba1e9e5fac4..1decd119a0fd0829f735b2dea68c142d6a8bba6c 100644 (file)
@@ -17,7 +17,7 @@ namespace EventPipeTracee
     {
         private const string AppLoggerCategoryName = "AppLoggerCategory";
 
-        public static int Main(string[] args)
+        public static async Task<int> Main(string[] args)
         {
             int pid = Process.GetCurrentProcess().Id;
             string pipeServerName = args.Length > 0 ? args[0] : null;
@@ -89,7 +89,7 @@ namespace EventPipeTracee
                 }).ConfigureAwait(true);
             }
 
-            TestBodyCore(customCategoryLogger, appCategoryLogger);
+            await TestBodyCore(customCategoryLogger, appCategoryLogger).ConfigureAwait(false);
 
             Console.WriteLine($"{pid} EventPipeTracee: signal end of test data");
             Console.Out.Flush();
@@ -123,17 +123,49 @@ namespace EventPipeTracee
         }
 
         // TODO At some point we may want parameters to choose different test bodies.
-        private static void TestBodyCore(ILogger customCategoryLogger, ILogger appCategoryLogger)
+        private static async Task TestBodyCore(ILogger customCategoryLogger, ILogger appCategoryLogger)
         {
-            //Json data is always converted to strings for ActivityStart events.
-            using (IDisposable scope = customCategoryLogger.BeginScope(new Dictionary<string, object> {
+            TaskCompletionSource secondSetScopes = new(TaskCreationOptions.RunContinuationsAsynchronously);
+            TaskCompletionSource firstFinishedLogging = new(TaskCreationOptions.RunContinuationsAsynchronously);
+            TaskCompletionSource secondFinishedLogging = new(TaskCreationOptions.RunContinuationsAsynchronously);
+
+            Task firstTask = Task.Run(async () => {
+                using (IDisposable scope = customCategoryLogger.BeginScope(new Dictionary<string, object> {
                     { "IntValue", "5" },
                     { "BoolValue", "true" },
                     { "StringValue", "test" } }.ToList()))
-            {
-                customCategoryLogger.LogInformation("Some warning message with {Arg}", 6);
-            }
+                {
+                    // Await for the other task to add its scopes.
+                    await secondSetScopes.Task.ConfigureAwait(false);
+
+                    customCategoryLogger.LogInformation("Some warning message with {Arg}", 6);
+
+                    // Signal other task to log
+                    firstFinishedLogging.SetResult();
+
+                    // Do not dispose scopes until the other task is done
+                    await secondFinishedLogging.Task.ConfigureAwait(false);
+                }
+            });
+
+            Task secondTask = Task.Run(async () => {
+                using (IDisposable scope = customCategoryLogger.BeginScope(new Dictionary<string, object> {
+                    { "IntValue", "6" },
+                    { "BoolValue", "false" },
+                    { "StringValue", "string" } }.ToList()))
+                {
+                    // Signal that we added our scopes and wait for the other task to log
+                    secondSetScopes.SetResult();
+                    await firstFinishedLogging.Task.ConfigureAwait(false);
+                    customCategoryLogger.LogInformation("Some other message with {Arg}", 7);
+                    secondFinishedLogging.SetResult();
+                }
+            });
 
+            await firstTask.ConfigureAwait(false);
+            await secondTask.ConfigureAwait(false);
+
+            //Json data is always converted to strings for ActivityStart events.
             customCategoryLogger.LogWarning(new EventId(7, "AnotherEventId"), "Another message");
 
             appCategoryLogger.LogInformation("Information message.");
index dfe8753b1f0188557b93a8f45a6be782e41638c5..c449efc5d1aa419029e0014b06cd6c9dd1783526 100644 (file)
@@ -263,6 +263,18 @@ namespace Microsoft.Diagnostics.Monitoring.EventPipe.UnitTests
             Assert.Equal(string.Empty, result.EventName);
             Validate(result.Scopes, ("BoolValue", "true"), ("StringValue", "test"), ("IntValue", "5"));
             Validate(result.Arguments, ("Arg", "6"));
+
+            message = reader.ReadLine();
+            Assert.NotNull(message);
+
+            result = JsonSerializer.Deserialize<LoggerTestResult>(message);
+            Assert.Equal("Some other message with 7", result.Message);
+            Assert.Equal(LoggerRemoteTestName, result.Category);
+            Assert.Equal("Information", result.LogLevel);
+            Assert.Equal(0, result.EventId);
+            Assert.Equal(string.Empty, result.EventName);
+            Validate(result.Scopes, ("BoolValue", "false"), ("StringValue", "string"), ("IntValue", "6"));
+            Validate(result.Arguments, ("Arg", "7"));
         }
 
         private static void ValidateLoggerRemoteCategoryWarningMessage(StreamReader reader)