From 97de8816f495e1af73586b45024cd76bf81c0ec1 Mon Sep 17 00:00:00 2001 From: Eugene Zhirov Date: Tue, 21 Nov 2023 09:27:55 +0100 Subject: [PATCH] Add RequestStackwalk parameter to EventPipeSession (#4290) Client-side of https://github.com/dotnet/runtime/pull/84077 and the implementation of https://github.com/dotnet/diagnostics/issues/3696. To simplify the interface I made `EventPipeSessionConfiguration` public and introduced a new method in the DiagnosticsClient: `Task StartEventPipeSessionAsync(EventPipeSessionConfiguration configuration, CancellationToken token)`. This is the only method that supports disabling the stackwalk so no additional overloads with a new bool parameter and no synchronous counterpart. I believe it'd be easier to use and maintain a single async method with the options rather than creating more overloads or default parameters but I may not have all the context here so please correct me if you think it's a bad idea. To deal with the backward compatibility I only use `CollectTracingV3` when necessary i.e. when `RequestStackwalk` option is set to false. I think it's a good compromise between the added complexity and potentially surprising behavior: * when the client is old and the runtime is new everything works because the runtime supports `CollectTracingV2` * when the client is new but the runtime is old everything works until the new option is used. When it's used the session won't start as `CollectTracingV3` doesn't exist server side: there'd be no clear error message but it's documented in the option summary. * when both the client and the runtime are new either `CollectTracingV2` or `CollectTracingV3` may be used transparently for the user * we may use the same trick when we introduce `CollectTracingV4` The alternative is to implement version negotiation of some sort but I'd like to have your opinion before attempting this as handling the errors correctly wouldn't be easy (f.e. in [my current fork](https://github.com/criteo-forks/diagnostics/commit/3946b4a88bbb08cdbbf4e71364fd46d32c4ab3dc#diff-e8365039cd36eae3dec611784fc7076be7dadeda1007733412aaaa63f40a748fR39) I just hide the exception) The testing turned out to be a bit complex as I needed to convert EventPipe stream to `TraceLog` to be able to read the stacktraces. I couldn't achieve that without writing data to a file. Afaiu the stackwalk may not work correctly without the rundown that only happens at the end of the session so I wonder if looking at the stacktraces with a live session is even possible (though iirc netfw+ETW could do that back in the days) ? Thanks for your time ! --- documentation/design-docs/ipc-protocol.md | 75 +++++++++++- .../DiagnosticsClient/DiagnosticsClient.cs | 25 +++- .../DiagnosticsClient/EventPipeSession.cs | 18 +-- .../EventPipeSessionConfiguration.cs | 107 +++++++++++++++--- .../DiagnosticsIpc/IpcCommands.cs | 1 + .../DiagnosticsClientApiShim.cs | 11 ++ .../DiagnosticsClientApiShimExtensions.cs | 5 + .../EventPipeSessionTests.cs | 69 +++++++++++ 8 files changed, 278 insertions(+), 33 deletions(-) diff --git a/documentation/design-docs/ipc-protocol.md b/documentation/design-docs/ipc-protocol.md index a8c2496a9..d55f815a4 100644 --- a/documentation/design-docs/ipc-protocol.md +++ b/documentation/design-docs/ipc-protocol.md @@ -370,7 +370,7 @@ enum class ProfilerCommandId : uint8_t AttachProfiler = 0x01, // future } -``` +``` See: [Profiler Commands](#Profiler-Commands) ```c++ @@ -460,7 +460,7 @@ Payload array providers } -provider_config +provider_config { ulong keywords, uint logLevel, @@ -482,7 +482,7 @@ Followed by an Optional Continuation of a `nettrace` format stream of events. Command Code: `0x0203` -The `CollectTracing2` Command is an extension of the `CollectTracing` command - its behavior is the same as `CollectTracing` command, except that it has another field that lets you specify whether rundown events should be fired by the runtime. +The `CollectTracing2` command is an extension of the `CollectTracing` command - its behavior is the same as `CollectTracing` command, except that it has another field that lets you specify whether rundown events should be fired by the runtime. #### Inputs: @@ -500,7 +500,7 @@ A `provider_config` is composed of the following data: * `string filter_data` (optional): Filter information > see ETW documentation for a more detailed explanation of Keywords, Filters, and Log Level. -> +> #### Returns (as an IPC Message Payload): Header: `{ Magic; 28; 0xFF00; 0x0000; }` @@ -520,7 +520,7 @@ Payload array providers } -provider_config +provider_config { ulong keywords, uint logLevel, @@ -538,7 +538,70 @@ Payload ``` Followed by an Optional Continuation of a `nettrace` format stream of events. -### `StopTracing` +### `CollectTracing3` + +Command Code: `0x0204` + +The `CollectTracing3` command is an extension of the `CollectTracing2` command - its behavior is the same as `CollectTracing2` command, except that it has another field that lets you specify whether the stackwalk should be made for each event. + +#### Inputs: + +Header: `{ Magic; Size; 0x0203; 0x0000 }` + +* `uint circularBufferMB`: The size of the circular buffer used for buffering event data while streaming +* `uint format`: 0 for the legacy NetPerf format and 1 for the NetTrace format +* `bool requestRundown`: Indicates whether rundown should be fired by the runtime. +* `bool requestStackwalk`: Indicates whether stacktrace information should be recorded. +* `array providers`: The providers to turn on for the streaming session + +A `provider_config` is composed of the following data: +* `ulong keywords`: The keywords to turn on with this providers +* `uint logLevel`: The level of information to turn on +* `string provider_name`: The name of the provider +* `string filter_data` (optional): Filter information + +> see ETW documentation for a more detailed explanation of Keywords, Filters, and Log Level. +> +#### Returns (as an IPC Message Payload): + +Header: `{ Magic; 28; 0xFF00; 0x0000; }` + +`CollectTracing2` returns: +* `ulong sessionId`: the ID for the stream session starting on the current connection + +##### Details: + +Input: +``` +Payload +{ + uint circularBufferMB, + uint format, + bool requestRundown, + bool requestStackwalk, + array providers +} + +provider_config +{ + ulong keywords, + uint logLevel, + string provider_name, + string filter_data (optional) +} +``` + +Returns: +```c +Payload +{ + ulong sessionId +} +``` +Followed by an Optional Continuation of a `nettrace` format stream of events. + + +### `StopTracing` Command Code: `0x0201` diff --git a/src/Microsoft.Diagnostics.NETCore.Client/DiagnosticsClient/DiagnosticsClient.cs b/src/Microsoft.Diagnostics.NETCore.Client/DiagnosticsClient/DiagnosticsClient.cs index 18f273b79..564ade999 100644 --- a/src/Microsoft.Diagnostics.NETCore.Client/DiagnosticsClient/DiagnosticsClient.cs +++ b/src/Microsoft.Diagnostics.NETCore.Client/DiagnosticsClient/DiagnosticsClient.cs @@ -70,7 +70,8 @@ namespace Microsoft.Diagnostics.NETCore.Client /// public EventPipeSession StartEventPipeSession(IEnumerable providers, bool requestRundown = true, int circularBufferMB = DefaultCircularBufferMB) { - return EventPipeSession.Start(_endpoint, providers, requestRundown, circularBufferMB); + EventPipeSessionConfiguration config = new(providers, circularBufferMB, requestRundown: requestRundown, requestStackwalk: true); + return EventPipeSession.Start(_endpoint, config); } /// @@ -84,7 +85,8 @@ namespace Microsoft.Diagnostics.NETCore.Client /// public EventPipeSession StartEventPipeSession(EventPipeProvider provider, bool requestRundown = true, int circularBufferMB = DefaultCircularBufferMB) { - return EventPipeSession.Start(_endpoint, new[] { provider }, requestRundown, circularBufferMB); + EventPipeSessionConfiguration config = new(new[] {provider}, circularBufferMB, requestRundown: requestRundown, requestStackwalk: true); + return EventPipeSession.Start(_endpoint, config); } /// @@ -100,7 +102,8 @@ namespace Microsoft.Diagnostics.NETCore.Client public Task StartEventPipeSessionAsync(IEnumerable providers, bool requestRundown, int circularBufferMB = DefaultCircularBufferMB, CancellationToken token = default) { - return EventPipeSession.StartAsync(_endpoint, providers, requestRundown, circularBufferMB, token); + EventPipeSessionConfiguration config = new(providers, circularBufferMB, requestRundown: requestRundown, requestStackwalk: true); + return EventPipeSession.StartAsync(_endpoint, config, token); } /// @@ -116,7 +119,21 @@ namespace Microsoft.Diagnostics.NETCore.Client public Task StartEventPipeSessionAsync(EventPipeProvider provider, bool requestRundown, int circularBufferMB = DefaultCircularBufferMB, CancellationToken token = default) { - return EventPipeSession.StartAsync(_endpoint, new[] { provider }, requestRundown, circularBufferMB, token); + EventPipeSessionConfiguration config = new(new[] {provider}, circularBufferMB, requestRundown: requestRundown, requestStackwalk: true); + return EventPipeSession.StartAsync(_endpoint, config, token); + } + + /// + /// Start tracing the application and return an EventPipeSession object + /// + /// Configuration of this EventPipeSession + /// The token to monitor for cancellation requests. + /// + /// An EventPipeSession object representing the EventPipe session that just started. + /// + public Task StartEventPipeSessionAsync(EventPipeSessionConfiguration configuration, CancellationToken token) + { + return EventPipeSession.StartAsync(_endpoint, configuration, token); } /// diff --git a/src/Microsoft.Diagnostics.NETCore.Client/DiagnosticsClient/EventPipeSession.cs b/src/Microsoft.Diagnostics.NETCore.Client/DiagnosticsClient/EventPipeSession.cs index d61aace34..355a815fe 100644 --- a/src/Microsoft.Diagnostics.NETCore.Client/DiagnosticsClient/EventPipeSession.cs +++ b/src/Microsoft.Diagnostics.NETCore.Client/DiagnosticsClient/EventPipeSession.cs @@ -28,16 +28,16 @@ namespace Microsoft.Diagnostics.NETCore.Client public Stream EventStream => _response.Continuation; - internal static EventPipeSession Start(IpcEndpoint endpoint, IEnumerable providers, bool requestRundown, int circularBufferMB) + internal static EventPipeSession Start(IpcEndpoint endpoint, EventPipeSessionConfiguration config) { - IpcMessage requestMessage = CreateStartMessage(providers, requestRundown, circularBufferMB); + IpcMessage requestMessage = CreateStartMessage(config); IpcResponse? response = IpcClient.SendMessageGetContinuation(endpoint, requestMessage); return CreateSessionFromResponse(endpoint, ref response, nameof(Start)); } - internal static async Task StartAsync(IpcEndpoint endpoint, IEnumerable providers, bool requestRundown, int circularBufferMB, CancellationToken cancellationToken) + internal static async Task StartAsync(IpcEndpoint endpoint, EventPipeSessionConfiguration config, CancellationToken cancellationToken) { - IpcMessage requestMessage = CreateStartMessage(providers, requestRundown, circularBufferMB); + IpcMessage requestMessage = CreateStartMessage(config); IpcResponse? response = await IpcClient.SendMessageGetContinuationAsync(endpoint, requestMessage, cancellationToken).ConfigureAwait(false); return CreateSessionFromResponse(endpoint, ref response, nameof(StartAsync)); } @@ -81,10 +81,14 @@ namespace Microsoft.Diagnostics.NETCore.Client } } - private static IpcMessage CreateStartMessage(IEnumerable providers, bool requestRundown, int circularBufferMB) + private static IpcMessage CreateStartMessage(EventPipeSessionConfiguration config) { - EventPipeSessionConfiguration config = new(circularBufferMB, EventPipeSerializationFormat.NetTrace, providers, requestRundown); - return new IpcMessage(DiagnosticsServerCommandSet.EventPipe, (byte)EventPipeCommandId.CollectTracing2, config.SerializeV2()); + // To keep backward compatibility with older runtimes we only use newer serialization format when needed + // V3 has added support to disable the stacktraces + bool shouldUseV3 = !config.RequestStackwalk; + EventPipeCommandId command = shouldUseV3 ? EventPipeCommandId.CollectTracing3 : EventPipeCommandId.CollectTracing2; + byte[] payload = shouldUseV3 ? config.SerializeV3() : config.SerializeV2(); + return new IpcMessage(DiagnosticsServerCommandSet.EventPipe, (byte)command, payload); } private static EventPipeSession CreateSessionFromResponse(IpcEndpoint endpoint, ref IpcResponse? response, string operationName) diff --git a/src/Microsoft.Diagnostics.NETCore.Client/DiagnosticsClient/EventPipeSessionConfiguration.cs b/src/Microsoft.Diagnostics.NETCore.Client/DiagnosticsClient/EventPipeSessionConfiguration.cs index 0e837d908..d4884b650 100644 --- a/src/Microsoft.Diagnostics.NETCore.Client/DiagnosticsClient/EventPipeSessionConfiguration.cs +++ b/src/Microsoft.Diagnostics.NETCore.Client/DiagnosticsClient/EventPipeSessionConfiguration.cs @@ -4,6 +4,7 @@ using System; using System.Collections.Generic; using System.IO; +using System.Linq; namespace Microsoft.Diagnostics.NETCore.Client { @@ -13,9 +14,29 @@ namespace Microsoft.Diagnostics.NETCore.Client NetTrace } - internal class EventPipeSessionConfiguration + public sealed class EventPipeSessionConfiguration { - public EventPipeSessionConfiguration(int circularBufferSizeMB, EventPipeSerializationFormat format, IEnumerable providers, bool requestRundown = true) + /// + /// Creates a new configuration object for the EventPipeSession. + /// For details, see the documentation of each property of this object. + /// + /// An IEnumerable containing the list of Providers to turn on. + /// The size of the runtime's buffer for collecting events in MB + /// If true, request rundown events from the runtime. + /// If true, record a stacktrace for every emitted event. + public EventPipeSessionConfiguration( + IEnumerable providers, + int circularBufferSizeMB = 256, + bool requestRundown = true, + bool requestStackwalk = true) : this(circularBufferSizeMB, EventPipeSerializationFormat.NetTrace, providers, requestRundown, requestStackwalk) + {} + + private EventPipeSessionConfiguration( + int circularBufferSizeMB, + EventPipeSerializationFormat format, + IEnumerable providers, + bool requestRundown, + bool requestStackwalk) { if (circularBufferSizeMB == 0) { @@ -35,36 +56,60 @@ namespace Microsoft.Diagnostics.NETCore.Client CircularBufferSizeInMB = circularBufferSizeMB; Format = format; RequestRundown = requestRundown; + RequestStackwalk = requestStackwalk; _providers = new List(providers); } + /// + /// If true, request rundown events from the runtime. + /// + /// Rundown events are needed to correctly decode the stacktrace information for dynamically generated methods. + /// Rundown happens at the end of the session. It increases the time needed to finish the session and, for large applications, may have important impact on the final trace file size. + /// Consider to set this parameter to false if you don't need stacktrace information or if you're analyzing events on the fly. + /// + /// public bool RequestRundown { get; } + + /// + /// The size of the runtime's buffer for collecting events in MB. + /// If the buffer size is too small to accommodate all in-flight events some events may be lost. + /// public int CircularBufferSizeInMB { get; } - public EventPipeSerializationFormat Format { get; } + /// + /// If true, record a stacktrace for every emitted event. + /// + /// The support of this parameter only comes with NET 9. Before, the stackwalk is always enabled and if this property is set to false the connection attempt will fail. + /// Disabling the stackwalk makes event collection overhead considerably less + /// Note that some events may choose to omit the stacktrace regardless of this parameter, specifically the events emitted from the native runtime code. + /// If the stacktrace collection is disabled application-wide (using the env variable DOTNET_EventPipeEnableStackwalk) this parameter is ignored. + /// + /// + public bool RequestStackwalk { get; } + + /// + /// Providers to enable for this session. + /// public IReadOnlyCollection Providers => _providers.AsReadOnly(); private readonly List _providers; - public byte[] SerializeV2() + internal EventPipeSerializationFormat Format { get; } + } + + internal static class EventPipeSessionConfigurationExtensions + { + public static byte[] SerializeV2(this EventPipeSessionConfiguration config) { byte[] serializedData = null; using (MemoryStream stream = new()) using (BinaryWriter writer = new(stream)) { - writer.Write(CircularBufferSizeInMB); - writer.Write((uint)Format); - writer.Write(RequestRundown); - - writer.Write(Providers.Count); - foreach (EventPipeProvider provider in Providers) - { - writer.Write(unchecked((ulong)provider.Keywords)); - writer.Write((uint)provider.EventLevel); + writer.Write(config.CircularBufferSizeInMB); + writer.Write((uint)config.Format); + writer.Write(config.RequestRundown); - writer.WriteString(provider.Name); - writer.WriteString(provider.GetArgumentString()); - } + SerializeProviders(config, writer); writer.Flush(); serializedData = stream.ToArray(); @@ -73,6 +118,36 @@ namespace Microsoft.Diagnostics.NETCore.Client return serializedData; } + public static byte[] SerializeV3(this EventPipeSessionConfiguration config) + { + byte[] serializedData = null; + using (MemoryStream stream = new()) + using (BinaryWriter writer = new(stream)) + { + writer.Write(config.CircularBufferSizeInMB); + writer.Write((uint)config.Format); + writer.Write(config.RequestRundown); + writer.Write(config.RequestStackwalk); + + SerializeProviders(config, writer); + writer.Flush(); + serializedData = stream.ToArray(); + } + + return serializedData; + } + + private static void SerializeProviders(EventPipeSessionConfiguration config, BinaryWriter writer) + { + writer.Write(config.Providers.Count); + foreach (EventPipeProvider provider in config.Providers) + { + writer.Write(unchecked((ulong)provider.Keywords)); + writer.Write((uint)provider.EventLevel); + writer.WriteString(provider.Name); + writer.WriteString(provider.GetArgumentString()); + } + } } } diff --git a/src/Microsoft.Diagnostics.NETCore.Client/DiagnosticsIpc/IpcCommands.cs b/src/Microsoft.Diagnostics.NETCore.Client/DiagnosticsIpc/IpcCommands.cs index f9eb3019e..b9d6ea30a 100644 --- a/src/Microsoft.Diagnostics.NETCore.Client/DiagnosticsIpc/IpcCommands.cs +++ b/src/Microsoft.Diagnostics.NETCore.Client/DiagnosticsIpc/IpcCommands.cs @@ -25,6 +25,7 @@ namespace Microsoft.Diagnostics.NETCore.Client StopTracing = 0x01, CollectTracing = 0x02, CollectTracing2 = 0x03, + CollectTracing3 = 0x04, } internal enum DumpCommandId : byte diff --git a/src/tests/Microsoft.Diagnostics.NETCore.Client/DiagnosticsClientApiShim.cs b/src/tests/Microsoft.Diagnostics.NETCore.Client/DiagnosticsClientApiShim.cs index 21aa5a6e4..40458a841 100644 --- a/src/tests/Microsoft.Diagnostics.NETCore.Client/DiagnosticsClientApiShim.cs +++ b/src/tests/Microsoft.Diagnostics.NETCore.Client/DiagnosticsClientApiShim.cs @@ -63,6 +63,17 @@ namespace Microsoft.Diagnostics.NETCore.Client } } + public async Task StartEventPipeSession(EventPipeSessionConfiguration config, TimeSpan timeout) + { + if (_useAsync) + { + CancellationTokenSource cancellation = new(timeout); + return await _client.StartEventPipeSessionAsync(config, cancellation.Token).ConfigureAwait(false); + } + + throw new NotSupportedException($"{nameof(StartEventPipeSession)} with config parameter is only supported on async path"); + } + public async Task StartEventPipeSession(IEnumerable providers, TimeSpan timeout) { if (_useAsync) diff --git a/src/tests/Microsoft.Diagnostics.NETCore.Client/DiagnosticsClientApiShimExtensions.cs b/src/tests/Microsoft.Diagnostics.NETCore.Client/DiagnosticsClientApiShimExtensions.cs index dc5492e5f..2f83077f4 100644 --- a/src/tests/Microsoft.Diagnostics.NETCore.Client/DiagnosticsClientApiShimExtensions.cs +++ b/src/tests/Microsoft.Diagnostics.NETCore.Client/DiagnosticsClientApiShimExtensions.cs @@ -37,6 +37,11 @@ namespace Microsoft.Diagnostics.NETCore.Client return shim.StartEventPipeSession(provider, DefaultPositiveVerificationTimeout); } + public static Task StartEventPipeSession(this DiagnosticsClientApiShim shim, EventPipeSessionConfiguration config) + { + return shim.StartEventPipeSession(config, DefaultPositiveVerificationTimeout); + } + public static Task EnablePerfMap(this DiagnosticsClientApiShim shim, PerfMapType type) { return shim.EnablePerfMap(type, DefaultPositiveVerificationTimeout); diff --git a/src/tests/Microsoft.Diagnostics.NETCore.Client/EventPipeSessionTests.cs b/src/tests/Microsoft.Diagnostics.NETCore.Client/EventPipeSessionTests.cs index 9bfeb9c9e..e5344e074 100644 --- a/src/tests/Microsoft.Diagnostics.NETCore.Client/EventPipeSessionTests.cs +++ b/src/tests/Microsoft.Diagnostics.NETCore.Client/EventPipeSessionTests.cs @@ -4,10 +4,14 @@ using System; using System.Collections.Generic; using System.Diagnostics.Tracing; +using System.IO; +using System.Linq; using System.Threading; using System.Threading.Tasks; using Microsoft.Diagnostics.TestHelpers; using Microsoft.Diagnostics.Tracing; +using Microsoft.Diagnostics.Tracing.Etlx; +using Microsoft.Diagnostics.Tracing.Session; using Xunit; using Xunit.Abstractions; using Xunit.Extensions; @@ -170,5 +174,70 @@ namespace Microsoft.Diagnostics.NETCore.Client Assert.True(session.EventStream != null); runner.Stop(); } + + [SkippableTheory, MemberData(nameof(Configurations))] + public async Task StartEventPipeSessionWithoutStackwalkTestAsync(TestConfiguration testConfig) + { + if (testConfig.RuntimeFrameworkVersionMajor < 9) + { + throw new SkipTestException("Not supported on < .NET 9.0"); + } + + await using TestRunner runner = await TestRunner.Create(testConfig, _output, "Tracee"); + await runner.Start(testProcessTimeout: 60_000); + DiagnosticsClientApiShim clientShim = new(new DiagnosticsClient(runner.Pid), useAsync: true); + + var config = new EventPipeSessionConfiguration( + new[] { + new EventPipeProvider("System.Runtime", EventLevel.Informational, 0, new Dictionary() { + { "EventCounterIntervalSec", "1" } + }) + }, + circularBufferSizeMB: 256, + requestRundown: true, + requestStackwalk: true); + + string nettraceFileName = Path.GetTempFileName(); + using (EventPipeSession session = await clientShim.StartEventPipeSession(config)) + { + var tmpFileStream = File.Create(nettraceFileName); + + Task streamTask = Task.Run(() => { + try + { + session.EventStream.CopyTo(tmpFileStream); + } + catch (Exception ex) + { + // This exception can happen if the target process exits while EventPipeEventSource is in the middle of reading from the pipe. + runner.WriteLine($"Error encountered while processing events {ex}"); + } + finally + { + runner.WakeupTracee(); + } + }); + runner.WriteLine("Waiting for stream Task"); + streamTask.Wait(10000); + runner.WriteLine("Done waiting for stream Task"); + session.Stop(); + await streamTask; + + tmpFileStream.Close(); + runner.WriteLine($"EventPipe file is written, size: {new FileInfo(nettraceFileName).Length} bytes"); + } + + string etlxFileName = TraceLog.CreateFromEventPipeDataFile(nettraceFileName); + using (TraceLog log = TraceLog.OpenOrConvert(etlxFileName)) + { + foreach (TraceEvent e in log.Events) + { + runner.WriteLine($"PARSED {e.ProviderName} {e.EventName} {e.CallStack()?.CodeAddress?.FullMethodName}"); + Assert.True(e.CallStack() == null); + } + } + + runner.Stop(); + } } } -- 2.34.1