* Refactoring to remove polling for type cache clearing session
authorJohn Salem <josalem@microsoft.com>
Sat, 2 Nov 2019 01:10:17 +0000 (18:10 -0700)
committerJohn Salem <josalem@microsoft.com>
Sat, 2 Nov 2019 01:10:17 +0000 (18:10 -0700)
* Switch to a static function on Stopwatch to avoid multi-threading
concerns with Stopwatch.Elapsed

* Change tool name when writing memory graph

* Add explicit null check on gcDumpSession to catch NRE in the event
EventPipe failed to connect.

* Add comment in README about future of code shared with PerfView

* Refactor to remove wrapping task for type flush session
* make EventPipeSession disposable to ensure stream is closed in case of
exception in the reader
* add using statements around source.Process calls
* add additional logging to verbose log

* Add additional logging
* add timeout option

* Remove unnecessary using statements

* Update src/Tools/dotnet-gcdump/CommandLine/CollectCommandHandler.cs

Co-Authored-By: Noah Falk <noahfalk@users.noreply.github.com>
* Update src/Tools/dotnet-gcdump/DotNetHeapDump/README.md

Co-Authored-By: Noah Falk <noahfalk@users.noreply.github.com>
* Fix weird unintended docs change

* Refactor to reduce multi-threading
* use cancellation token to cancel tasks on ctrl-c
* swallow TaskCancellationExceptions
* use volatile members for bools that are used between threads

* Check dump status on return to amke sure we don't throw additional
errors if event collection fails

* fix last whitespace issue

documentation/design-docs/ipc-protocol.md
documentation/dotnet-gcdump-instructions.md
src/Tools/dotnet-gcdump/CommandLine/CollectCommandHandler.cs
src/Tools/dotnet-gcdump/DotNetHeapDump/EventPipeDotNetHeapDumper.cs
src/Tools/dotnet-gcdump/DotNetHeapDump/README.md

index 6d122f1648f840665f98f809e4ca465b104a77c3..7c9025b403ea042564ae85fd461da5032e9e3ee2 100644 (file)
@@ -578,7 +578,7 @@ Header: `{ Magic; Size; 0x0101; 0x0000 }`
   * WithHeap = 2,
   * Triage = 3,
   * Full = 4
-* `uint diagnostics`: The providers to turn on for the streaming session
+* `uint diagnostics`: If set to 1, log to console the dump generation diagnostics
   * `0` or `1` for on or off
 
 #### Returns (as an IPC Message Payload):
index 1f0b8ae3e8813609995e35c5ee6d1376ebfc493b..e144e4ce70468b801cc2013c270bedc2a2c14935 100644 (file)
@@ -74,4 +74,5 @@ Options:
                                      where YYYYMMDD is Year/Month/Day and HHMMSS is Hour/Minute/Second. Otherwise, it is the full path
                                      and file name of the dump.
   -v, --verbose                      Output the log while collecting the gcdump
+  -t, --timeout <timeout>            Give up on collecting the gcdump if it takes longer the this many seconds. The default value is 30s
 ```
index 82cda9eec71ee2ee43649c0cfbae08bba7aaa62e..3eecca44c985f3e5db2d63625b7595ff98edb2fd 100644 (file)
@@ -18,7 +18,7 @@ namespace Microsoft.Diagnostics.Tools.GCDump
 {
     internal static class CollectCommandHandler
     {
-        delegate Task<int> CollectDelegate(CancellationToken ct, IConsole console, int processId, string output, bool verbose);
+        delegate Task<int> CollectDelegate(CancellationToken ct, IConsole console, int processId, string output, int timeout, bool verbose);
 
         /// <summary>
         /// Collects a gcdump from a currently running process.
@@ -28,7 +28,7 @@ namespace Microsoft.Diagnostics.Tools.GCDump
         /// <param name="processId">The process to collect the gcdump from.</param>
         /// <param name="output">The output path for the collected gcdump.</param>
         /// <returns></returns>
-        private static async Task<int> Collect(CancellationToken ct, IConsole console, int processId, string output, bool verbose)
+        private static async Task<int> Collect(CancellationToken ct, IConsole console, int processId, string output, int timeout, bool verbose)
         {
             try
             {
@@ -53,16 +53,31 @@ namespace Microsoft.Diagnostics.Tools.GCDump
                 {
                     var memoryGraph = new Graphs.MemoryGraph(50_000);
                     var heapInfo = new DotNetHeapInfo();
-                    EventPipeDotNetHeapDumper.DumpFromEventPipe(ct, processId, memoryGraph, verbose ? Console.Out : TextWriter.Null, heapInfo);
+                    if (!EventPipeDotNetHeapDumper.DumpFromEventPipe(ct, processId, memoryGraph, verbose ? Console.Out : TextWriter.Null, timeout, heapInfo))
+                        return false;
                     memoryGraph.AllowReading();
-                    GCHeapDump.WriteMemoryGraph(memoryGraph, outputFileInfo.FullName, "dotnet-trace");
+                    GCHeapDump.WriteMemoryGraph(memoryGraph, outputFileInfo.FullName, "dotnet-gcdump");
+                    return true;
                 });
 
-                await dumpTask;
+                var fDumpSuccess = await dumpTask;
 
-                outputFileInfo.Refresh();
-                Console.Out.WriteLine($"\tFinished writing {outputFileInfo.Length} bytes.");
-                return 0;
+                if (fDumpSuccess)
+                {
+                    outputFileInfo.Refresh();
+                    Console.Out.WriteLine($"\tFinished writing {outputFileInfo.Length} bytes.");
+                    return 0;
+                }
+                else if (ct.IsCancellationRequested)
+                {
+                    Console.Out.WriteLine($"\tCancelled.");
+                    return -1;
+                }
+                else
+                {
+                    Console.Out.WriteLine($"\tFailed to collect gcdump. Try running with '-v' for more information.");
+                    return -1;
+                }
             }
             catch (Exception ex)
             {
@@ -78,7 +93,8 @@ namespace Microsoft.Diagnostics.Tools.GCDump
                 symbols: new Option[] {
                     ProcessIdOption(),
                     OutputPathOption(),
-                    VerboseOption()
+                    VerboseOption(),
+                    TimeoutOption()
                 },
                 handler: HandlerDescriptor.FromDelegate((CollectDelegate)Collect).GetCommandHandler());
 
@@ -102,5 +118,13 @@ namespace Microsoft.Diagnostics.Tools.GCDump
                 description: $"Output the log while collecting the gcdump",
                 argument: new Argument<bool>(defaultValue: false) { Name = "verbose" },
                 isHidden: false);
+
+        private static int DefaultTimeout = 30;
+        private static Option TimeoutOption() =>
+            new Option(
+                aliases: new[] { "-t", "--timeout" },
+                description: $"Give up on collecting the gcdump if it takes longer than this many seconds. The default value is {DefaultTimeout}s",
+                argument: new Argument<int>(defaultValue: DefaultTimeout) { Name = "timeout" },
+                isHidden: false);
     }
 }
index a1ce9044a4be7ab44f3e8301bf148f6fac38a59e..440a2b2e5130a884da2dae0fe4dbf1e734a93475 100644 (file)
@@ -11,14 +11,16 @@ using System;
 using System.Collections.Generic;
 using System.Diagnostics;
 using System.IO;
-using System.Linq;
-using System.Text;
+using System.Threading;
 using System.Threading.Tasks;
 
 namespace Microsoft.Diagnostics.Tools.GCDump
 {
     public static class EventPipeDotNetHeapDumper
     {
+        internal static volatile bool eventPipeDataPresent = false;
+        internal static volatile bool dumpComplete = false;
+
         /// <summary>
         /// Given a factory for creating an EventPipe session with the appropriate provider and keywords turned on,
         /// generate a GCHeapDump using the resulting events.  The correct keywords and provider name
@@ -30,121 +32,115 @@ namespace Microsoft.Diagnostics.Tools.GCDump
         /// <param name="log"></param>
         /// <param name="dotNetInfo"></param>
         /// <returns></returns>
-        public static bool DumpFromEventPipe(CancellationToken ct, int processID, MemoryGraph memoryGraph, TextWriter log, DotNetHeapInfo dotNetInfo = null)
+        public static bool DumpFromEventPipe(CancellationToken ct, int processID, MemoryGraph memoryGraph, TextWriter log, int timeout, DotNetHeapInfo dotNetInfo = null)
         {
-            var sw = Stopwatch.StartNew();
+            var startTicks = Stopwatch.GetTimestamp();
+            Func<TimeSpan> getElapsed = () => TimeSpan.FromTicks(Stopwatch.GetTimestamp() - startTicks);
+
             var dumper = new DotNetHeapDumpGraphReader(log)
             {
                 DotNetHeapInfo = dotNetInfo
             };
-            bool dumpComplete = false;
-            bool listening = false;
 
-            EventPipeSession gcDumpSession = null;
-            Task readerTask = null;
             try
             {
-                bool eventPipeDataPresent = false;
-                TimeSpan lastEventPipeUpdate = sw.Elapsed;
-                EventPipeSession typeFlushSession = null;
+                TimeSpan lastEventPipeUpdate = getElapsed();
                 bool fDone = false;
-                var otherListening = false;
-                log.WriteLine("{0,5:n1}s: Creating type table flushing task", sw.Elapsed.TotalSeconds);
-                var typeTableFlushTask = Task.Factory.StartNew(() =>
+                log.WriteLine("{0,5:n1}s: Creating type table flushing task", getElapsed().TotalSeconds);
+
+                using (EventPipeSession typeFlushSession = new EventPipeSession(processID, new List<Provider> { new Provider("Microsoft-DotNETCore-SampleProfiler") }, false))
                 {
-                    typeFlushSession = new EventPipeSession(processID, new List<Provider> { new Provider("Microsoft-DotNETCore-SampleProfiler") }, false);
-                    otherListening = true;
-                    log.WriteLine("{0,5:n1}s: Flushing the type table", sw.Elapsed.TotalSeconds);
-                    typeFlushSession.Source.AllEvents += Task.Run(() => 
-                    {
+                    log.WriteLine("{0,5:n1}s: Flushing the type table", getElapsed().TotalSeconds);
+                    typeFlushSession.Source.AllEvents += (traceEvent) => {
                         if (!fDone)
                         {
                             fDone = true;
-                            typeFlushSession.EndSession();
+                            Task.Run(() => typeFlushSession.EndSession())
+                                .ContinueWith(_ => typeFlushSession.Source.StopProcessing());
                         }
-                    });
+                    };
+
                     typeFlushSession.Source.Process();
-                    log.WriteLine("{0,5:n1}s: Done flushing the type table", sw.Elapsed.TotalSeconds);
-                });
+                    log.WriteLine("{0,5:n1}s: Done flushing the type table", getElapsed().TotalSeconds);
+                }
 
-                await typeTableFlushTask;
 
-                // Set up a separate thread that will listen for EventPipe events coming back telling us we succeeded. 
-                readerTask = Task.Factory.StartNew(delegate
-                {
-                    // Start the providers and trigger the GCs.  
-                    log.WriteLine("{0,5:n1}s: Requesting a .NET Heap Dump", sw.Elapsed.TotalSeconds);
+                // Start the providers and trigger the GCs.  
+                log.WriteLine("{0,5:n1}s: Requesting a .NET Heap Dump", getElapsed().TotalSeconds);
 
-                    gcDumpSession = new EventPipeSession(processID, new List<Provider> { new Provider("Microsoft-Windows-DotNETRuntime", (ulong)(ClrTraceEventParser.Keywords.GCHeapSnapshot)) });
-                    int gcNum = -1;
+                using EventPipeSession gcDumpSession = new EventPipeSession(processID, new List<Provider> { new Provider("Microsoft-Windows-DotNETRuntime", (ulong)(ClrTraceEventParser.Keywords.GCHeapSnapshot)) });
+                log.WriteLine("{0,5:n1}s: gcdump EventPipe Session started", getElapsed().TotalSeconds);
 
-                    gcDumpSession.Source.Clr.GCStart += delegate (GCStartTraceData data)
+                int gcNum = -1;
+
+                gcDumpSession.Source.Clr.GCStart += delegate (GCStartTraceData data)
+                {
+                    if (data.ProcessID != processID)
                     {
-                        if (data.ProcessID != processID)
-                        {
-                            return;
-                        }
+                        return;
+                    }
 
-                        eventPipeDataPresent = true;
+                    eventPipeDataPresent = true;
 
-                        if (gcNum < 0 && data.Depth == 2 && data.Type != GCType.BackgroundGC)
-                        {
-                            gcNum = data.Count;
-                            log.WriteLine("{0,5:n1}s: .NET Dump Started...", sw.Elapsed.TotalSeconds);
-                        }
-                    };
-
-                    gcDumpSession.Source.Clr.GCStop += delegate (GCEndTraceData data)
+                    if (gcNum < 0 && data.Depth == 2 && data.Type != GCType.BackgroundGC)
                     {
-                        if (data.ProcessID != processID)
-                        {
-                            return;
-                        }
-
-                        if (data.Count == gcNum)
-                        {
-                            log.WriteLine("{0,5:n1}s: .NET GC Complete.", sw.Elapsed.TotalSeconds);
-                            dumpComplete = true;
-                        }
-                    };
+                        gcNum = data.Count;
+                        log.WriteLine("{0,5:n1}s: .NET Dump Started...", getElapsed().TotalSeconds);
+                    }
+                };
 
-                    gcDumpSession.Source.Clr.GCBulkNode += delegate (GCBulkNodeTraceData data)
+                gcDumpSession.Source.Clr.GCStop += delegate (GCEndTraceData data)
+                {
+                    if (data.ProcessID != processID)
                     {
-                        if (data.ProcessID != processID)
-                        {
-                            return;
-                        }
+                        return;
+                    }
 
-                        eventPipeDataPresent = true;
+                    if (data.Count == gcNum)
+                    {
+                        log.WriteLine("{0,5:n1}s: .NET GC Complete.", getElapsed().TotalSeconds);
+                        dumpComplete = true;
+                    }
+                };
 
-                        if ((sw.Elapsed - lastEventPipeUpdate).TotalMilliseconds > 500)
-                        {
-                            log.WriteLine("{0,5:n1}s: Making GC Heap Progress...", sw.Elapsed.TotalSeconds);
-                        }
+                gcDumpSession.Source.Clr.GCBulkNode += delegate (GCBulkNodeTraceData data)
+                {
+                    if (data.ProcessID != processID)
+                    {
+                        return;
+                    }
 
-                        lastEventPipeUpdate = sw.Elapsed;
-                    };
+                    eventPipeDataPresent = true;
 
-                    if (memoryGraph != null)
+                    if ((getElapsed() - lastEventPipeUpdate).TotalMilliseconds > 500)
                     {
-                        dumper.SetupCallbacks(memoryGraph, gcDumpSession.Source, processID.ToString());
+                        log.WriteLine("{0,5:n1}s: Making GC Heap Progress...", getElapsed().TotalSeconds);
                     }
 
-                    listening = true;
-                    gcDumpSession.Source.Process();
-                    log.WriteLine("{0,5:n1}s: EventPipe Listener dying", sw.Elapsed.TotalSeconds);
-                });
+                    lastEventPipeUpdate = getElapsed();
+                };
 
-                // Wait for thread above to start listening (should be very fast)
-                while (!listening)
+                if (memoryGraph != null)
                 {
-                    readerTask.Wait(1);
+                    dumper.SetupCallbacks(memoryGraph, gcDumpSession.Source, processID.ToString());
                 }
 
+                // Set up a separate thread that will listen for EventPipe events coming back telling us we succeeded. 
+                Task readerTask = Task.Run(() =>
+                {
+                    // cancelled before we began
+                    if (ct.IsCancellationRequested)
+                        return;
+                    log.WriteLine("{0,5:n1}s: Starting to process events", getElapsed().TotalSeconds);
+                    gcDumpSession.Source.Process();
+                    log.WriteLine("{0,5:n1}s: EventPipe Listener dying", getElapsed().TotalSeconds);
+                }, ct);
+
                 for (; ; )
                 {
                     if (ct.IsCancellationRequested)
                     {
+                        log.WriteLine("{0,5:n1}s: Cancelling...", getElapsed().TotalSeconds);
                         break;
                     }
 
@@ -153,15 +149,15 @@ namespace Microsoft.Diagnostics.Tools.GCDump
                         break;
                     }
 
-                    if (!eventPipeDataPresent && sw.Elapsed.TotalSeconds > 5)      // Assume it started within 5 seconds.  
+                    if (!eventPipeDataPresent && getElapsed().TotalSeconds > 5)      // Assume it started within 5 seconds.  
                     {
-                        log.WriteLine("{0,5:n1}s: Assume no .NET Heap", sw.Elapsed.TotalSeconds);
+                        log.WriteLine("{0,5:n1}s: Assume no .NET Heap", getElapsed().TotalSeconds);
                         break;
                     }
 
-                    if (sw.Elapsed.TotalSeconds > 30)       // Time out after 30 seconds. 
+                    if (getElapsed().TotalSeconds > timeout)       // Time out after `timeout` seconds. defaults to 30s.
                     {
-                        log.WriteLine("{0,5:n1}s: Timed out after 20 seconds", sw.Elapsed.TotalSeconds);
+                        log.WriteLine("{0,5:n1}s: Timed out after {1} seconds", getElapsed().TotalSeconds, timeout);
                         break;
                     }
 
@@ -171,11 +167,33 @@ namespace Microsoft.Diagnostics.Tools.GCDump
                     }
                 }
 
-                log.WriteLine("{0,5:n1}s: Shutting down EventPipe session", sw.Elapsed.TotalSeconds);
-                gcDumpSession.EndSession();
+                var stopTask = Task.Run(() =>
+                {
+                    log.WriteLine("{0,5:n1}s: Shutting down gcdump EventPipe session", getElapsed().TotalSeconds);
+                    gcDumpSession.EndSession();
+                    log.WriteLine("{0,5:n1}s: gcdump EventPipe session shut down", getElapsed().TotalSeconds);
+                }, ct);
+
+                try
+                {
+                    while (!Task.WaitAll(new Task[] { readerTask, stopTask }, 1000))
+                        log.WriteLine("{0,5:n1}s: still reading...", getElapsed().TotalSeconds);
+                }
+                catch (AggregateException ae) // no need to throw if we're just cancelling the tasks
+                {
+                    foreach (var e in ae.Flatten().InnerExceptions)
+                    {
+                        if (!(e is TaskCanceledException))
+                        {
+                            throw ae;
+                        }
+                    }
+                }
+
+                log.WriteLine("{0,5:n1}s: gcdump EventPipe Session closed", getElapsed().TotalSeconds);
 
-                while (!readerTask.Wait(1000))
-                    log.WriteLine("{0,5:n1}s: still reading...", sw.Elapsed.TotalSeconds);
+                if (ct.IsCancellationRequested)
+                    return false;
 
                 if (eventPipeDataPresent)
                 {
@@ -184,16 +202,16 @@ namespace Microsoft.Diagnostics.Tools.GCDump
             }
             catch (Exception e)
             {
-                log.WriteLine($"{sw.Elapsed.TotalSeconds:0,5:n1}s: [Error] Exception during gcdump: {e.ToString()}");
+                log.WriteLine($"{getElapsed().TotalSeconds,5:n1}s: [Error] Exception during gcdump: {e.ToString()}");
             }
 
-            log.WriteLine("[{0,5:n1}s: Done Dumping .NET heap success={1}]", sw.Elapsed.TotalSeconds, dumpComplete);
+            log.WriteLine("[{0,5:n1}s: Done Dumping .NET heap success={1}]", getElapsed().TotalSeconds, dumpComplete);
 
             return dumpComplete;
         }
     }
 
-    internal class EventPipeSession
+    internal class EventPipeSession : IDisposable
     {
         private List<Provider> _providers;
         private Stream _eventPipeStream;
@@ -223,5 +241,27 @@ namespace Microsoft.Diagnostics.Tools.GCDump
         {
             EventPipeClient.StopTracing(_pid, _sessionId);
         }
+
+        #region IDisposable Support
+        private bool disposedValue = false; // To detect redundant calls
+
+        protected virtual void Dispose(bool disposing)
+        {
+            if (!disposedValue)
+            {
+                if (disposing)
+                {
+                    _eventPipeStream?.Dispose();
+                    _source?.Dispose();
+                }
+                disposedValue = true;
+            }
+        }
+
+        public void Dispose()
+        {
+            Dispose(true);
+        }
+        #endregion
     }
 }
index c9a078762b1a6969062f5793753da07967520af6..71774afead0b464842dbab555dba257be562df2d 100644 (file)
@@ -1,12 +1,14 @@
 # DotNetHeapDump
 
-The following code files were copied in entirety with minimal changes from the Microsoft/Perfview repository on github: https://github.com/Microsoft/PerfView.
+The following code files were copied in their entirety from the Microsoft/Perfview repository on github: https://github.com/Microsoft/PerfView.
 
 This was done simply because refactoring the TraceEvent library to include these classes proved to be too disruptive. Diamond dependency refactoring and
 mismatched target frameworks made the refactoring too disruptive to the TraceEvent library.
 
 This code should be treated as read-only.  Any changes that _do_ need to be made should be mirrored to Microsoft/PerfView _and_ documented here.
 
+The intent is to unify these code files with the code in Microsoft/PerfView, either via automated mirroring, or exposing the logic through TraceEvent.
+
 ## Files:
 
 * DotNetHeapInfo.cs (https://github.com/microsoft/perfview/blob/76dc28af873e27aa8c4f9ce8efa0971a2c738165/src/HeapDumpCommon/DotNetHeapInfo.cs)
@@ -43,4 +45,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
\ No newline at end of file
+SOFTWARE.