From: Pat Gavlin Date: Thu, 16 Mar 2017 00:28:42 +0000 (-0700) Subject: Take crash dumps for timed-out tests. X-Git-Tag: submit/tizen/20210909.063632~11030^2~7698^2 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=a0b691dbdff800904f6a64dc6a9525ddc8c275f0;p=platform%2Fupstream%2Fdotnet%2Fruntime.git Take crash dumps for timed-out tests. This change updates the test infrastructure to collect crash dumps for tests that time out. We've been seeing a number of tests that only time out in CI (e.g. dotnet/coreclr#10076); hopefully this will help us root-cause the timeouts. Commit migrated from https://github.com/dotnet/coreclr/commit/6f30002fc917cdacd4d0565f05ded920ab17f830 --- diff --git a/src/coreclr/netci.groovy b/src/coreclr/netci.groovy index 1399a3a..983ca2f 100755 --- a/src/coreclr/netci.groovy +++ b/src/coreclr/netci.groovy @@ -1515,7 +1515,7 @@ def static calculateBuildCommands(def newJob, def scenario, def branch, def isPR gcTestArguments = "${scenario} sequential" } - runtestArguments = "${lowerConfiguration} ${arch} ${gcstressStr} ${crossgenStr} ${runcrossgentestsStr} ${runjitstressStr} ${runjitstressregsStr} ${runjitmioptsStr} ${runjitforcerelocsStr} ${runjitdisasmStr} ${gcTestArguments}" + runtestArguments = "${lowerConfiguration} ${arch} ${gcstressStr} ${crossgenStr} ${runcrossgentestsStr} ${runjitstressStr} ${runjitstressregsStr} ${runjitmioptsStr} ${runjitforcerelocsStr} ${runjitdisasmStr} ${gcTestArguments} collectdumps" if (Constants.jitStressModeScenarios.containsKey(scenario)) { def stepScriptLocation = "%WORKSPACE%\\SetStressModes.bat" diff --git a/src/coreclr/tests/runtest.cmd b/src/coreclr/tests/runtest.cmd index 89e2571..d112455 100644 --- a/src/coreclr/tests/runtest.cmd +++ b/src/coreclr/tests/runtest.cmd @@ -44,6 +44,7 @@ set __LongGCTests= set __GCSimulatorTests= set __AgainstPackages= set __JitDisasm= +set __CollectDumps= :Arg_Loop if "%1" == "" goto ArgsDone @@ -84,6 +85,7 @@ if /i "%1" == "link" (set DoLink=true&set 
ILLINK=%2&shift&shift
 
 REM change it to COMPlus_GCStress when we stop using xunit harness
 if /i "%1" == "gcstresslevel" (set __GCSTRESSLEVEL=%2&set __TestTimeout=1800000&shift&shift&goto Arg_Loop)
+if /i "%1" == "collectdumps" (set __CollectDumps=true&shift&goto Arg_Loop)
 
 if /i not "%1" == "msbuildargs" goto SkipMsbuildArgs
 :: All the rest of the args will be collected and passed directly to msbuild.
@@ -207,12 +209,35 @@ if not exist %XunitTestBinBase% (
     echo %__MsgPrefix%Run "buildtest.cmd %__BuildArch% %__BuildType%" to build the tests first.
     exit /b 1
 )
+
+if "%__CollectDumps%"=="true" (
+    REM Install dumpling (REM, not ::, because :: is parsed as a label and is fragile inside a parenthesized block)
+    set "__DumplingHelperPath=%__ProjectDir%\..\Tools\DumplingHelper.py"
+    python "!__DumplingHelperPath!" install_dumpling
+
+    REM Create the crash dump folder if necessary
+    set "__CrashDumpFolder=%tmp%\CoreCLRTestCrashDumps"
+    if not exist "!__CrashDumpFolder!" (
+        mkdir "!__CrashDumpFolder!"
+    )
+
+    REM Grab the current time before execution begins. This will be used to determine which crash dumps
+    REM will be uploaded. The helper path is quoted so a checkout path containing spaces still works.
+    for /f "delims=" %%a in ('python "!__DumplingHelperPath!" get_timestamp') do @set __StartTime=%%a
+)
+
 echo %__MsgPrefix%CORE_ROOT that will be used is: %CORE_ROOT%
 echo %__MsgPrefix%Starting the test run ...
 
 set __BuildLogRootName=TestRunResults
 call :msbuild "%__ProjectFilesDir%\runtest.proj" /p:Runtests=true /clp:showcommandline
+REM Save msbuild's exit code now: the python call below overwrites ERRORLEVEL with its own exit code.
+set __errorlevel=%errorlevel%
 
+if "%__CollectDumps%"=="true" (
+    python "%__DumplingHelperPath%" collect_dump %__errorlevel% "%__CrashDumpFolder%" %__StartTime% "CoreCLR_Tests"
+)
+
-if errorlevel 1 (
+if %__errorlevel% GEQ 1 (
     echo Test Run failed. 
Refer to the following: echo Html report: %__TestRunHtmlLog% diff --git a/src/coreclr/tests/src/Common/Coreclr.TestWrapper/Coreclr.TestWrapper.csproj b/src/coreclr/tests/src/Common/Coreclr.TestWrapper/Coreclr.TestWrapper.csproj index eb9a414..b25a6d6 100644 --- a/src/coreclr/tests/src/Common/Coreclr.TestWrapper/Coreclr.TestWrapper.csproj +++ b/src/coreclr/tests/src/Common/Coreclr.TestWrapper/Coreclr.TestWrapper.csproj @@ -17,6 +17,7 @@ 7a9bfb7d false BuildOnly + true diff --git a/src/coreclr/tests/src/Common/Coreclr.TestWrapper/CoreclrTestWrapperLib.cs b/src/coreclr/tests/src/Common/Coreclr.TestWrapper/CoreclrTestWrapperLib.cs index 5c04827..e431f3a 100644 --- a/src/coreclr/tests/src/Common/Coreclr.TestWrapper/CoreclrTestWrapperLib.cs +++ b/src/coreclr/tests/src/Common/Coreclr.TestWrapper/CoreclrTestWrapperLib.cs @@ -7,12 +7,89 @@ using System; using System.Collections.Generic; using System.Diagnostics; using System.IO; +using System.Runtime.InteropServices; using System.Text; using System.Threading; using System.Threading.Tasks; +using Microsoft.Win32.SafeHandles; namespace CoreclrTestLib { + static class DbgHelp + { + public enum MiniDumpType : int + { + MiniDumpNormal = 0x00000000, + MiniDumpWithDataSegs = 0x00000001, + MiniDumpWithFullMemory = 0x00000002, + MiniDumpWithHandleData = 0x00000004, + MiniDumpFilterMemory = 0x00000008, + MiniDumpScanMemory = 0x00000010, + MiniDumpWithUnloadedModules = 0x00000020, + MiniDumpWithIndirectlyReferencedMemory = 0x00000040, + MiniDumpFilterModulePaths = 0x00000080, + MiniDumpWithProcessThreadData = 0x00000100, + MiniDumpWithPrivateReadWriteMemory = 0x00000200, + MiniDumpWithoutOptionalData = 0x00000400, + MiniDumpWithFullMemoryInfo = 0x00000800, + MiniDumpWithThreadInfo = 0x00001000, + MiniDumpWithCodeSegs = 0x00002000, + MiniDumpWithoutAuxiliaryState = 0x00004000, + MiniDumpWithFullAuxiliaryState = 0x00008000, + MiniDumpWithPrivateWriteCopyMemory = 0x00010000, + MiniDumpIgnoreInaccessibleMemory = 0x00020000, + 
MiniDumpWithTokenInformation = 0x00040000,
+            MiniDumpWithModuleHeaders = 0x00080000,
+            MiniDumpFilterTriage = 0x00100000,
+            MiniDumpValidTypeFlags = 0x001fffff // union of every flag bit declared in this enum (bits 0-20)
+        }
+
+        [DllImport("DbgHelp.dll", SetLastError = true)]
+        public static extern bool MiniDumpWriteDump(IntPtr handle, int processId, SafeFileHandle file, MiniDumpType dumpType, IntPtr exceptionParam, IntPtr userStreamParam, IntPtr callbackParam); // CollectCrashDump treats a true return as "dump written" and passes IntPtr.Zero for the last three arguments
+    }
+
+    static class Kernel32 // P/Invoke declarations for the kernel32.dll process-snapshot functions used by TryFindChildProcessByName
+    {
+        public const int MAX_PATH = 260; // length, in chars, of the fixed ProcessEntry32.ExeFile buffer
+        public const int ERROR_NO_MORE_FILES = 0x12; // NOTE(review): not referenced by any code visible in this diff
+
+        public enum Toolhelp32Flags : uint // flags argument of CreateToolhelp32Snapshot; only TH32CS_SNAPPROCESS is used by the code in this diff
+        {
+            TH32CS_INHERIT = 0x80000000,
+            TH32CS_SNAPHEAPLIST = 0x00000001,
+            TH32CS_SNAPMODULE = 0x00000008,
+            TH32CS_SNAPMODULE32 = 0x00000010,
+            TH32CS_SNAPPROCESS = 0x00000002,
+            TH32CS_SNAPTHREAD = 0x00000004
+        };
+
+        public unsafe struct ProcessEntry32 // passed by ref to Process32First/Process32Next; NOTE(review): field order and sizes presumably mirror the native PROCESSENTRY32 layout - confirm before reordering
+        {
+            public int Size; // the caller sets this to sizeof(ProcessEntry32) before the first Process32First call
+            public int Usage;
+            public int ProcessID; // id handed to Process.GetProcessById by TryFindChildProcessByName
+            public IntPtr DefaultHeapID;
+            public int ModuleID;
+            public int Threads;
+            public int ParentProcessID; // compared against the parent's Process.Id to pick out child processes
+            public int PriClassBase;
+            public int Flags;
+            public fixed char ExeFile[MAX_PATH]; // inline buffer; not read by the code in this diff
+        }
+
+        [DllImport("kernel32.dll")]
+        public static extern bool CloseHandle(IntPtr handle); // used to release the snapshot handle in a finally block
+
+        [DllImport("kernel32.dll", SetLastError = true)]
+        public static extern IntPtr CreateToolhelp32Snapshot(Toolhelp32Flags flags, int processId); // per the Win32 documentation, failure is reported as INVALID_HANDLE_VALUE (-1), not IntPtr.Zero
+
+        [DllImport("kernel32.dll", SetLastError = true, CharSet = CharSet.Unicode)]
+        public static extern bool Process32First(IntPtr snapshot, ref ProcessEntry32 entry); // starts the enumeration; callers loop while this and Process32Next return true
+
+        [DllImport("kernel32.dll", SetLastError = true, CharSet = CharSet.Unicode)]
+        public static extern bool Process32Next(IntPtr snapshot, ref ProcessEntry32 entry); // advances to the next snapshot entry
+    }
+
     public class CoreclrTestWrapperLib
     {
         public const int EXIT_SUCCESS_CODE = 0;
@@ -22,6 +99,63 @@ namespace CoreclrTestLib
 
         public const int DEFAULT_TIMEOUT = 1000 * 60*10;
         public const string GC_STRESS_LEVEL = "__GCSTRESSLEVEL";
+        public const string COLLECT_DUMPS_ENVIRONMENT_VAR = "__CollectDumps";
+        public const string 
CRASH_DUMP_FOLDER_ENVIRONMENT_VAR = "__CrashDumpFolder";
+
+        static bool CollectCrashDump(Process process, string path) // Writes a full-memory minidump of `process` to the file at `path`; returns MiniDumpWriteDump's result (true when the dump was written).
+        {
+            using (var crashDump = File.Create(path)) // Create, not OpenWrite: OpenWrite keeps existing bytes, so a stale, larger dump at this pid-derived path would leave trailing garbage.
+            {
+                var flags = DbgHelp.MiniDumpType.MiniDumpWithFullMemory | DbgHelp.MiniDumpType.MiniDumpIgnoreInaccessibleMemory;
+                return DbgHelp.MiniDumpWriteDump(process.Handle, process.Id, crashDump.SafeFileHandle, flags, IntPtr.Zero, IntPtr.Zero, IntPtr.Zero);
+            }
+        }
+
+        static unsafe bool TryFindChildProcessByName(Process process, string childName, out Process child) // Scans a system process snapshot for a direct child of `process` whose ProcessName equals `childName` (ordinal, case-insensitive); on true the caller owns `child`.
+        {
+            IntPtr snapshot = Kernel32.CreateToolhelp32Snapshot(Kernel32.Toolhelp32Flags.TH32CS_SNAPPROCESS, 0);
+            if (snapshot == IntPtr.Zero || snapshot == new IntPtr(-1)) // CreateToolhelp32Snapshot signals failure with INVALID_HANDLE_VALUE (-1); the zero check is kept for safety.
+            {
+                child = null;
+                return false;
+            }
+
+            try
+            {
+                int ppid = process.Id;
+
+                var processEntry = new Kernel32.ProcessEntry32 { Size = sizeof(Kernel32.ProcessEntry32) };
+
+                bool success = Kernel32.Process32First(snapshot, ref processEntry);
+                while (success)
+                {
+                    if (processEntry.ParentProcessID == ppid)
+                    {
+                        try
+                        {
+                            Process c = Process.GetProcessById(processEntry.ProcessID);
+                            if (c.ProcessName.Equals(childName, StringComparison.OrdinalIgnoreCase))
+                            {
+                                child = c;
+                                return true;
+                            }
+                            c.Dispose();
+                        }
+                        catch {} // Deliberate best effort: the candidate may have exited since the snapshot was taken, so skip it and keep scanning.
+                    }
+
+                    success = Kernel32.Process32Next(snapshot, ref processEntry);
+                }
+
+                child = null;
+                return false;
+            }
+            finally
+            {
+                Kernel32.CloseHandle(snapshot);
+            }
+        }
+
         public int RunTest(string executable, string outputFile, string errorFile)
         {
             Debug.Assert(outputFile != errorFile);
@@ -39,6 +173,9 @@ namespace CoreclrTestLib
             string operatingSystem = System.Environment.GetEnvironmentVariable("OS");
             bool runningInWindows = (operatingSystem != null && operatingSystem.StartsWith("Windows"));
 
+            // We can't yet take crash dumps on non-Windows OSs for timed-out tests
+            bool collectCrashDumps = runningInWindows && Environment.GetEnvironmentVariable(COLLECT_DUMPS_ENVIRONMENT_VAR) != null;
+
             var outputStream = new FileStream(outputFile, FileMode.Create);
             var errorStream = new 
FileStream(errorFile, FileMode.Create); @@ -92,6 +229,23 @@ namespace CoreclrTestLib outputWriter.WriteLine("\ncmdLine:" + executable + " Timed Out"); errorWriter.WriteLine("\ncmdLine:" + executable + " Timed Out"); + + if (collectCrashDumps) + { + string crashDumpFolder = Environment.GetEnvironmentVariable(CRASH_DUMP_FOLDER_ENVIRONMENT_VAR); + if (crashDumpFolder != null) + { + Process childProcess; + if (TryFindChildProcessByName(process, "corerun", out childProcess)) + { + string crashDumpPath = Path.Combine(Path.GetFullPath(crashDumpFolder), string.Format("crashdump_{0}.dmp", childProcess.Id)); + if (CollectCrashDump(childProcess, crashDumpPath)) + { + Console.WriteLine("Collected crash dump {0}", crashDumpPath); + } + } + } + } } outputWriter.WriteLine("Test Harness Exitcode is : " + exitCode.ToString()); diff --git a/src/coreclr/tests/src/Common/Desktop.Coreclr.TestWrapper/Desktop.Coreclr.TestWrapper.csproj b/src/coreclr/tests/src/Common/Desktop.Coreclr.TestWrapper/Desktop.Coreclr.TestWrapper.csproj index b70f332..c3cd97f 100644 --- a/src/coreclr/tests/src/Common/Desktop.Coreclr.TestWrapper/Desktop.Coreclr.TestWrapper.csproj +++ b/src/coreclr/tests/src/Common/Desktop.Coreclr.TestWrapper/Desktop.Coreclr.TestWrapper.csproj @@ -14,6 +14,7 @@ true 512 BuildOnly + true true