[Merge-on-Red] - Implement Test Process Watcher (#78742)
authorIvan Diaz Sanchez <ivdiazsa@microsoft.com>
Fri, 10 Mar 2023 20:06:53 +0000 (12:06 -0800)
committerGitHub <noreply@github.com>
Fri, 10 Mar 2023 20:06:53 +0000 (12:06 -0800)
Initial implementation of the test watcher that looks out for hangs and freezes during test runs.

src/coreclr/CMakeLists.txt
src/native/watchdog/CMakeLists.txt [new file with mode: 0644]
src/native/watchdog/watchdog.cpp [new file with mode: 0644]
src/tests/Common/CLRTest.Execute.Bash.targets
src/tests/Common/CLRTest.Execute.Batch.targets
src/tests/Common/helixpublishwitharcade.proj

index e7cd64d24922a020a01d6aeefa0edbe0ff0ce6c5..67b773bf7878762dfa8257e13ade7b4fe6b39b06 100644 (file)
@@ -119,6 +119,11 @@ else()
     endif()
 endif()
 
+#----------------------------------------------------
+# Build the test watchdog alongside the CLR
+#----------------------------------------------------
+add_subdirectory("${CLR_SRC_NATIVE_DIR}/watchdog" test-watchdog)
+
 # Add this subdir. We install the headers for the jit.
 add_subdirectory(pal/prebuilt/inc)
 
@@ -275,3 +280,4 @@ endif(NOT CLR_CMAKE_HOST_MACCATALYST AND NOT CLR_CMAKE_HOST_IOS AND NOT CLR_CMAK
 if(CLR_CROSS_COMPONENTS_BUILD)
   include(crosscomponents.cmake)
 endif(CLR_CROSS_COMPONENTS_BUILD)
+
diff --git a/src/native/watchdog/CMakeLists.txt b/src/native/watchdog/CMakeLists.txt
new file mode 100644 (file)
index 0000000..723e105
--- /dev/null
@@ -0,0 +1,4 @@
+add_executable_clr(watchdog ${CMAKE_CURRENT_LIST_DIR}/watchdog.cpp)
+install_clr(TARGETS watchdog DESTINATIONS . COMPONENT hosts)
+install_clr(TARGETS watchdog DESTINATIONS . COMPONENT nativeaot)
+
diff --git a/src/native/watchdog/watchdog.cpp b/src/native/watchdog/watchdog.cpp
new file mode 100644 (file)
index 0000000..1dc6f74
--- /dev/null
@@ -0,0 +1,136 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#include <cstdio>
+#include <cstdlib>
+#include <errno.h>
+#include <signal.h>
+
+#ifdef TARGET_WINDOWS
+
+#include <windows.h>
+#include <string>
+
+#else // !TARGET_WINDOWS
+
+#include <chrono>
+#include <sys/wait.h>
+#include <thread>
+#include <unistd.h>
+#include <vector>
+
+#endif // TARGET_WINDOWS
+
+int run_timed_process(const long, const int, const char *[]);
+
+#ifdef TARGET_X86
+int __cdecl main(const int argc, const char *argv[])
+#else
+int main(const int argc, const char *argv[])
+#endif
+{
+    if (argc < 3)
+    {
+        printf("There are missing arguments. Got %d instead of 3+ :(\n", argc);
+        return EXIT_FAILURE;
+    }
+
+    const long timeout_sec = strtol(argv[1], nullptr, 10);
+    int exit_code = run_timed_process(timeout_sec * 1000L, argc-2, &argv[2]);
+
+    printf("App Exit Code: %d\n", exit_code);
+    return exit_code;
+}
+
+int run_timed_process(const long timeout_ms, const int proc_argc, const char *proc_argv[])
+{
+#ifdef TARGET_WINDOWS
+    std::string cmdline(proc_argv[0]);
+
+    for (int i = 1; i < proc_argc; i++)
+    {
+        cmdline.append(" ");
+        cmdline.append(proc_argv[i]);
+    }
+
+    STARTUPINFOA startup_info;
+    PROCESS_INFORMATION proc_info;
+    unsigned long exit_code;
+
+    ZeroMemory(&startup_info, sizeof(startup_info));
+    startup_info.cb = sizeof(startup_info);
+    ZeroMemory(&proc_info, sizeof(proc_info));
+
+    if (!CreateProcessA(NULL, &cmdline[0], NULL, NULL, FALSE, 0, NULL, NULL,
+                       &startup_info, &proc_info))
+    {
+        int error_code = GetLastError();
+        printf("Process creation failed... Code %d.\n", error_code);
+        return error_code;
+    }
+
+    WaitForSingleObject(proc_info.hProcess, timeout_ms);
+    GetExitCodeProcess(proc_info.hProcess, &exit_code);
+
+    CloseHandle(proc_info.hProcess);
+    CloseHandle(proc_info.hThread);
+    return exit_code;
+
+#else // !TARGET_WINDOWS
+
+    const int check_interval_ms = 25;
+    int check_count = 0;
+    std::vector<const char*> args;
+
+    pid_t child_pid;
+    int child_status;
+    int wait_code;
+
+    for (int i = 0; i < proc_argc; i++)
+    {
+        args.push_back(proc_argv[i]);
+    }
+    args.push_back(NULL);
+
+    child_pid = fork();
+
+    if (child_pid < 0)
+    {
+        // Fork failed. No memory remaining available :(
+        printf("Fork failed... Returning ENOMEM.\n");
+        return ENOMEM;
+    }
+    else if (child_pid == 0)
+    {
+        // Instructions for child process!
+        execv(args[0], const_cast<char* const*>(args.data()));
+    }
+    else
+    {
+        do
+        {
+            // Instructions for the parent process!
+            wait_code = waitpid(child_pid, &child_status, WNOHANG);
+
+            if (wait_code == -1)
+                return EINVAL;
+
+            std::this_thread::sleep_for(std::chrono::milliseconds(check_interval_ms));
+
+            if (wait_code)
+            {
+                if (WIFEXITED(child_status))
+                    return WEXITSTATUS(child_status);
+            }
+            check_count++;
+
+        } while (check_count < (timeout_ms / check_interval_ms));
+    }
+
+    printf("Child process took too long. Timed out... Exiting...\n");
+    kill(child_pid, SIGKILL);
+
+#endif // TARGET_WINDOWS
+    return ETIMEDOUT;
+}
+
index 27557752e3137f986964a3a24894850d75a115d0..5cc68ec20eff077c709e3b2a37f747612c9155ad 100644 (file)
@@ -187,12 +187,18 @@ fi
         <Command><![CDATA[        export __DotEnv="${i#*=}"
         if [ ! -f "$__DotEnv" ]
         then
-            echo "The Debugger FullPath %5C%22${__DotEnv}%5C%22 does not exist"
+            echo "The dotenv file FullPath %5C%22${__DotEnv}%5C%22 does not exist"
             usage
         fi
         export __DotEnvArg=-e ${__DotEnv}]]></Command>
         <Description>A dotenv file to pass to corerun to set environment variables for the test run.</Description>
       </BashCLRTestExecutionScriptArgument>
+
+      <BashCLRTestExecutionScriptArgument Include="usewatcher">
+        <HasParam>false</HasParam>
+        <Command><![CDATA[        _RunWithWatcher=1]]></Command>
+        <Description>Run the tests using the test watcher.</Description>
+      </BashCLRTestExecutionScriptArgument>
     </ItemGroup>
 
       <PropertyGroup>
@@ -250,10 +256,11 @@ then
     exit 1
   fi
 
-  # Copy CORECLR native binaries to $LinkBin,
+  # Copy CORECLR native binaries and the test watcher to $LinkBin,
   # so that we can run the test based on that directory
   cp $CORE_ROOT/*.so $LinkBin/
   cp $CORE_ROOT/corerun $LinkBin/
+  cp $CORE_ROOT/watchdog $LinkBin/
 
   # Copy some files that may be arguments
   for f in *.txt;
@@ -283,6 +290,7 @@ fi
       </PropertyGroup>
       <PropertyGroup>
       <CLRTestRunFile Condition="'$(CLRTestIsHosted)'=='true'">"$CORE_ROOT/corerun" $(CoreRunArgs)  ${__DotEnvArg}</CLRTestRunFile>
+      <WatcherRunFile>"$CORE_ROOT/watchdog" 300</WatcherRunFile>
 
       <!-- Note that this overwrites CLRTestBashPreCommands rather than adding to it. -->
       <CLRTestBashPreCommands Condition="'$(CLRTestKind)' == 'BuildAndRun' and '$(TargetArchitecture)' == 'wasm'"><![CDATA[
@@ -318,6 +326,9 @@ fi
 if [ ! -z "$CLRCustomTestLauncher" ];
 then
     LAUNCHER="$CLRCustomTestLauncher $PWD/"
+elif [ "$_RunWithWatcher" == 1 ];
+then
+    LAUNCHER="$(WatcherRunFile) $(CLRTestRunFile)"
 else
     LAUNCHER="$_DebuggerFullPath $_DebuggerArgsSeparator $(CLRTestRunFile)"
 fi
@@ -346,8 +357,11 @@ $(BashLinkerTestLaunchCmds)
 if [ ! -z "$CLRCustomTestLauncher" ];
 then
     LAUNCHER="$CLRCustomTestLauncher $PWD/"
+elif [ "$_RunWithWatcher" == 1 ];
+then
+    LAUNCHER="$(WatcherRunFile) $(CLRTestRunFile)"
 else
-    LAUNCHER="$_DebuggerFullPath $(CLRTestRunFile)"
+    LAUNCHER="$_DebuggerFullPath $_DebuggerArgsSeparator $(CLRTestRunFile)"
 fi
 
 $(BashIlrtTestLaunchCmds)
@@ -484,7 +498,7 @@ usage()
 for i in "$@"
     do
         case $i in
-        -?|-h|--help)
+          -?|-h|--help|/?|/h|/help)
         usage
         %3B%3B
 @(BashCLRTestExecutionScriptArgument -> '        -%(Identity)%(ParamText)|/%(Identity)%(ParamText))
@@ -534,6 +548,7 @@ ReleaseLock()
 }
 cd "$%28dirname "${BASH_SOURCE[0]}")"
 LockFile="lock"
+_RunWithWatcher=0
 
 
 # The __TestEnv variable may be used to specify a script to source before the test.
index db74a66be45b5e9b9746f452ecb988822c9eb0b4..2e0d05e8c635b92b28faaa51cefc787139eab51c 100644 (file)
@@ -216,6 +216,14 @@ Exit /b 0
         ]]></Command>
         <Description>Set CORE_ROOT to the specified value before running the test.</Description>
       </BatchCLRTestExecutionScriptArgument>
+
+      <BatchCLRTestExecutionScriptArgument Include="usewatcher">
+        <HasParam>false</HasParam>
+        <Command><![CDATA[
+    set /A _RunWithWatcher=1
+        ]]></Command>
+        <Description>Run the tests using the test watcher.</Description>
+      </BatchCLRTestExecutionScriptArgument>
     </ItemGroup>
 
       <PropertyGroup>
@@ -260,17 +268,18 @@ IF defined DoLink (
       Exit /b 1
     )
 
-    REM Copy CORECLR native binaries to %LinkBin%, so that we can run the test based on that directory
+    REM Copy CORECLR native binaries and the test watcher to %LinkBin%, so that we can run the test based on that directory
     copy %CORE_ROOT%\clrjit.dll %LinkBin% > nul 2> nul
     copy %CORE_ROOT%\coreclr.dll %LinkBin% > nul 2> nul
     copy %CORE_ROOT%\mscorrc.dll %LinkBin% > nul 2> nul
     copy %CORE_ROOT%\CoreRun.exe %LinkBin% > nul 2> nul
+    copy %CORE_ROOT%\watchdog.exe %LinkBin% > nul 2> nul
 
     REM Copy some files that may be arguments
     copy *.txt %LinkBin% > nul 2> nul
 
     set ExePath=%LinkBin%\$(InputAssemblyName)
-    set CORE_ROOT=%scriptPath%LinkBin%
+    set CORE_ROOT=%scriptPath%\%LinkBin%
 )
 ]]>
           </BatchLinkerTestLaunchCmds>
@@ -289,6 +298,8 @@ if defined DoLink (
       </PropertyGroup>
       <PropertyGroup>
       <CLRTestRunFile Condition="'$(CLRTestIsHosted)'=='true'">"%CORE_ROOT%\corerun.exe" $(CoreRunArgs) %__DotEnvArg%</CLRTestRunFile>
+      <WatcherRunFile>"%CORE_ROOT%\watchdog.exe" 300</WatcherRunFile>
+
       <BatchCopyCoreShimLocalCmds Condition="'$(CLRTestScriptLocalCoreShim)' == 'true'"><![CDATA[
 REM Local CoreShim requested - see MSBuild property 'CLRTestScriptLocalCoreShim'
 ECHO Copying '%CORE_ROOT%\CoreShim.dll'...
@@ -301,6 +312,8 @@ $(BatchCopyCoreShimLocalCmds)
 
 IF NOT "%CLRCustomTestLauncher%"=="" (
   set LAUNCHER=call %CLRCustomTestLauncher% %scriptPath%
+) ELSE IF %_RunWithWatcher% EQU 1 (
+  set LAUNCHER=$(WatcherRunFile) $(CLRTestRunFile)
 ) ELSE (
   set LAUNCHER=%_DebuggerFullPath% $(CLRTestRunFile)
 )
@@ -425,6 +438,7 @@ setlocal ENABLEDELAYEDEXPANSION
 set "lockFolder=%~dp0\lock"
 pushd %~dp0
 set "scriptPath=%~dp0"
+set /A _RunWithWatcher=0
 
 $(BatchCLRTestArgPrep)
 $(BatchCLRTestExitCodePrep)
index d6cdd0d5e09229da9a24c2a3b7630fb64ae38809..247bd3c0179f7f49d38e56db13a39781d47cfa58 100644 (file)
       <HelixCommandLines Condition="'$(TestWrapperTargetsWindows)' != 'true'" Include="export TEST_HARNESS_STRIPE_TO_EXECUTE=.0.1" />
       <HelixCommandLines Condition="'$(TestWrapperTargetsWindows)' != 'true'" Include="chmod +x $(_MergedWrapperRunScriptRelative)" />
       <!-- Force assemblies to lazy-load for LLVM AOT test runs to enable using tests that fail at AOT time (and as a result can't be AOTd) -->
-      <HelixCommandLines Condition="'$(RuntimeVariant)' == 'llvmfullaot'" Include="$(_MergedWrapperRunScriptPrefix)$(_MergedWrapperRunScriptRelative) --aot-lazy-assembly-load" />
-      <HelixCommandLines Condition="'$(RuntimeVariant)' != 'llvmfullaot'" Include="$(_MergedWrapperRunScriptPrefix)$(_MergedWrapperRunScriptRelative)" />
+      <HelixCommandLines Condition="'$(RuntimeVariant)' == 'llvmfullaot'" Include="$(_MergedWrapperRunScriptPrefix)$(_MergedWrapperRunScriptRelative) -usewatcher --aot-lazy-assembly-load" />
+      <HelixCommandLines Condition="'$(RuntimeVariant)' != 'llvmfullaot'" Include="$(_MergedWrapperRunScriptPrefix)$(_MergedWrapperRunScriptRelative) -usewatcher" />
       <HelixCommandLines Include="$(XUnitLogCheckerCommand)" />
     </ItemGroup>
 
 
   <ItemGroup Condition=" '$(UsesHelixSdk)' == 'true' ">
     <HelixCorrelationPayload Include="$(CoreRootDirectory)" />
-    <HelixCorrelationPayload Include="$(XUnitLogCheckerDirectory)" />
+
+    <!-- Browser-Wasm follows a very different workflow, which is currently out of scope of the Log Checker. -->
+    <HelixCorrelationPayload Include="$(XUnitLogCheckerDirectory)" Condition="'$(TargetsBrowser)' != 'true'" />
 
     <LegacyPayloads Include="$([System.IO.Directory]::GetDirectories($(LegacyPayloadsRootDirectory)))" Condition="Exists('$(LegacyPayloadsRootDirectory)')" />
     <LegacyPayloads Update="@(LegacyPayloads)">