Initial implementation of the test watcher that looks out for hangs and freezes during test runs.
+# Build the test watchdog alongside the CLR
+add_subdirectory("${CLR_SRC_NATIVE_DIR}/watchdog" test-watchdog)
# Add this subdir. We install the headers for the jit.
--- /dev/null
+# Build the `watchdog` executable from the single watchdog.cpp that sits next to this list file.
+# NOTE(review): add_executable_clr and install_clr are project helpers defined elsewhere; confirm their exact semantics there.
+add_executable_clr(watchdog ${CMAKE_CURRENT_LIST_DIR}/watchdog.cpp)
+# Install the binary at destination `.` once per component (hosts and nativeaot).
+install_clr(TARGETS watchdog DESTINATIONS . COMPONENT hosts)
+install_clr(TARGETS watchdog DESTINATIONS . COMPONENT nativeaot)
--- /dev/null
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+#include <cstdio>
+#include <cstdlib>
+#include <errno.h>
+#include <signal.h>
+#include <windows.h>
+#include <string>
+#include <chrono>
+#include <sys/wait.h>
+#include <thread>
+#include <unistd.h>
+#include <vector>
+int run_timed_process(const long, const int, const char *[]);
+#ifdef TARGET_X86
+int __cdecl main(const int argc, const char *argv[])
+int main(const int argc, const char *argv[])
+    // Expected usage: watchdog <timeout_seconds> <program> [program arguments...]
+    if (argc < 3)
+    {
+        printf("There are missing arguments. Got %d instead of 3+ :(\n", argc);
+        return EXIT_FAILURE;
+    }
+
+    // strtol yields 0 for text that is not a number, which would otherwise be
+    // taken as a zero-second timeout. Treat anything that is not a positive
+    // whole number of seconds as a usage error instead.
+    char *timeout_end = nullptr;
+    const long timeout_sec = strtol(argv[1], &timeout_end, 10);
+    if (timeout_end == argv[1] || *timeout_end != '\0' || timeout_sec <= 0)
+    {
+        printf("Invalid timeout '%s'. Expected a positive number of seconds.\n", argv[1]);
+        return EXIT_FAILURE;
+    }
+
+    // Everything after the timeout is the command line of the process to watch.
+    int exit_code = run_timed_process(timeout_sec * 1000L, argc-2, &argv[2]);
+    printf("App Exit Code: %d\n", exit_code);
+    return exit_code;
+int run_timed_process(const long timeout_ms, const int proc_argc, const char *proc_argv[])
+ std::string cmdline(proc_argv[0]);
+ for (int i = 1; i < proc_argc; i++)
+ {
+ cmdline.append(" ");
+ cmdline.append(proc_argv[i]);
+ }
+ STARTUPINFOA startup_info;
+ unsigned long exit_code;
+ ZeroMemory(&startup_info, sizeof(startup_info));
+ startup_info.cb = sizeof(startup_info);
+ ZeroMemory(&proc_info, sizeof(proc_info));
+ if (!CreateProcessA(NULL, &cmdline[0], NULL, NULL, FALSE, 0, NULL, NULL,
+ &startup_info, &proc_info))
+ {
+ int error_code = GetLastError();
+ printf("Process creation failed... Code %d.\n", error_code);
+ return error_code;
+ }
+    // Wait for the child to finish, but never longer than the allotted time.
+    const DWORD wait_result = WaitForSingleObject(proc_info.hProcess, timeout_ms);
+    if (wait_result == WAIT_TIMEOUT)
+    {
+        // The child is still running. Stop it here; otherwise it would be left
+        // alive and GetExitCodeProcess would report STILL_ACTIVE (259) as if
+        // that were the application's own exit code.
+        printf("Child process took too long. Timed out... Exiting...\n");
+        TerminateProcess(proc_info.hProcess, ERROR_TIMEOUT);
+        exit_code = ERROR_TIMEOUT;
+    }
+    else if (!GetExitCodeProcess(proc_info.hProcess, &exit_code))
+    {
+        // Do not return an uninitialized value when the exit code is unavailable.
+        exit_code = GetLastError();
+        printf("Could not read the process exit code... Code %lu.\n", exit_code);
+    }
+
+    // Both handles returned by CreateProcessA must be released.
+    CloseHandle(proc_info.hProcess);
+    CloseHandle(proc_info.hThread);
+    return exit_code;
+ const int check_interval_ms = 25;
+ int check_count = 0;
+ std::vector<const char*> args;
+ pid_t child_pid;
+ int child_status;
+ int wait_code;
+ for (int i = 0; i < proc_argc; i++)
+ {
+ args.push_back(proc_argv[i]);
+ }
+ args.push_back(NULL);
+ child_pid = fork();
+ if (child_pid < 0)
+ {
+ // Fork failed. No memory remaining available :(
+ printf("Fork failed... Returning ENOMEM.\n");
+ return ENOMEM;
+ }
+ else if (child_pid == 0)
+ {
+ // Instructions for child process!
+ execv(args[0], const_cast<char* const*>(;
+ }
+ else
+ {
+ do
+ {
+ // Instructions for the parent process!
+ wait_code = waitpid(child_pid, &child_status, WNOHANG);
+ if (wait_code == -1)
+ return EINVAL;
+ std::this_thread::sleep_for(std::chrono::milliseconds(check_interval_ms));
+ if (wait_code)
+ {
+ if (WIFEXITED(child_status))
+ return WEXITSTATUS(child_status);
+ }
+ check_count++;
+ } while (check_count < (timeout_ms / check_interval_ms));
+ }
+ printf("Child process took too long. Timed out... Exiting...\n");
+ kill(child_pid, SIGKILL);
+ return ETIMEDOUT;
<Command><![CDATA[ export __DotEnv="${i#*=}"
if [ ! -f "$__DotEnv" ]
- echo "The Debugger FullPath %5C%22${__DotEnv}%5C%22 does not exist"
+ echo "The dotenv file FullPath %5C%22${__DotEnv}%5C%22 does not exist"
export __DotEnvArg=-e ${__DotEnv}]]></Command>
<Description>A dotenv file to pass to corerun to set environment variables for the test run.</Description>
+ <!-- "usewatcher" is a switch without a parameter. Its command only sets _RunWithWatcher=1, the flag tested by the `elif [ "$_RunWithWatcher" == 1 ]` launcher branch. -->
+ <BashCLRTestExecutionScriptArgument Include="usewatcher">
+ <HasParam>false</HasParam>
+ <Command><![CDATA[ _RunWithWatcher=1]]></Command>
+ <Description>Run the tests using the test watcher.</Description>
+ </BashCLRTestExecutionScriptArgument>
exit 1
- # Copy CORECLR native binaries to $LinkBin,
+ # Copy CORECLR native binaries and the test watcher to $LinkBin,
# so that we can run the test based on that directory
cp $CORE_ROOT/*.so $LinkBin/
cp $CORE_ROOT/corerun $LinkBin/
+ cp $CORE_ROOT/watchdog $LinkBin/
# Copy some files that may be arguments
for f in *.txt;
<CLRTestRunFile Condition="'$(CLRTestIsHosted)'=='true'">"$CORE_ROOT/corerun" $(CoreRunArgs) ${__DotEnvArg}</CLRTestRunFile>
+ <WatcherRunFile>"$CORE_ROOT/watchdog" 300</WatcherRunFile>
<!-- Note that this overwrites CLRTestBashPreCommands rather than adding to it. -->
<CLRTestBashPreCommands Condition="'$(CLRTestKind)' == 'BuildAndRun' and '$(TargetArchitecture)' == 'wasm'"><![CDATA[
if [ ! -z "$CLRCustomTestLauncher" ];
LAUNCHER="$CLRCustomTestLauncher $PWD/"
+elif [ "$_RunWithWatcher" == 1 ];
+ LAUNCHER="$(WatcherRunFile) $(CLRTestRunFile)"
LAUNCHER="$_DebuggerFullPath $_DebuggerArgsSeparator $(CLRTestRunFile)"
if [ ! -z "$CLRCustomTestLauncher" ];
LAUNCHER="$CLRCustomTestLauncher $PWD/"
+elif [ "$_RunWithWatcher" == 1 ];
+ LAUNCHER="$(WatcherRunFile) $(CLRTestRunFile)"
- LAUNCHER="$_DebuggerFullPath $(CLRTestRunFile)"
+ LAUNCHER="$_DebuggerFullPath $_DebuggerArgsSeparator $(CLRTestRunFile)"
for i in "$@"
case $i in
- -?|-h|--help)
+ -?|-h|--help|/?|/h|/help)
@(BashCLRTestExecutionScriptArgument -> ' -%(Identity)%(ParamText)|/%(Identity)%(ParamText))
cd "$%28dirname "${BASH_SOURCE[0]}")"
# The __TestEnv variable may be used to specify a script to source before the test.
<Description>Set CORE_ROOT to the specified value before running the test.</Description>
+ <!-- "usewatcher" is a switch without a parameter. Its command only sets _RunWithWatcher to 1, the flag tested by the `IF %_RunWithWatcher% EQU 1` launcher branch. -->
+ <BatchCLRTestExecutionScriptArgument Include="usewatcher">
+ <HasParam>false</HasParam>
+ <Command><![CDATA[
+ set /A _RunWithWatcher=1
+ ]]></Command>
+ <Description>Run the tests using the test watcher.</Description>
+ </BatchCLRTestExecutionScriptArgument>
Exit /b 1
- REM Copy CORECLR native binaries to %LinkBin%, so that we can run the test based on that directory
+ REM Copy CORECLR native binaries and the test watcher to %LinkBin%, so that we can run the test based on that directory
copy %CORE_ROOT%\clrjit.dll %LinkBin% > nul 2> nul
copy %CORE_ROOT%\coreclr.dll %LinkBin% > nul 2> nul
copy %CORE_ROOT%\mscorrc.dll %LinkBin% > nul 2> nul
copy %CORE_ROOT%\CoreRun.exe %LinkBin% > nul 2> nul
+ copy %CORE_ROOT%\watchdog.exe %LinkBin% > nul 2> nul
REM Copy some files that may be arguments
copy *.txt %LinkBin% > nul 2> nul
set ExePath=%LinkBin%\$(InputAssemblyName)
- set CORE_ROOT=%scriptPath%LinkBin%
+ set CORE_ROOT=%scriptPath%\%LinkBin%
<CLRTestRunFile Condition="'$(CLRTestIsHosted)'=='true'">"%CORE_ROOT%\corerun.exe" $(CoreRunArgs) %__DotEnvArg%</CLRTestRunFile>
+ <WatcherRunFile>"%CORE_ROOT%\watchdog.exe" 300</WatcherRunFile>
<BatchCopyCoreShimLocalCmds Condition="'$(CLRTestScriptLocalCoreShim)' == 'true'"><![CDATA[
REM Local CoreShim requested - see MSBuild property 'CLRTestScriptLocalCoreShim'
ECHO Copying '%CORE_ROOT%\CoreShim.dll'...
IF NOT "%CLRCustomTestLauncher%"=="" (
set LAUNCHER=call %CLRCustomTestLauncher% %scriptPath%
+) ELSE IF %_RunWithWatcher% EQU 1 (
+ set LAUNCHER=$(WatcherRunFile) $(CLRTestRunFile)
) ELSE (
set LAUNCHER=%_DebuggerFullPath% $(CLRTestRunFile)
set "lockFolder=%~dp0\lock"
pushd %~dp0
set "scriptPath=%~dp0"
+set /A _RunWithWatcher=0
<HelixCommandLines Condition="'$(TestWrapperTargetsWindows)' != 'true'" Include="export TEST_HARNESS_STRIPE_TO_EXECUTE=.0.1" />
<HelixCommandLines Condition="'$(TestWrapperTargetsWindows)' != 'true'" Include="chmod +x $(_MergedWrapperRunScriptRelative)" />
<!-- Force assemblies to lazy-load for LLVM AOT test runs to enable using tests that fail at AOT time (and as a result can't be AOTd) -->
- <HelixCommandLines Condition="'$(RuntimeVariant)' == 'llvmfullaot'" Include="$(_MergedWrapperRunScriptPrefix)$(_MergedWrapperRunScriptRelative) --aot-lazy-assembly-load" />
- <HelixCommandLines Condition="'$(RuntimeVariant)' != 'llvmfullaot'" Include="$(_MergedWrapperRunScriptPrefix)$(_MergedWrapperRunScriptRelative)" />
+ <HelixCommandLines Condition="'$(RuntimeVariant)' == 'llvmfullaot'" Include="$(_MergedWrapperRunScriptPrefix)$(_MergedWrapperRunScriptRelative) -usewatcher --aot-lazy-assembly-load" />
+ <HelixCommandLines Condition="'$(RuntimeVariant)' != 'llvmfullaot'" Include="$(_MergedWrapperRunScriptPrefix)$(_MergedWrapperRunScriptRelative) -usewatcher" />
<HelixCommandLines Include="$(XUnitLogCheckerCommand)" />
<ItemGroup Condition=" '$(UsesHelixSdk)' == 'true' ">
<HelixCorrelationPayload Include="$(CoreRootDirectory)" />
- <HelixCorrelationPayload Include="$(XUnitLogCheckerDirectory)" />
+ <!-- Browser-Wasm follows a very different workflow, which is currently out of scope of the Log Checker. -->
+ <HelixCorrelationPayload Include="$(XUnitLogCheckerDirectory)" Condition="'$(TargetsBrowser)' != 'true'" />
<LegacyPayloads Include="$([System.IO.Directory]::GetDirectories($(LegacyPayloadsRootDirectory)))" Condition="Exists('$(LegacyPayloadsRootDirectory)')" />
<LegacyPayloads Update="@(LegacyPayloads)">