From 63cde8f0f419b45426bc9ba9aa06962ced03daf8 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Mon, 26 Nov 2018 13:27:46 -0800 Subject: [PATCH] Updating runtest.py to support printing coredump information (#21149) * Updating runtest.py to support printing coredump information * Adding some comments to the coredump collection methods in runtest.py --- tests/runtest.py | 223 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- tests/runtest.sh | 3 + 2 files changed, 224 insertions(+), 2 deletions(-) diff --git a/tests/runtest.py b/tests/runtest.py index 360afc3..6081bc7 100755 --- a/tests/runtest.py +++ b/tests/runtest.py @@ -41,6 +41,7 @@ import argparse import datetime +import fnmatch import json import math import os @@ -126,6 +127,7 @@ parser.add_argument("--generate_layout", dest="generate_layout", action="store_t parser.add_argument("--generate_layout_only", dest="generate_layout_only", action="store_true", default=False) parser.add_argument("--analyze_results_only", dest="analyze_results_only", action="store_true", default=False) parser.add_argument("--verbose", dest="verbose", action="store_true", default=False) +parser.add_argument("--limited_core_dumps", dest="limited_core_dumps", action="store_true", default=False) # Only used on Unix parser.add_argument("-test_native_bin_location", dest="test_native_bin_location", nargs='?', default=None) @@ -137,6 +139,7 @@ parser.add_argument("-test_native_bin_location", dest="test_native_bin_location" g_verbose = False gc_stress_c = False gc_stress = False +coredump_pattern = "" file_name_cache = defaultdict(lambda: None) ################################################################################ @@ -548,11 +551,13 @@ def get_environment(test_env=None): def call_msbuild(coreclr_repo_location, dotnetcli_location, + test_location, host_os, arch, build_type, is_illink=False, - sequential=False): + sequential=False, + limited_core_dumps=False): """ Call msbuild to run the tests built. Args: @@ -591,6 +596,9 @@ def call_msbuild(coreclr_repo_location, if is_illink: command += ["/p:RunTestsViaIllink=true"] + if limited_core_dumps: + command += ["/p:LimitedCoreDumps=true"] + log_path = os.path.join(logs_dir, "TestRunResults_%s_%s_%s" % (host_os, arch, build_type)) build_log = log_path + ".log" wrn_log = log_path + ".wrn" @@ -622,6 +630,9 @@ def call_msbuild(coreclr_repo_location, proc.kill() sys.exit(1) + if limited_core_dumps: + inspect_and_delete_coredump_files(host_os, arch, test_location) + return proc.returncode def running_in_ci(): @@ -705,6 +716,207 @@ def correct_line_endings(host_os, test_location, root=True): with open(test_location, 'w') as file_handle: file_handle.write(content) +def setup_coredump_generation(host_os): + """ Configures the environment so that the current process and any child + processes can generate coredumps. + + Args: + host_os (String) : os + + Notes: + This is only support for OSX and Linux, it does nothing on Windows. + This will print a message if setting the rlimit fails but will otherwise + continue execution, as some systems will already be configured correctly + and it is not necessarily a failure to not collect coredumps. + """ + global coredump_pattern + + if host_os == "OSX": + coredump_pattern = subprocess.check_output("sysctl -n kern.corefile", shell=True).rstrip() + elif host_os == "Linux": + with open("/proc/sys/kernel/core_pattern", "r") as f: + coredump_pattern = f.read().rstrip() + else: + print("CoreDump generation not enabled due to unsupported OS: %s" % host_os) + return + + # resource is only available on Unix platforms + import resource + + if coredump_pattern != "core" and coredump_pattern != "core.%P": + print("CoreDump generation not enabled due to unsupported coredump pattern: %s" % coredump_pattern) + return + else: + print("CoreDump pattern: %s" % coredump_pattern) + + # We specify 'shell=True' as the command may otherwise fail (some systems will + # complain that the executable cannot be found in the current directory). + rlimit_core = subprocess.check_output("ulimit -c", shell=True).rstrip() + + if rlimit_core != "unlimited": + try: + # This can fail on certain platforms. ARM64 in particular gives: "ValueError: not allowed to raise maximum limit" + resource.setrlimit(resource.RLIMIT_CORE, (resource.RLIM_INFINITY, resource.RLIM_INFINITY)) + except: + print("Failed to enable CoreDump generation. rlimit_core: %s" % rlimit_core) + return + + rlimit_core = subprocess.check_output("ulimit -c", shell=True).rstrip() + + if rlimit_core != "unlimited": + print("Failed to enable CoreDump generation. rlimit_core: %s" % rlimit_core) + return + + print("CoreDump generation enabled") + + if host_os == "Linux" and os.path.isfile("/proc/self/coredump_filter"): + # Include memory in private and shared file-backed mappings in the dump. + # This ensures that we can see disassembly from our shared libraries when + # inspecting the contents of the dump. See 'man core' for details. + with open("/proc/self/coredump_filter", "w") as f: + f.write("0x3F") + +def print_info_from_coredump_file(host_os, arch, coredump_name, executable_name): + """ Prints information from the specified coredump to the console + + Args: + host_os (String) : os + arch (String) : architecture + coredump_name (String) : name of the coredump to print + executable_name (String) : name of the executable that generated the coredump + + Notes: + This is only support for OSX and Linux, it does nothing on Windows. + This defaults to lldb on OSX and gdb on Linux. + For both lldb and db, it backtraces all threads. For gdb, it also prints local + information for every frame. This option is not available as a built-in for lldb. + """ + if not os.path.isfile(executable_name): + print("Not printing coredump due to missing executable: %s" % executable_name) + return + + if not os.path.isfile(coredump_name): + print("Not printing coredump due to missing coredump: %s" % coredump_name) + return + + command = "" + + if host_os == "OSX": + command = "lldb -c %s -b -o 'bt all' -o 'disassemble -b -p'" % coredump_name + elif host_os == "Linux": + command = "gdb --batch -ex \"thread apply all bt full\" -ex \"disassemble /r $pc\" -ex \"quit\" %s %s" % (executable_name, coredump_name) + else: + print("Not printing coredump due to unsupported OS: %s" % host_os) + return + + print("Printing info from coredump: %s" % coredump_name) + + proc_failed = False + + try: + sys.stdout.flush() # flush output before creating sub-process + + # We specify 'shell=True' as the command may otherwise fail (some systems will + # complain that the executable cannot be found in the current directory). + proc = subprocess.Popen(command, shell=True) + proc.communicate() + + if proc.returncode != 0: + proc_failed = True + except: + proc_failed = True + + if proc_failed: + print("Failed to print coredump: %s" % coredump_name) + +def preserve_coredump_file(coredump_name, root_storage_location="/tmp/coredumps_coreclr"): + """ Copies the specified coredump to a new randomly named temporary directory under + root_storage_location to ensure it is accessible after the workspace is cleaned. + + Args: + coredump_name (String) : name of the coredump to print + root_storage_location (String) : the directory under which to copy coredump_name + + Notes: + root_storage_location defaults to a folder under /tmp to ensure that it is cleaned + up on next reboot (or after the OS configured time elapses for the folder). + """ + if not os.path.exists(root_storage_location): + os.mkdir(root_storage_location) + + # This creates a temporary directory under `root_storage_location` to ensure it doesn'tag + # conflict with any coredumps from past runs. + storage_location = tempfile.mkdtemp('', '', root_storage_location) + + # Only preserve the dump if the directory is empty. Otherwise, do nothing. + # This is a way to prevent us from storing/uploading too many dumps. + if os.path.isfile(coredump_name) and not os.listdir(storage_location): + print("Copying coredump file %s to %s" % (coredump_name, storage_location)) + shutil.copy2(coredump_name, storage_location) + # TODO: Support uploading to dumpling + +def inspect_and_delete_coredump_file(host_os, arch, coredump_name): + """ Prints information from the specified coredump and creates a backup of it + + Args: + host_os (String) : os + arch (String) : architecture + coredump_name (String) : name of the coredump to print + """ + print_info_from_coredump_file(host_os, arch, coredump_name, "%s/corerun" % os.environ["CORE_ROOT"]) + preserve_coredump_file(coredump_name) + os.remove(coredump_name) + +def inspect_and_delete_coredump_files(host_os, arch, test_location): + """ Finds all coredumps under test_location, prints some basic information about them + to the console, and creates a backup of the dumps for further investigation + + Args: + host_os (String) : os + arch (String) : architecture + test_location (String) : the folder under which to search for coredumps + """ + # This function prints some basic information from core files in the current + # directory and deletes them immediately. Based on the state of the system, it may + # also upload a core file to the dumpling service. + # (see preserve_core_file). + + # Depending on distro/configuration, the core files may either be named "core" + # or "core." by default. We will read /proc/sys/kernel/core_uses_pid to + # determine which one it is. + # On OS X/macOS, we checked the kern.corefile value before enabling core dump + # generation, so we know it always includes the PID. + coredump_name_uses_pid=False + + print("Looking for coredumps...") + + if "%P" in coredump_pattern: + coredump_name_uses_pid=True + elif host_os == "Linux" and os.path.isfile("/proc/sys/kernel/core_uses_pid"): + with open("/proc/sys/kernel/core_uses_pid", "r") as f: + if f.read().rstrip() == "1": + coredump_name_uses_pid=True + + filter_pattern = "" + regex_pattern = "" + matched_file_count = 0 + + if coredump_name_uses_pid: + filter_pattern = "core.*" + regex_pattern = "core.[0-9]+" + else: + filter_pattern = "core" + regex_pattern = "core" + + for dir_path, dir_names, file_names in os.walk(test_location): + for file_name in fnmatch.filter(file_names, filter_pattern): + if re.match(regex_pattern, file_name): + print("Found coredump: %s in %s" % (file_name, dir_path)) + matched_file_count += 1 + inspect_and_delete_coredump_file(host_os, arch, os.path.join(dir_path, file_name)) + + print("Found %s coredumps." % matched_file_count) + def run_tests(host_os, arch, build_type, @@ -719,7 +931,8 @@ def run_tests(host_os, is_ilasm=False, is_illink=False, run_crossgen_tests=False, - run_sequential=False): + run_sequential=False, + limited_core_dumps=False): """ Run the coreclr tests Args: @@ -766,6 +979,9 @@ def run_tests(host_os, print("Running GCStress, extending timeout to 120 minutes.") os.environ["__TestTimeout"] = str(120*60*1000) # 1,800,000 ms + if limited_core_dumps: + setup_coredump_generation(host_os) + # Set Core_Root print("Setting CORE_ROOT=%s" % core_root) os.environ["CORE_ROOT"] = core_root @@ -823,10 +1039,12 @@ def run_tests(host_os, # Call msbuild. return call_msbuild(coreclr_repo_location, dotnetcli_location, + test_location, host_os, arch, build_type, is_illink=is_illink, + limited_core_dumps=limited_core_dumps, sequential=run_sequential) def setup_args(args): @@ -2044,6 +2262,7 @@ def do_setup(host_os, is_gcsimulator=unprocessed_args.gcsimulator, is_jitdasm=unprocessed_args.jitdisasm, is_ilasm=unprocessed_args.ilasmroundtrip, + limited_core_dumps=unprocessed_args.limited_core_dumps, run_sequential=unprocessed_args.sequential, run_crossgen_tests=unprocessed_args.run_crossgen_tests, test_env=test_env) diff --git a/tests/runtest.sh b/tests/runtest.sh index 25e6c89..a6faf13 100755 --- a/tests/runtest.sh +++ b/tests/runtest.sh @@ -554,6 +554,9 @@ if (($doCrossgen!=0)); then runtestPyArguments+=("--precompile_core_root") fi +if [ "$limitedCoreDumps" == "ON" ]; then + runtestPyArguments+=("--limited_core_dumps") +fi # Default to python3 if it is installed __Python=python -- 2.7.4