From ab5cf5a1eb17516dddf5162dc7ab3c670c997376 Mon Sep 17 00:00:00 2001 From: driazati Date: Wed, 25 Aug 2021 12:58:24 -0700 Subject: [PATCH] Move existing target determinator to tools (#63809) Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/63809 This moves out the modulefinder determinator to `tools/testing` since it is supposed to be CI-only. This also simplifies run_test.py a little bit. Test Plan: Imported from OSS Reviewed By: malfet, seemethere, janeyx99 Differential Revision: D30497438 Pulled By: driazati fbshipit-source-id: 1d203037af5af6a20c1e7812da935e7cbb5cd82f --- test/run_test.py | 232 ++--------------------------- test/test_determination.py | 2 +- tools/testing/modulefinder_determinator.py | 224 ++++++++++++++++++++++++++++ 3 files changed, 241 insertions(+), 217 deletions(-) create mode 100644 tools/testing/modulefinder_determinator.py diff --git a/test/run_test.py b/test/run_test.py index ecc93fe..d3c6610 100755 --- a/test/run_test.py +++ b/test/run_test.py @@ -4,8 +4,8 @@ import argparse import copy from datetime import datetime from distutils.util import strtobool -import modulefinder import os +import pathlib import shutil import signal import subprocess @@ -24,9 +24,11 @@ from torch.testing._internal.common_utils import ( import torch.distributed as dist from typing import Dict, Optional, List +REPO_ROOT = pathlib.Path(__file__).resolve().parent.parent + try: # using tools/ to optimize test run. - sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), "..")) + sys.path.append(str(REPO_ROOT)) from tools.testing.test_selections import ( export_S3_test_times, get_shard_based_on_S3, @@ -35,6 +37,10 @@ try: get_reordered_tests, get_test_case_configs, ) + from tools.testing.modulefinder_determinator import ( + should_run_test, + TARGET_DET_LIST, + ) HAVE_TEST_SELECTION_TOOLS = True except ImportError: @@ -276,87 +282,12 @@ RUN_PARALLEL_BLOCKLIST = [ WINDOWS_COVERAGE_BLOCKLIST = [] -# These tests are slow enough that it's worth calculating whether the patch -# touched any related files first. This list was manually generated, but for every -# run with --determine-from, we use another generated list based on this one and the -# previous test stats. -TARGET_DET_LIST = [ - "distributions/test_distributions", - "test_nn", - "test_autograd", - "test_cpp_extensions_jit", - "test_jit_legacy", - "test_dataloader", - "test_overrides", - "test_linalg", - "test_jit", - "test_jit_profiling", - "test_torch", - "test_binary_ufuncs", - "test_numpy_interop", - "test_reductions", - "test_shape_ops", - "test_sort_and_select", - "test_testing", - "test_view_ops", - "distributed/nn/jit/test_instantiator", - "distributed/rpc/test_tensorpipe_agent", - "distributed/rpc/cuda/test_tensorpipe_agent", - "distributed/algorithms/ddp_comm_hooks/test_ddp_hooks", - "distributed/test_distributed_spawn", - "test_cuda", - "test_cuda_primary_ctx", - "test_cpp_extensions_aot_ninja", - "test_cpp_extensions_aot_no_ninja", - "test_serialization", - "test_optim", - "test_utils", - "test_multiprocessing", - "test_tensorboard", - "distributed/test_c10d_common", - "distributed/test_c10d_gloo", - "distributed/test_c10d_nccl", - "distributed/test_jit_c10d", - "distributed/test_c10d_spawn_gloo", - "distributed/test_c10d_spawn_nccl", - "distributed/test_store", - "distributed/test_pg_wrapper", - "test_quantization", - "test_pruning_op", - "test_determination", - "test_futures", - "distributed/pipeline/sync/skip/test_api", - "distributed/pipeline/sync/skip/test_gpipe", - "distributed/pipeline/sync/skip/test_inspect_skip_layout", - "distributed/pipeline/sync/skip/test_leak", - "distributed/pipeline/sync/skip/test_portal", - "distributed/pipeline/sync/skip/test_stash_pop", - "distributed/pipeline/sync/skip/test_tracker", - "distributed/pipeline/sync/skip/test_verify_skippables", - "distributed/pipeline/sync/test_balance", - "distributed/pipeline/sync/test_bugs", - "distributed/pipeline/sync/test_checkpoint", - "distributed/pipeline/sync/test_copy", - "distributed/pipeline/sync/test_deferred_batch_norm", - "distributed/pipeline/sync/test_dependency", - "distributed/pipeline/sync/test_inplace", - "distributed/pipeline/sync/test_microbatch", - "distributed/pipeline/sync/test_phony", - "distributed/pipeline/sync/test_pipe", - "distributed/pipeline/sync/test_pipeline", - "distributed/pipeline/sync/test_stream", - "distributed/pipeline/sync/test_transparency", - "distributed/pipeline/sync/test_worker", -] - # the JSON file to store the S3 test stats TEST_TIMES_FILE = ".pytorch-test-times.json" # if a test file takes longer than 5 min, we add it to TARGET_DET_LIST SLOW_TEST_THRESHOLD = 300 -_DEP_MODULES_CACHE: Dict[str, set] = {} - DISTRIBUTED_TESTS_CONFIG = {} @@ -957,136 +888,6 @@ def get_selected_tests(options): return selected_tests -def test_impact_of_file(filename): - """Determine what class of impact this file has on test runs. - - Possible values: - TORCH - torch python code - CAFFE2 - caffe2 python code - TEST - torch test code - UNKNOWN - may affect all tests - NONE - known to have no effect on test outcome - CI - CI configuration files - """ - parts = filename.split(os.sep) - if parts[0] in [".jenkins", ".circleci"]: - return "CI" - if parts[0] in ["docs", "scripts", "CODEOWNERS", "README.md"]: - return "NONE" - elif parts[0] == "torch": - if parts[-1].endswith(".py") or parts[-1].endswith(".pyi"): - return "TORCH" - elif parts[0] == "caffe2": - if parts[-1].endswith(".py") or parts[-1].endswith(".pyi"): - return "CAFFE2" - elif parts[0] == "test": - if parts[-1].endswith(".py") or parts[-1].endswith(".pyi"): - return "TEST" - - return "UNKNOWN" - - -def log_test_reason(file_type, filename, test, options): - if options.verbose: - print_to_stderr( - "Determination found {} file {} -- running {}".format( - file_type, - filename, - test, - ) - ) - - -def get_dep_modules(test): - # Cache results in case of repetition - if test in _DEP_MODULES_CACHE: - return _DEP_MODULES_CACHE[test] - - repo_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - test_location = os.path.join(repo_root, "test", test + ".py") - finder = modulefinder.ModuleFinder( - # Ideally exclude all third party modules, to speed up calculation. - excludes=[ - "scipy", - "numpy", - "numba", - "multiprocessing", - "sklearn", - "setuptools", - "hypothesis", - "llvmlite", - "joblib", - "email", - "importlib", - "unittest", - "urllib", - "json", - "collections", - # Modules below are excluded because they are hitting https://bugs.python.org/issue40350 - # Trigger AttributeError: 'NoneType' object has no attribute 'is_package' - "mpl_toolkits", - "google", - "onnx", - # Triggers RecursionError - "mypy", - ], - ) - # HACK: some platforms default to ascii, so we can't just run_script :( - with open(test_location, "r", encoding="utf-8") as fp: - finder.load_module("__main__", fp, test_location, ("", "r", 1)) - - dep_modules = set(finder.modules.keys()) - _DEP_MODULES_CACHE[test] = dep_modules - return dep_modules - - -def determine_target(target_det_list, test, touched_files, options): - test = parse_test_module(test) - # Some tests are faster to execute than to determine. - if test not in target_det_list: - if options.verbose: - print_to_stderr(f"Running {test} without determination") - return True - # HACK: "no_ninja" is not a real module - if test.endswith("_no_ninja"): - test = test[: (-1 * len("_no_ninja"))] - if test.endswith("_ninja"): - test = test[: (-1 * len("_ninja"))] - - dep_modules = get_dep_modules(test) - - for touched_file in touched_files: - file_type = test_impact_of_file(touched_file) - if file_type == "NONE": - continue - elif file_type == "CI": - # Force all tests to run if any change is made to the CI - # configurations. - log_test_reason(file_type, touched_file, test, options) - return True - elif file_type == "UNKNOWN": - # Assume uncategorized source files can affect every test. - log_test_reason(file_type, touched_file, test, options) - return True - elif file_type in ["TORCH", "CAFFE2", "TEST"]: - parts = os.path.splitext(touched_file)[0].split(os.sep) - touched_module = ".".join(parts) - # test/ path does not have a "test." namespace - if touched_module.startswith("test."): - touched_module = touched_module.split("test.")[1] - if touched_module in dep_modules or touched_module == test.replace( - "/", "." - ): - log_test_reason(file_type, touched_file, test, options) - return True - - # If nothing has determined the test has run, don't run the test. - if options.verbose: - print_to_stderr(f"Determination is skipping {test}") - - return False - - def run_test_module(test: str, test_directory: str, options) -> Optional[str]: test_module = parse_test_module(test) @@ -1131,7 +932,7 @@ def main(): specified_test_cases_filename, TESTS ) - test_directory = os.path.dirname(os.path.abspath(__file__)) + test_directory = str(REPO_ROOT / "test") selected_tests = get_selected_tests(options) if options.verbose: @@ -1144,10 +945,10 @@ def main(): slow_tests = get_slow_tests_based_on_S3( TESTS, TARGET_DET_LIST, SLOW_TEST_THRESHOLD ) - print( + print_to_stderr( "Added the following tests to target_det tests as calculated based on S3:" ) - print(slow_tests) + print_to_stderr(slow_tests) with open(options.determine_from, "r") as fh: touched_files = [ os.path.normpath(name.strip()) @@ -1155,22 +956,22 @@ def main(): if len(name.strip()) > 0 ] # HACK: Ensure the 'test' paths can be traversed by Modulefinder - sys.path.append("test") + sys.path.append(test_directory) selected_tests = [ test for test in selected_tests - if determine_target( + if should_run_test( TARGET_DET_LIST + slow_tests, test, touched_files, options ) ] - sys.path.remove("test") + sys.path.remove(test_directory) if IS_IN_CI: selected_tests = get_reordered_tests( selected_tests, ENABLE_PR_HISTORY_REORDERING ) # downloading test cases configuration to local environment - get_test_case_configs(dirpath=os.path.dirname(os.path.abspath(__file__))) + get_test_case_configs(dirpath=test_directory) has_failed = False failure_messages = [] @@ -1191,8 +992,7 @@ def main(): if options.coverage: from coverage import Coverage - test_dir = os.path.dirname(os.path.abspath(__file__)) - with set_cwd(test_dir): + with set_cwd(test_directory): cov = Coverage() if PYTORCH_COLLECT_COVERAGE: cov.load() diff --git a/test/test_determination.py b/test/test_determination.py index 6b7fcc0..277bbd2 100644 --- a/test/test_determination.py +++ b/test/test_determination.py @@ -30,7 +30,7 @@ class DeterminationTest(unittest.TestCase): return [ test for test in cls.TESTS - if run_test.determine_target(run_test.TARGET_DET_LIST, test, changed_files, DummyOptions()) + if run_test.should_run_test(run_test.TARGET_DET_LIST, test, changed_files, DummyOptions()) ] def test_config_change_only(self): diff --git a/tools/testing/modulefinder_determinator.py b/tools/testing/modulefinder_determinator.py new file mode 100644 index 0000000..8acd0ed --- /dev/null +++ b/tools/testing/modulefinder_determinator.py @@ -0,0 +1,224 @@ +import os +import modulefinder +import sys +import pathlib +import warnings +from typing import Dict, Any, List, Set + +REPO_ROOT = pathlib.Path(__file__).resolve().parent.parent.parent + +# These tests are slow enough that it's worth calculating whether the patch +# touched any related files first. This list was manually generated, but for every +# run with --determine-from, we use another generated list based on this one and the +# previous test stats. +TARGET_DET_LIST = [ + "distributions/test_distributions", + "test_nn", + "test_autograd", + "test_cpp_extensions_jit", + "test_jit_legacy", + "test_dataloader", + "test_overrides", + "test_linalg", + "test_jit", + "test_jit_profiling", + "test_torch", + "test_binary_ufuncs", + "test_numpy_interop", + "test_reductions", + "test_shape_ops", + "test_sort_and_select", + "test_testing", + "test_view_ops", + "distributed/nn/jit/test_instantiator", + "distributed/rpc/test_tensorpipe_agent", + "distributed/rpc/cuda/test_tensorpipe_agent", + "distributed/algorithms/ddp_comm_hooks/test_ddp_hooks", + "distributed/test_distributed_spawn", + "test_cuda", + "test_cuda_primary_ctx", + "test_cpp_extensions_aot_ninja", + "test_cpp_extensions_aot_no_ninja", + "test_serialization", + "test_optim", + "test_utils", + "test_multiprocessing", + "test_tensorboard", + "distributed/test_c10d_common", + "distributed/test_c10d_gloo", + "distributed/test_c10d_nccl", + "distributed/test_jit_c10d", + "distributed/test_c10d_spawn_gloo", + "distributed/test_c10d_spawn_nccl", + "distributed/test_store", + "distributed/test_pg_wrapper", + "test_quantization", + "test_pruning_op", + "test_determination", + "test_futures", + "distributed/pipeline/sync/skip/test_api", + "distributed/pipeline/sync/skip/test_gpipe", + "distributed/pipeline/sync/skip/test_inspect_skip_layout", + "distributed/pipeline/sync/skip/test_leak", + "distributed/pipeline/sync/skip/test_portal", + "distributed/pipeline/sync/skip/test_stash_pop", + "distributed/pipeline/sync/skip/test_tracker", + "distributed/pipeline/sync/skip/test_verify_skippables", + "distributed/pipeline/sync/test_balance", + "distributed/pipeline/sync/test_bugs", + "distributed/pipeline/sync/test_checkpoint", + "distributed/pipeline/sync/test_copy", + "distributed/pipeline/sync/test_deferred_batch_norm", + "distributed/pipeline/sync/test_dependency", + "distributed/pipeline/sync/test_inplace", + "distributed/pipeline/sync/test_microbatch", + "distributed/pipeline/sync/test_phony", + "distributed/pipeline/sync/test_pipe", + "distributed/pipeline/sync/test_pipeline", + "distributed/pipeline/sync/test_stream", + "distributed/pipeline/sync/test_transparency", + "distributed/pipeline/sync/test_worker", +] + +_DEP_MODULES_CACHE: Dict[str, Set[str]] = {} + + +def should_run_test( + target_det_list: List[str], test: str, touched_files: List[str], options: Any +) -> bool: + test = parse_test_module(test) + # Some tests are faster to execute than to determine. + if test not in target_det_list: + if options.verbose: + print_to_stderr(f"Running {test} without determination") + return True + # HACK: "no_ninja" is not a real module + if test.endswith("_no_ninja"): + test = test[: (-1 * len("_no_ninja"))] + if test.endswith("_ninja"): + test = test[: (-1 * len("_ninja"))] + + dep_modules = get_dep_modules(test) + + for touched_file in touched_files: + file_type = test_impact_of_file(touched_file) + if file_type == "NONE": + continue + elif file_type == "CI": + # Force all tests to run if any change is made to the CI + # configurations. + log_test_reason(file_type, touched_file, test, options) + return True + elif file_type == "UNKNOWN": + # Assume uncategorized source files can affect every test. + log_test_reason(file_type, touched_file, test, options) + return True + elif file_type in ["TORCH", "CAFFE2", "TEST"]: + parts = os.path.splitext(touched_file)[0].split(os.sep) + touched_module = ".".join(parts) + # test/ path does not have a "test." namespace + if touched_module.startswith("test."): + touched_module = touched_module.split("test.")[1] + if touched_module in dep_modules or touched_module == test.replace( + "/", "." + ): + log_test_reason(file_type, touched_file, test, options) + return True + + # If nothing has determined the test has run, don't run the test. + if options.verbose: + print_to_stderr(f"Determination is skipping {test}") + + return False + + +def test_impact_of_file(filename: str) -> str: + """Determine what class of impact this file has on test runs. + + Possible values: + TORCH - torch python code + CAFFE2 - caffe2 python code + TEST - torch test code + UNKNOWN - may affect all tests + NONE - known to have no effect on test outcome + CI - CI configuration files + """ + parts = filename.split(os.sep) + if parts[0] in [".jenkins", ".circleci"]: + return "CI" + if parts[0] in ["docs", "scripts", "CODEOWNERS", "README.md"]: + return "NONE" + elif parts[0] == "torch": + if parts[-1].endswith(".py") or parts[-1].endswith(".pyi"): + return "TORCH" + elif parts[0] == "caffe2": + if parts[-1].endswith(".py") or parts[-1].endswith(".pyi"): + return "CAFFE2" + elif parts[0] == "test": + if parts[-1].endswith(".py") or parts[-1].endswith(".pyi"): + return "TEST" + + return "UNKNOWN" + + +def log_test_reason(file_type: str, filename: str, test: str, options: Any) -> None: + if options.verbose: + print_to_stderr( + "Determination found {} file {} -- running {}".format( + file_type, + filename, + test, + ) + ) + + +def get_dep_modules(test: str) -> Set[str]: + # Cache results in case of repetition + if test in _DEP_MODULES_CACHE: + return _DEP_MODULES_CACHE[test] + + test_location = REPO_ROOT / "test" / f"{test}.py" + + # HACK: some platforms default to ascii, so we can't just run_script :( + finder = modulefinder.ModuleFinder( + # Ideally exclude all third party modules, to speed up calculation. + excludes=[ + "scipy", + "numpy", + "numba", + "multiprocessing", + "sklearn", + "setuptools", + "hypothesis", + "llvmlite", + "joblib", + "email", + "importlib", + "unittest", + "urllib", + "json", + "collections", + # Modules below are excluded because they are hitting https://bugs.python.org/issue40350 + # Trigger AttributeError: 'NoneType' object has no attribute 'is_package' + "mpl_toolkits", + "google", + "onnx", + # Triggers RecursionError + "mypy", + ], + ) + + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + finder.run_script(str(test_location)) + dep_modules = set(finder.modules.keys()) + _DEP_MODULES_CACHE[test] = dep_modules + return dep_modules + + +def parse_test_module(test: str) -> str: + return test.split(".")[0] + + +def print_to_stderr(message: str) -> None: + print(message, file=sys.stderr) -- 2.7.4