From 047e68235f8ebf8dc9fd816829ba90561d423ff9 Mon Sep 17 00:00:00 2001 From: Michael Dagitses Date: Fri, 17 Sep 2021 12:07:19 -0700 Subject: [PATCH] delegate parallelism to Ninja when possible (#64733) Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/64733 The previous implementation was wrong when CPU scheduling affinity is set. In fact, it is still wrong if Ninja is not being used. When there is CPU scheduling affinity set, the number of processors available on the system likely exceeds the number of processors that are usable to the build. We ought to use `len(os.sched_getaffinity(0))` to determine the effective parallelism. This change is more minimal and instead just delegates to Ninja (which handles this correctly) when it is used. Test Plan: I verified this worked as correctly using Ninja on a 96-core machine with 24 cores available for scheduling by checking: * the cmake command did not specify "-j" * the number of top-level jobs in top/pstree never exceeded 26 (24 + 2) And I verified we get the legacy behavior by specifying USE_NINJA=0 on the build. Reviewed By: jbschlosser, driazati Differential Revision: D30968796 Pulled By: dagitses fbshipit-source-id: 29547dd378fea793957bcc2f7d52d5def1ecace2 --- tools/setup_helpers/cmake.py | 42 ++++++++++++++++++++++++++++++++++-------- tools/test/test_cmake.py | 11 +++++++---- 2 files changed, 41 insertions(+), 12 deletions(-) diff --git a/tools/setup_helpers/cmake.py b/tools/setup_helpers/cmake.py index e819f47e..dd8f817 100644 --- a/tools/setup_helpers/cmake.py +++ b/tools/setup_helpers/cmake.py @@ -357,13 +357,39 @@ class CMake: from .env import build_type - max_jobs = os.getenv('MAX_JOBS', str(multiprocessing.cpu_count())) build_args = ['--build', '.', '--target', 'install', '--config', build_type.build_type_string] - # This ``if-else'' clause would be unnecessary when cmake 3.12 becomes - # minimum, which provides a '-j' option: build_args += ['-j', max_jobs] - # would be sufficient by then. - if IS_WINDOWS and not USE_NINJA: # We are likely using msbuild here - build_args += ['--', '/p:CL_MPCount={}'.format(max_jobs)] - else: - build_args += ['--', '-j', max_jobs] + + # Determine the parallelism according to the following + # priorities: + # 1) MAX_JOBS environment variable + # 2) If using the Ninja build system, delegate decision to it. + # 3) Otherwise, fall back to the number of processors. + + # Allow the user to set parallelism explicitly. If unset, + # we'll try to figure it out. + max_jobs = os.getenv('MAX_JOBS') + + if max_jobs is not None or not USE_NINJA: + # Ninja is capable of figuring out the parallelism on its + # own: only specify it explicitly if we are not using + # Ninja. + + # This lists the number of processors available on the + # machine. This may be an overestimate of the usable + # processors if CPU scheduling affinity limits it + # further. In the future, we should check for that with + # os.sched_getaffinity(0) on platforms that support it. + max_jobs = max_jobs or str(multiprocessing.cpu_count()) + + # This ``if-else'' clause would be unnecessary when cmake + # 3.12 becomes minimum, which provides a '-j' option: + # build_args += ['-j', max_jobs] would be sufficient by + # then. Until then, we use "--" to pass parameters to the + # underlying build system. + build_args += ['--'] + if IS_WINDOWS: + # We are likely using msbuild here + build_args += ['/p:CL_MPCount={}'.format(max_jobs)] + else: + build_args += ['-j', max_jobs] self.run(build_args, my_env) diff --git a/tools/test/test_cmake.py b/tools/test/test_cmake.py index 9a7fdbf..a9fae6d 100644 --- a/tools/test/test_cmake.py +++ b/tools/test/test_cmake.py @@ -20,9 +20,9 @@ class TestCMake(unittest.TestCase): mock_cpu_count.return_value = 13 cases = [ # MAX_JOBS, USE_NINJA, IS_WINDOWS, want - (( '8', True, False), ['-j', '8']), # noqa: E201,E241 - (( None, True, False), ['-j', '13']), # noqa: E201,E241 - (( None, True, True), ['-j', '13']), # noqa: E201,E241 + (( '8', True, False), ['-j', '8']), # noqa: E201,E241 + (( None, True, False), None), # noqa: E201,E241 + (( None, True, True), None), # noqa: E201,E241 (( None, False, True), ['/p:CL_MPCount=13']), # noqa: E201,E241 ] for (max_jobs, use_ninja, is_windows), want in cases: @@ -41,7 +41,10 @@ class TestCMake(unittest.TestCase): call, = cmake_run.mock_calls build_args, _ = call.args - self.assert_contains_sequence(build_args, want) + if want is None: + self.assertNotIn('-j', build_args) + else: + self.assert_contains_sequence(build_args, want) @staticmethod def assert_contains_sequence(sequence: Sequence[T], subsequence: Sequence[T]) -> None: -- 2.7.4