From: driazati Date: Thu, 19 Aug 2021 17:38:41 +0000 (-0700) Subject: Use CMake for breakpad (#63186) X-Git-Tag: accepted/tizen/8.0/unified/20231005.095509~873 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=bd8608cd5cdaa69384b11f1253b4bcc822032a51;p=platform%2Fupstream%2Fpytorch.git Use CMake for breakpad (#63186) Summary: We currently build breakpad from [this fork](https://github.com/driazati/breakpad) to include extra logic to restore signal handlers that were previously present. With some [new additions](https://github.com/google/breakpad/compare/main...driazati:main) this fork now includes a CMake based build, so we can add breakpad as a proper dependency rather than rely on including it in Docker images as a system library which is error prone (we have a bunch of images) and hard to extend to MacOS / Windows. This also includes some changes to the crash handling code to support MacOS / Windows in a similar way to Linux. ```python import torch # On Windows this writes crashes to C:\Users\\AppData\pytorch_crashes # On MacOS/Linux this writes crashes to /tmp/pytorch_crashes torch.utils._crash_handler.enable_minidumps() # Easy way to cause a segfault and trigger the handler torch.bincount(input=torch.tensor([9223372036854775807])) ``` Pull Request resolved: https://github.com/pytorch/pytorch/pull/63186 Reviewed By: malfet, seemethere Differential Revision: D30318404 Pulled By: driazati fbshipit-source-id: 0d7daf3701cfaba5451cc529a0730272ab1eb1dc --- diff --git a/.gitmodules b/.gitmodules index 6836ccb..a7cc437 100644 --- a/.gitmodules +++ b/.gitmodules @@ -139,3 +139,6 @@ [submodule "third_party/pocketfft"] path = third_party/pocketfft url = https://github.com/mreineck/pocketfft +[submodule "third_party/breakpad"] + path = third_party/breakpad + url = https://github.com/driazati/breakpad.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 188f35a..4d1653f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -202,6 +202,7 @@ cmake_dependent_option( "USE_CUDNN" OFF) option(USE_FBGEMM "Use FBGEMM (quantized 8-bit server operators)" ON) option(USE_KINETO "Use Kineto profiling library" ON) +option(USE_BREAKPAD "Use breakpad crash dump library" ON) option(USE_CUPTI_SO "Use CUPTI as a shared library" OFF) option(USE_FAKELOWP "Use FakeLowp operators" OFF) option(USE_FFMPEG "Use ffmpeg" OFF) @@ -264,6 +265,10 @@ if(NOT DEFINED USE_VULKAN) "ANDROID" OFF) endif() +if(IOS) + set(USE_BREAKPAD OFF) +endif() + option(USE_SOURCE_DEBUG_ON_MOBILE "Enable " ON) option(USE_LITE_INTERPRETER_PROFILER "Enable " ON) option(USE_VULKAN_FP16_INFERENCE "Vulkan - Use fp16 inference" OFF) diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt index 6194554..523fea8 100644 --- a/caffe2/CMakeLists.txt +++ b/caffe2/CMakeLists.txt @@ -1042,27 +1042,10 @@ if(USE_TBB) target_link_libraries(torch_cpu PUBLIC TBB::tbb) endif() - -if(LINUX) - find_library(BREAKPAD_LIB breakpad_client - PATHS /usr/local/lib/) - find_path(BREAKPAD_INCLUDE_DIR breakpad - PATHS /usr/local/include/) - - if(BREAKPAD_LIB AND BREAKPAD_INCLUDE_DIR) - message(STATUS "found breakpad library") - target_link_libraries(torch_cpu PRIVATE ${BREAKPAD_LIB}) - target_compile_definitions(torch_cpu PRIVATE ADD_BREAKPAD_SIGNAL_HANDLER) - target_include_directories(torch_cpu PRIVATE ${BREAKPAD_INCLUDE_DIR}/breakpad) - else() - if(BREAKPAD_INCLUDE_DIR) - message(STATUS "breakpad_client library not found") - elseif(BREAKPAD_LIB) - message(STATUS "breakpad include path not found") - else() - message(STATUS "breakpad_client library and include path not found") - endif() - endif() +if(USE_BREAKPAD) + target_compile_definitions(torch_cpu PRIVATE ADD_BREAKPAD_SIGNAL_HANDLER) + target_include_directories(torch_cpu PRIVATE ${CMAKE_CURRENT_LIST_DIR}/../third_party ${CMAKE_CURRENT_LIST_DIR}/../third_party/breakpad/src) + target_link_libraries(torch_cpu PRIVATE breakpad) endif() target_include_directories(torch_cpu PRIVATE ${ATen_CPU_INCLUDE}) diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index 2c19dae..3e37c35 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -1881,6 +1881,10 @@ set_target_properties(fmt-header-only PROPERTIES INTERFACE_COMPILE_FEATURES "") list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt-header-only) set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS} CACHE BOOL "Build shared libs" FORCE) +if(USE_BREAKPAD) + add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/breakpad) +endif() + # ---[ Kineto # edge profiler depends on KinetoProfiler but it only does cpu # profiling. Thus we dont need USE_CUDA/USE_ROCM diff --git a/cmake/Summary.cmake b/cmake/Summary.cmake index 4de2d79..afc63b1 100644 --- a/cmake/Summary.cmake +++ b/cmake/Summary.cmake @@ -178,6 +178,7 @@ function(caffe2_print_configuration_summary) message(STATUS " SELECTED_OP_LIST : ${SELECTED_OP_LIST}") endif() message(STATUS " USE_DEPLOY : ${USE_DEPLOY}") + message(STATUS " USE_BREAKPAD : ${USE_BREAKPAD}") message(STATUS " Public Dependencies : ${Caffe2_PUBLIC_DEPENDENCY_LIBS}") message(STATUS " Private Dependencies : ${Caffe2_DEPENDENCY_LIBS}") endfunction() diff --git a/test/test_cpp_extensions_jit.py b/test/test_cpp_extensions_jit.py index 0738352..89d9af1 100644 --- a/test/test_cpp_extensions_jit.py +++ b/test/test_cpp_extensions_jit.py @@ -869,11 +869,29 @@ class TestCppExtensionJIT(common.TestCase): gradcheck(torch.ops.my.add, [a, b], eps=1e-2) - @unittest.skipIf(not has_breakpad(), "Breakpad library must be present on system for crash handler") - @unittest.skipIf(TEST_WITH_ASAN, "ASAN disables the crash handler's signal handler") - def test_crash_handler(self): - def run_test(stderr_file, destination): - # Code to enable dumps and trigger a segfault + @staticmethod + def _crash_handler_test_process(stderr_file, destination): + # Code to enable dumps and trigger a segfault + if sys.platform == "win32": + destination = destination.replace("\\", "\\\\") + csrc = textwrap.dedent(f""" + #include + #include + #include + #include + #include + + int fail() {{ + std::wstring_convert> converter; + std::string narrow("{destination}"); + std::wstring wide = converter.from_bytes(narrow); + torch::crash_handler::enable_minidumps(wide.c_str()); + + volatile int* bad = nullptr; + return *bad; + }} + """) + else: csrc = textwrap.dedent(f""" #include @@ -885,29 +903,32 @@ class TestCppExtensionJIT(common.TestCase): }} """) - # Some special stuff to overwrite stderr for a C++ extension - # Copied from: https://stackoverflow.com/questions/8804893/redirect-stdout-from-python-for-c-calls - sys.stdout.flush() - newstdout = os.dup(2) - devnull = os.open(stderr_file, os.O_WRONLY) - os.dup2(devnull, 2) - os.close(devnull) - sys.stdout = os.fdopen(newstdout, 'w') - - module = torch.utils.cpp_extension.load_inline( - name="segfault", - cpp_sources=csrc, - functions=["fail"], - ) - module.fail() + # Some special stuff to overwrite stderr for a C++ extension + # Copied from: https://stackoverflow.com/questions/8804893/redirect-stdout-from-python-for-c-calls + sys.stdout.flush() + newstdout = os.dup(2) + devnull = os.open(stderr_file, os.O_WRONLY) + os.dup2(devnull, 2) + os.close(devnull) + sys.stdout = os.fdopen(newstdout, 'w') + module = torch.utils.cpp_extension.load_inline( + name="segfault", + cpp_sources=csrc, + functions=["fail"], + ) + module.fail() - with tempfile.TemporaryDirectory() as temp_dir, tempfile.NamedTemporaryFile() as stderr: + @unittest.skipIf(TEST_WITH_ASAN, "ASAN disables the crash handler's signal handler") + @unittest.skipIf(not has_breakpad(), "Built without breakpad") + def test_crash_handler(self): + with tempfile.TemporaryDirectory() as temp_dir, tempfile.NamedTemporaryFile(delete=not sys.platform == "win32") as stderr: # Use multiprocessing to spin up a separate process to make catching # the segfault easier - p = Process(target=run_test, args=(stderr.name, temp_dir)) + p = Process(target=self._crash_handler_test_process, args=(stderr.name, temp_dir)) p.start() p.join() + with open(stderr.name) as f: result = f.read().strip() diff --git a/test/test_utils.py b/test/test_utils.py index d0f8d10..6f9432e 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -19,7 +19,7 @@ import torch.utils.cpp_extension import torch.hub as hub from torch.autograd._functions.utils import check_onnx_broadcast from torch.onnx.symbolic_opset9 import _prepare_onnx_paddings -from torch.testing._internal.common_utils import load_tests, retry, IS_SANDCASTLE, IS_WINDOWS, has_breakpad +from torch.testing._internal.common_utils import has_breakpad, load_tests, retry, IS_SANDCASTLE, IS_WINDOWS, TEST_WITH_ASAN from urllib.error import URLError # load_tests from torch.testing._internal.common_utils is used to automatically filter tests for @@ -739,7 +739,8 @@ class TestAssert(TestCase): class TestCrashHandler(TestCase): - @unittest.skipIf(not has_breakpad(), "Crash handler lib was not linked in") + @unittest.skipIf(TEST_WITH_ASAN, "ASAN disables the crash handler's signal handler") + @unittest.skipIf(not has_breakpad(), "Built without breakpad") def test_python_exception_writing(self): with tempfile.TemporaryDirectory() as temp_dir: torch.utils._crash_handler.enable_minidumps(temp_dir) diff --git a/third_party/breakpad b/third_party/breakpad new file mode 160000 index 0000000..469a80e --- /dev/null +++ b/third_party/breakpad @@ -0,0 +1 @@ +Subproject commit 469a80ee54947ad8d000d33a615f1a199165a711 diff --git a/torch/csrc/utils/crash_handler.cpp b/torch/csrc/utils/crash_handler.cpp index 2de22be..8fb318b 100644 --- a/torch/csrc/utils/crash_handler.cpp +++ b/torch/csrc/utils/crash_handler.cpp @@ -3,8 +3,16 @@ #include #ifdef ADD_BREAKPAD_SIGNAL_HANDLER -#include +#ifdef __linux__ +#include #include +#elif __APPLE__ +#include +#elif _WIN32 +#include +#else +#error unsupported platform +#endif #endif #include @@ -16,9 +24,10 @@ namespace crash_handler { #ifdef ADD_BREAKPAD_SIGNAL_HANDLER static std::unique_ptr handler; // NOLINT -static std::string minidump_directory; // NOLINT +static STRING_TYPE minidump_directory; // NOLINT static bool enabled_for_exceptions = false; // NOLINT +#if __linux__ bool dump_callback( const google_breakpad::MinidumpDescriptor& descriptor, void* context, @@ -28,10 +37,45 @@ bool dump_callback( } return succeeded; } +#elif __APPLE__ -void enable_minidumps(const std::string& dir) { +bool dump_callback( + const char* dump_dir, + const char* minidump_id, + void* context, + bool succeeded) { + if (succeeded) { + std::cerr << "Wrote minidump to " << dump_dir << "/" << minidump_id + << ".dmp" << std::endl; + } + return succeeded; +} +#elif _WIN32 +bool dump_callback( + const wchar_t* dump_path, + const wchar_t* minidump_id, + void* context, + EXCEPTION_POINTERS* exinfo, + MDRawAssertionInfo* assertion, + bool succeeded) { + if (succeeded) { + // Printing with wcerr inserts spaces between all the characters for some + // reason. If someone figures that out then we can get rid of the std::string + // conversions here. + std::wstring dump_path_ws(dump_path); + std::string dump_path_string(dump_path_ws.begin(), dump_path_ws.end()); + std::wstring minidump_id_ws(minidump_id); + std::string minidump_id_string(minidump_id_ws.begin(), minidump_id_ws.end()); + std::cerr << "Wrote minidump to " << dump_path_string << "\\" << minidump_id_string << ".dmp" << std::endl; + } + return succeeded; +} +#endif + +void enable_minidumps(const STRING_TYPE& dir) { minidump_directory = dir; - // The constructor here registers the actual signal handler +// The constructor here registers the actual signal handler +#ifdef __linux__ handler = std::make_unique( google_breakpad::MinidumpDescriptor(minidump_directory), nullptr, @@ -39,13 +83,30 @@ void enable_minidumps(const std::string& dir) { nullptr, true, -1); +#elif __APPLE__ + handler = std::make_unique( + /*dump_path=*/minidump_directory.c_str(), + /*filter=*/nullptr, + /*callback=*/dump_callback, + /*callback_context=*/nullptr, + /*install_handler=*/true, + /*port_name=*/nullptr); +#elif _WIN32 + handler = std::make_unique( + /*dump_path=*/minidump_directory.c_str(), + /*filter=*/nullptr, + /*callback=*/dump_callback, + /*callback_context=*/nullptr, + /*handler_types=*/ + google_breakpad::ExceptionHandler::HandlerType::HANDLER_ALL); +#endif } void disable_minidumps() { handler.reset(); } -const std::string& get_minidump_directory() { +const STRING_TYPE& get_minidump_directory() { if (handler == nullptr) { AT_ERROR( "Minidump handler is uninintialized, make sure to call enable_minidumps first"); @@ -78,18 +139,16 @@ void enable_minidumps_on_exceptions() { #else // On unspported systems we can't do anything, so stub out everything. -void enable_minidumps(const std::string& dir) { - AT_ERROR( - "Minidump collection is currently only implemented for Linux platforms"); +void enable_minidumps(const STRING_TYPE& dir) { + AT_ERROR("Compiled without minidump support"); } void disable_minidumps() { // Purposefully do nothing } -const std::string& get_minidump_directory() { - AT_ERROR( - "Minidump collection is currently only implemented for Linux platforms"); +const STRING_TYPE& get_minidump_directory() { + AT_ERROR("Compiled without minidump support"); } bool is_enabled_on_exceptions() { @@ -97,13 +156,11 @@ bool is_enabled_on_exceptions() { } void write_minidump() { - AT_ERROR( - "Minidump collection is currently only implemented for Linux platforms"); + AT_ERROR("Compiled without minidump support"); } void enable_minidumps_on_exceptions() { - AT_ERROR( - "Minidump collection is currently only implemented for Linux platforms"); + AT_ERROR("Compiled without minidump support"); } #endif diff --git a/torch/csrc/utils/crash_handler.h b/torch/csrc/utils/crash_handler.h index 5fe0503..dc11945 100644 --- a/torch/csrc/utils/crash_handler.h +++ b/torch/csrc/utils/crash_handler.h @@ -5,10 +5,16 @@ namespace torch { namespace crash_handler { +#ifdef _WIN32 +typedef std::wstring STRING_TYPE; +#else +typedef std::string STRING_TYPE; +#endif + // Set up a handler that writes minidumps to 'dir' on signals. This is not // necessary to call unless you want to change 'dir' to something other than // the default '/tmp/pytorch_crashes'. -TORCH_API void enable_minidumps(const std::string& dir); +TORCH_API void enable_minidumps(const STRING_TYPE& dir); // Enable minidumps when passing exceptions up to Python. By default these don't // do anything special, but it can be useful to write out a minidump on @@ -19,7 +25,7 @@ TORCH_API void enable_minidumps_on_exceptions(); TORCH_API void disable_minidumps(); // Get the directory that minidumps will be written to -TORCH_API const std::string& get_minidump_directory(); +TORCH_API const STRING_TYPE& get_minidump_directory(); // These are TORCH_API'ed since they are used from libtorch_python.so TORCH_API bool is_enabled_on_exceptions(); diff --git a/torch/testing/_internal/common_utils.py b/torch/testing/_internal/common_utils.py index fed9a00..a16056c 100644 --- a/torch/testing/_internal/common_utils.py +++ b/torch/testing/_internal/common_utils.py @@ -2533,13 +2533,6 @@ def disable_gc(): else: yield -def has_breakpad() -> bool: - # If not on a special build, check that the library was actually linked in - try: - torch._C._get_minidump_directory() # type: ignore[attr-defined] - return True - except RuntimeError as e: - return False def find_library_location(lib_name: str) -> Path: # return the shared library file in the installed folder if exist, @@ -2590,6 +2583,22 @@ def get_tensors_from(args, kwargs): return set([arg for arg in args if isinstance(arg, Tensor)] + [v for v in kwargs.values() if isinstance(v, Tensor)]) + +def has_breakpad(): + # We always build with breakpad in CI + if IS_IN_CI: + return True + + # If not on a special build, check that the library was actually linked in + try: + torch._C._get_minidump_directory() # type: ignore[attr-defined] + return True + except RuntimeError as e: + if "Minidump handler is uninintialized" in str(e): + return True + return False + + def sandcastle_skip_if(condition, reason): """ Similar to unittest.skipIf, however in the sandcastle environment it just diff --git a/torch/utils/_crash_handler.py b/torch/utils/_crash_handler.py index 3d736c3..84b3452 100644 --- a/torch/utils/_crash_handler.py +++ b/torch/utils/_crash_handler.py @@ -5,11 +5,10 @@ import pathlib import torch DEFAULT_MINIDUMP_DIR = "/tmp/pytorch_crashes" +if sys.platform == "win32": + DEFAULT_MINIDUMP_DIR = str(pathlib.Path.home() / "AppData" / "pytorch_crashes") def enable_minidumps(directory=DEFAULT_MINIDUMP_DIR): - if sys.platform != "linux": - raise RuntimeError("Minidump collection is currently only implemented for Linux platforms") - if directory == DEFAULT_MINIDUMP_DIR: pathlib.Path(directory).mkdir(parents=True, exist_ok=True) elif not os.path.exists(directory):