Use CMake for breakpad (#63186)
authordriazati <driazati@users.noreply.github.com>
Thu, 19 Aug 2021 17:38:41 +0000 (10:38 -0700)
committerFacebook GitHub Bot <facebook-github-bot@users.noreply.github.com>
Thu, 19 Aug 2021 17:42:01 +0000 (10:42 -0700)
Summary:
We currently build breakpad from [this fork](https://github.com/driazati/breakpad) to include extra logic to restore signal handlers that were previously present. With some [new additions](https://github.com/google/breakpad/compare/main...driazati:main) this fork now includes a CMake based build, so we can add breakpad as a proper dependency rather than rely on including it in Docker images as a system library which is error prone (we have a bunch of images) and hard to extend to MacOS / Windows. This also includes some changes to the crash handling code to support MacOS / Windows in a similar way to Linux.

```python
import torch

# On Windows this writes crashes to C:\Users\<user>\AppData\pytorch_crashes
# On MacOS/Linux this writes crashes to /tmp/pytorch_crashes
torch.utils._crash_handler.enable_minidumps()

# Easy way to cause a segfault and trigger the handler
torch.bincount(input=torch.tensor([9223372036854775807]))
```

Pull Request resolved: https://github.com/pytorch/pytorch/pull/63186

Reviewed By: malfet, seemethere

Differential Revision: D30318404

Pulled By: driazati

fbshipit-source-id: 0d7daf3701cfaba5451cc529a0730272ab1eb1dc

12 files changed:
.gitmodules
CMakeLists.txt
caffe2/CMakeLists.txt
cmake/Dependencies.cmake
cmake/Summary.cmake
test/test_cpp_extensions_jit.py
test/test_utils.py
third_party/breakpad [new submodule]
torch/csrc/utils/crash_handler.cpp
torch/csrc/utils/crash_handler.h
torch/testing/_internal/common_utils.py
torch/utils/_crash_handler.py

index 6836ccb..a7cc437 100644 (file)
 [submodule "third_party/pocketfft"]
        path = third_party/pocketfft
        url = https://github.com/mreineck/pocketfft
+[submodule "third_party/breakpad"]
+       path = third_party/breakpad
+       url = https://github.com/driazati/breakpad.git
index 188f35a..4d1653f 100644 (file)
@@ -202,6 +202,7 @@ cmake_dependent_option(
     "USE_CUDNN" OFF)
 option(USE_FBGEMM "Use FBGEMM (quantized 8-bit server operators)" ON)
 option(USE_KINETO "Use Kineto profiling library" ON)
+option(USE_BREAKPAD "Use breakpad crash dump library" ON)
 option(USE_CUPTI_SO "Use CUPTI as a shared library" OFF)
 option(USE_FAKELOWP "Use FakeLowp operators" OFF)
 option(USE_FFMPEG "Use ffmpeg" OFF)
@@ -264,6 +265,10 @@ if(NOT DEFINED USE_VULKAN)
       "ANDROID" OFF)
 endif()
 
+if(IOS)
+  set(USE_BREAKPAD OFF)
+endif()
+
 option(USE_SOURCE_DEBUG_ON_MOBILE "Enable " ON)
 option(USE_LITE_INTERPRETER_PROFILER "Enable " ON)
 option(USE_VULKAN_FP16_INFERENCE "Vulkan - Use fp16 inference" OFF)
index 6194554..523fea8 100644 (file)
@@ -1042,27 +1042,10 @@ if(USE_TBB)
   target_link_libraries(torch_cpu PUBLIC TBB::tbb)
 endif()
 
-
-if(LINUX)
-  find_library(BREAKPAD_LIB breakpad_client
-    PATHS /usr/local/lib/)
-  find_path(BREAKPAD_INCLUDE_DIR breakpad
-    PATHS /usr/local/include/)
-
-  if(BREAKPAD_LIB AND BREAKPAD_INCLUDE_DIR)
-    message(STATUS "found breakpad library")
-    target_link_libraries(torch_cpu PRIVATE ${BREAKPAD_LIB})
-    target_compile_definitions(torch_cpu PRIVATE ADD_BREAKPAD_SIGNAL_HANDLER)
-    target_include_directories(torch_cpu PRIVATE ${BREAKPAD_INCLUDE_DIR}/breakpad)
-  else()
-    if(BREAKPAD_INCLUDE_DIR)
-      message(STATUS "breakpad_client library not found")
-    elseif(BREAKPAD_LIB)
-      message(STATUS "breakpad include path not found")
-    else()
-      message(STATUS "breakpad_client library and include path not found")
-    endif()
-  endif()
+if(USE_BREAKPAD)
+  target_compile_definitions(torch_cpu PRIVATE ADD_BREAKPAD_SIGNAL_HANDLER)
+  target_include_directories(torch_cpu PRIVATE ${CMAKE_CURRENT_LIST_DIR}/../third_party ${CMAKE_CURRENT_LIST_DIR}/../third_party/breakpad/src)
+  target_link_libraries(torch_cpu PRIVATE breakpad)
 endif()
 
   target_include_directories(torch_cpu PRIVATE ${ATen_CPU_INCLUDE})
index 2c19dae..3e37c35 100644 (file)
@@ -1881,6 +1881,10 @@ set_target_properties(fmt-header-only PROPERTIES INTERFACE_COMPILE_FEATURES "")
 list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt-header-only)
 set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS} CACHE BOOL "Build shared libs" FORCE)
 
+if(USE_BREAKPAD)
+  add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/breakpad)
+endif()
+
 # ---[ Kineto
 # edge profiler depends on KinetoProfiler but it only does cpu
 # profiling. Thus we dont need USE_CUDA/USE_ROCM
index 4de2d79..afc63b1 100644 (file)
@@ -178,6 +178,7 @@ function(caffe2_print_configuration_summary)
     message(STATUS "  SELECTED_OP_LIST    : ${SELECTED_OP_LIST}")
   endif()
   message(STATUS "  USE_DEPLOY           : ${USE_DEPLOY}")
+  message(STATUS "  USE_BREAKPAD         : ${USE_BREAKPAD}")
   message(STATUS "  Public Dependencies  : ${Caffe2_PUBLIC_DEPENDENCY_LIBS}")
   message(STATUS "  Private Dependencies : ${Caffe2_DEPENDENCY_LIBS}")
 endfunction()
index 0738352..89d9af1 100644 (file)
@@ -869,11 +869,29 @@ class TestCppExtensionJIT(common.TestCase):
 
         gradcheck(torch.ops.my.add, [a, b], eps=1e-2)
 
-    @unittest.skipIf(not has_breakpad(), "Breakpad library must be present on system for crash handler")
-    @unittest.skipIf(TEST_WITH_ASAN, "ASAN disables the crash handler's signal handler")
-    def test_crash_handler(self):
-        def run_test(stderr_file, destination):
-            # Code to enable dumps and trigger a segfault
+    @staticmethod
+    def _crash_handler_test_process(stderr_file, destination):
+        # Code to enable dumps and trigger a segfault
+        if sys.platform == "win32":
+            destination = destination.replace("\\", "\\\\")
+            csrc = textwrap.dedent(f"""
+            #include <torch/torch.h>
+            #include <locale>
+            #include <iostream>
+            #include <codecvt>
+            #include <string>
+
+            int fail() {{
+                std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
+                std::string narrow("{destination}");
+                std::wstring wide = converter.from_bytes(narrow);
+                torch::crash_handler::enable_minidumps(wide.c_str());
+
+                volatile int* bad = nullptr;
+                return *bad;
+            }}
+            """)
+        else:
             csrc = textwrap.dedent(f"""
             #include <torch/torch.h>
 
@@ -885,29 +903,32 @@ class TestCppExtensionJIT(common.TestCase):
             }}
             """)
 
-            # Some special stuff to overwrite stderr for a C++ extension
-            # Copied from: https://stackoverflow.com/questions/8804893/redirect-stdout-from-python-for-c-calls
-            sys.stdout.flush()
-            newstdout = os.dup(2)
-            devnull = os.open(stderr_file, os.O_WRONLY)
-            os.dup2(devnull, 2)
-            os.close(devnull)
-            sys.stdout = os.fdopen(newstdout, 'w')
-
-            module = torch.utils.cpp_extension.load_inline(
-                name="segfault",
-                cpp_sources=csrc,
-                functions=["fail"],
-            )
-            module.fail()
+        # Some special stuff to overwrite stderr for a C++ extension
+        # Copied from: https://stackoverflow.com/questions/8804893/redirect-stdout-from-python-for-c-calls
+        sys.stdout.flush()
+        newstdout = os.dup(2)
+        devnull = os.open(stderr_file, os.O_WRONLY)
+        os.dup2(devnull, 2)
+        os.close(devnull)
+        sys.stdout = os.fdopen(newstdout, 'w')
 
+        module = torch.utils.cpp_extension.load_inline(
+            name="segfault",
+            cpp_sources=csrc,
+            functions=["fail"],
+        )
+        module.fail()
 
-        with tempfile.TemporaryDirectory() as temp_dir, tempfile.NamedTemporaryFile() as stderr:
+    @unittest.skipIf(TEST_WITH_ASAN, "ASAN disables the crash handler's signal handler")
+    @unittest.skipIf(not has_breakpad(), "Built without breakpad")
+    def test_crash_handler(self):
+        with tempfile.TemporaryDirectory() as temp_dir, tempfile.NamedTemporaryFile(delete=not sys.platform == "win32") as stderr:
             # Use multiprocessing to spin up a separate process to make catching
             # the segfault easier
-            p = Process(target=run_test, args=(stderr.name, temp_dir))
+            p = Process(target=self._crash_handler_test_process, args=(stderr.name, temp_dir))
             p.start()
             p.join()
+
             with open(stderr.name) as f:
                 result = f.read().strip()
 
index d0f8d10..6f9432e 100644 (file)
@@ -19,7 +19,7 @@ import torch.utils.cpp_extension
 import torch.hub as hub
 from torch.autograd._functions.utils import check_onnx_broadcast
 from torch.onnx.symbolic_opset9 import _prepare_onnx_paddings
-from torch.testing._internal.common_utils import load_tests, retry, IS_SANDCASTLE, IS_WINDOWS, has_breakpad
+from torch.testing._internal.common_utils import has_breakpad, load_tests, retry, IS_SANDCASTLE, IS_WINDOWS, TEST_WITH_ASAN
 from urllib.error import URLError
 
 # load_tests from torch.testing._internal.common_utils is used to automatically filter tests for
@@ -739,7 +739,8 @@ class TestAssert(TestCase):
 
 
 class TestCrashHandler(TestCase):
-    @unittest.skipIf(not has_breakpad(), "Crash handler lib was not linked in")
+    @unittest.skipIf(TEST_WITH_ASAN, "ASAN disables the crash handler's signal handler")
+    @unittest.skipIf(not has_breakpad(), "Built without breakpad")
     def test_python_exception_writing(self):
         with tempfile.TemporaryDirectory() as temp_dir:
             torch.utils._crash_handler.enable_minidumps(temp_dir)
diff --git a/third_party/breakpad b/third_party/breakpad
new file mode 160000 (submodule)
index 0000000..469a80e
--- /dev/null
@@ -0,0 +1 @@
+Subproject commit 469a80ee54947ad8d000d33a615f1a199165a711
index 2de22be..8fb318b 100644 (file)
@@ -3,8 +3,16 @@
 #include <iostream>
 
 #ifdef ADD_BREAKPAD_SIGNAL_HANDLER
-#include <breakpad/client/linux/handler/exception_handler.h>
+#ifdef __linux__
+#include <breakpad/src/client/linux/handler/exception_handler.h>
 #include <csignal>
+#elif __APPLE__
+#include <breakpad/src/client/mac/handler/exception_handler.h>
+#elif _WIN32
+#include <breakpad/src/client/windows/handler/exception_handler.h>
+#else
+#error unsupported platform
+#endif
 #endif
 
 #include <c10/util/Exception.h>
@@ -16,9 +24,10 @@ namespace crash_handler {
 #ifdef ADD_BREAKPAD_SIGNAL_HANDLER
 
 static std::unique_ptr<google_breakpad::ExceptionHandler> handler; // NOLINT
-static std::string minidump_directory; // NOLINT
+static STRING_TYPE minidump_directory; // NOLINT
 static bool enabled_for_exceptions = false; // NOLINT
 
+#if __linux__
 bool dump_callback(
     const google_breakpad::MinidumpDescriptor& descriptor,
     void* context,
@@ -28,10 +37,45 @@ bool dump_callback(
   }
   return succeeded;
 }
+#elif __APPLE__
 
-void enable_minidumps(const std::string& dir) {
+bool dump_callback(
+    const char* dump_dir,
+    const char* minidump_id,
+    void* context,
+    bool succeeded) {
+  if (succeeded) {
+    std::cerr << "Wrote minidump to " << dump_dir << "/" << minidump_id
+              << ".dmp" << std::endl;
+  }
+  return succeeded;
+}
+#elif _WIN32
+bool dump_callback(
+    const wchar_t* dump_path,
+    const wchar_t* minidump_id,
+    void* context,
+    EXCEPTION_POINTERS* exinfo,
+    MDRawAssertionInfo* assertion,
+    bool succeeded) {
+  if (succeeded) {
+    // Printing with wcerr inserts spaces between all the characters for some
+    // reason. If someone figures that out then we can get rid of the std::string
+    // conversions here.
+    std::wstring dump_path_ws(dump_path);
+    std::string dump_path_string(dump_path_ws.begin(), dump_path_ws.end());
+    std::wstring minidump_id_ws(minidump_id);
+    std::string minidump_id_string(minidump_id_ws.begin(), minidump_id_ws.end());
+    std::cerr << "Wrote minidump to " << dump_path_string << "\\" << minidump_id_string << ".dmp" << std::endl;
+  }
+  return succeeded;
+}
+#endif
+
+void enable_minidumps(const STRING_TYPE& dir) {
   minidump_directory = dir;
-  // The constructor here registers the actual signal handler
+// The constructor here registers the actual signal handler
+#ifdef __linux__
   handler = std::make_unique<google_breakpad::ExceptionHandler>(
       google_breakpad::MinidumpDescriptor(minidump_directory),
       nullptr,
@@ -39,13 +83,30 @@ void enable_minidumps(const std::string& dir) {
       nullptr,
       true,
       -1);
+#elif __APPLE__
+  handler = std::make_unique<google_breakpad::ExceptionHandler>(
+      /*dump_path=*/minidump_directory.c_str(),
+      /*filter=*/nullptr,
+      /*callback=*/dump_callback,
+      /*callback_context=*/nullptr,
+      /*install_handler=*/true,
+      /*port_name=*/nullptr);
+#elif _WIN32
+  handler = std::make_unique<google_breakpad::ExceptionHandler>(
+      /*dump_path=*/minidump_directory.c_str(),
+      /*filter=*/nullptr,
+      /*callback=*/dump_callback,
+      /*callback_context=*/nullptr,
+      /*handler_types=*/
+      google_breakpad::ExceptionHandler::HandlerType::HANDLER_ALL);
+#endif
 }
 
 void disable_minidumps() {
   handler.reset();
 }
 
-const std::string& get_minidump_directory() {
+const STRING_TYPE& get_minidump_directory() {
   if (handler == nullptr) {
     AT_ERROR(
         "Minidump handler is uninintialized, make sure to call enable_minidumps first");
@@ -78,18 +139,16 @@ void enable_minidumps_on_exceptions() {
 
 #else
 // On unspported systems we can't do anything, so stub out everything.
-void enable_minidumps(const std::string& dir) {
-  AT_ERROR(
-      "Minidump collection is currently only implemented for Linux platforms");
+void enable_minidumps(const STRING_TYPE& dir) {
+  AT_ERROR("Compiled without minidump support");
 }
 
 void disable_minidumps() {
   // Purposefully do nothing
 }
 
-const std::string& get_minidump_directory() {
-  AT_ERROR(
-      "Minidump collection is currently only implemented for Linux platforms");
+const STRING_TYPE& get_minidump_directory() {
+  AT_ERROR("Compiled without minidump support");
 }
 
 bool is_enabled_on_exceptions() {
@@ -97,13 +156,11 @@ bool is_enabled_on_exceptions() {
 }
 
 void write_minidump() {
-  AT_ERROR(
-      "Minidump collection is currently only implemented for Linux platforms");
+  AT_ERROR("Compiled without minidump support");
 }
 
 void enable_minidumps_on_exceptions() {
-  AT_ERROR(
-      "Minidump collection is currently only implemented for Linux platforms");
+  AT_ERROR("Compiled without minidump support");
 }
 
 #endif
index 5fe0503..dc11945 100644 (file)
@@ -5,10 +5,16 @@
 namespace torch {
 namespace crash_handler {
 
+#ifdef _WIN32
+typedef std::wstring STRING_TYPE;
+#else
+typedef std::string STRING_TYPE;
+#endif
+
 // Set up a handler that writes minidumps to 'dir' on signals. This is not
 // necessary to call unless you want to change 'dir' to something other than
 // the default '/tmp/pytorch_crashes'.
-TORCH_API void enable_minidumps(const std::string& dir);
+TORCH_API void enable_minidumps(const STRING_TYPE& dir);
 
 // Enable minidumps when passing exceptions up to Python. By default these don't
 // do anything special, but it can be useful to write out a minidump on
@@ -19,7 +25,7 @@ TORCH_API void enable_minidumps_on_exceptions();
 TORCH_API void disable_minidumps();
 
 // Get the directory that minidumps will be written to
-TORCH_API const std::string& get_minidump_directory();
+TORCH_API const STRING_TYPE& get_minidump_directory();
 
 // These are TORCH_API'ed since they are used from libtorch_python.so
 TORCH_API bool is_enabled_on_exceptions();
index fed9a00..a16056c 100644 (file)
@@ -2533,13 +2533,6 @@ def disable_gc():
     else:
         yield
 
-def has_breakpad() -> bool:
-    # If not on a special build, check that the library was actually linked in
-    try:
-        torch._C._get_minidump_directory()  # type: ignore[attr-defined]
-        return True
-    except RuntimeError as e:
-        return False
 
 def find_library_location(lib_name: str) -> Path:
     # return the shared library file in the installed folder if exist,
@@ -2590,6 +2583,22 @@ def get_tensors_from(args, kwargs):
     return set([arg for arg in args if isinstance(arg, Tensor)] +
                [v for v in kwargs.values() if isinstance(v, Tensor)])
 
+
+def has_breakpad():
+    # We always build with breakpad in CI
+    if IS_IN_CI:
+        return True
+
+    # If not on a special build, check that the library was actually linked in
+    try:
+        torch._C._get_minidump_directory()  # type: ignore[attr-defined]
+        return True
+    except RuntimeError as e:
+        if "Minidump handler is uninintialized" in str(e):
+            return True
+        return False
+
+
 def sandcastle_skip_if(condition, reason):
     """
     Similar to unittest.skipIf, however in the sandcastle environment it just
index 3d736c3..84b3452 100644 (file)
@@ -5,11 +5,10 @@ import pathlib
 import torch
 
 DEFAULT_MINIDUMP_DIR = "/tmp/pytorch_crashes"
+if sys.platform == "win32":
+    DEFAULT_MINIDUMP_DIR = str(pathlib.Path.home() / "AppData" / "pytorch_crashes")
 
 def enable_minidumps(directory=DEFAULT_MINIDUMP_DIR):
-    if sys.platform != "linux":
-        raise RuntimeError("Minidump collection is currently only implemented for Linux platforms")
-
     if directory == DEFAULT_MINIDUMP_DIR:
         pathlib.Path(directory).mkdir(parents=True, exist_ok=True)
     elif not os.path.exists(directory):