[PyTorch][Edge] Support backtrace symbolication for Android builds (#63339)
authorPavithran Ramachandran <pavithran@fb.com>
Fri, 20 Aug 2021 01:39:50 +0000 (18:39 -0700)
committerFacebook GitHub Bot <facebook-github-bot@users.noreply.github.com>
Fri, 20 Aug 2021 01:41:29 +0000 (18:41 -0700)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/63339

# Context
https://fb.workplace.com/groups/pytorch.dev/permalink/900474523864362/?comment_id=901125403799274&reply_comment_id=905023386742809

##### WHAT IS A STACK TRACE?
A stack trace (also called stack backtrace or stack traceback) is a report of the active stack frames at a certain point in time during the execution of a program.

Typically when an exception is thrown, one would expect to see the code (file:line) that threw the exception, and every intermediate frame up to and including the main function.

We are enabling android stack trace to help debugging on android devices.

Test Plan:
## Steps to test
```
buck build fbsource//xplat/caffe2/mode/aibench_pytorch_android -c pt.enable_qpl=0 -c pt.has_backtraces=1 fbsource//xplat/caffe2/fb/lite_predictor:lite_predictorAndroid#android-x86_64

one_world android emulator android-28

adb push ~/fbsource/buck-out/gen/xplat/caffe2/fb/lite_predictor/lite_predictorAndroid#android-x86_64 /data/local/tmp

cd /data/local/tmp
./lite_predictorAndroid#android-x86_64

./lite_predictorAndroid#android-x86_64 --model ./detect.bc --input_dims "1,3,192,192" --input_type float --warmup 20 --iter 5 --report_pep true
```

## See how model file is not found stack traces is:

### before
```
./lite_predictorAndroid#android-x86_64 --model ./detect.bc --input_dims "1,3,192,192" --input_type float --warmup 20 --iter 5 --report_pep true

Run with 2 threads
Run with 2 threads
Loading model...
terminating with uncaught exception of type c10::Error: open file failed, file path: ./detect.bc
Exception raised from RAIIFile at xplat/caffe2/caffe2/serialize/file_adapter.cc:13 (most recent call first):
(no backtrace available)
Aborted
```

### after
```
134|generic_x86_64:/data/local/tmp $ ./lite_predictorAndroid#android-x86_64 --model ./detect.bc --input_dims "1,3,192,192" --input_type float --warmup 20 --iter 5 --report_pep true
Run with 2 threads
Run with 2 threads
Loading model...
terminating with uncaught exception of type c10::Error: open file failed, file path: ./detect.bc
Exception raised from RAIIFile at xplat/caffe2/caffe2/serialize/file_adapter.cc:13 (most recent call first):
 frame #0       c10::get_backtrace(unsigned long, unsigned long, bool)[0x59494274f10e]
 frame #1       [0x5949427b1eee]
 frame #2       [0x5949427b1eb2]
 frame #3       [0x5949427b1cdc]
 frame #4       std::__ndk1::function<std::__ndk1::basic_string<char, std::__ndk1::char_traits<char>, std::__ndk1::allocator<char> > ()>::operator()() const[0x5949427afc34]
 frame #5       c10::Error::Error(c10::SourceLocation, std::__ndk1::basic_string<char, std::__ndk1::char_traits<char>, std::__ndk1::allocator<char> >)[0x5949427b05b1]
 frame #6       c10::detail::torchCheckFail(char const*, char const*, unsigned int, std::__ndk1::basic_string<char, std::__ndk1::char_traits<char>, std::__ndk1::allocator<char> > const&)[0x5949427aca5f]
 frame #7       caffe2::serialize::FileAdapter::RAIIFile::RAIIFile(std::__ndk1::basic_string<char, std::__ndk1::char_traits<char>, std::__ndk1::allocator<char> > const&)[0x5949426b37b2]
 frame #8       caffe2::serialize::FileAdapter::FileAdapter(std::__ndk1::basic_string<char, std::__ndk1::char_traits<char>, std::__ndk1::allocator<char> > const&)[0x5949426b3903]
 frame #9       torch::jit::_load_for_mobile(std::__ndk1::basic_string<char, std::__ndk1::char_traits<char>, std::__ndk1::allocator<char> > const&, c10::optional<c10::Device>, std::__ndk1::unordered_map<std::__ndk1::basic_string<char, std::__ndk1::char_traits<char>, std::__ndk1::allocator<char> >, std::__ndk1::basic_string<char, std::__ndk1::char_traits<char>, std::__ndk1::allocator<char> >, std::__ndk1::hash<std::__ndk1::basic_string<char, std::__ndk1::char_traits<char>, std::__ndk1::allocator<char> > >, std::__ndk1::equal_to<std::__ndk1::basic_string<char, std::__ndk1::char_traits<char>, std::__ndk1::allocator<char> > >, std::__ndk1::allocator<std::__ndk1::pair<std::__ndk1::basic_string<char, std::__ndk1::char_traits<char>, std::__ndk1::allocator<char> > const, std::__ndk1::basic_string<char, std::__ndk1::char_traits<char>, std::__ndk1::allocator<char> > > > >&)[0x5949422737bd]
 frame #10      torch::jit::_load_for_mobile(std::__ndk1::basic_string<char, std::__ndk1::char_traits<char>, std::__ndk1::allocator<char> > const&, c10::optional<c10::Device>)[0x594942273769]
 frame #11      benchmark(std::__ndk1::basic_string<char, std::__ndk1::char_traits<char>, std::__ndk1::allocator<char> > const&, int, std::__ndk1::basic_string<char, std::__ndk1::char_traits<char>, std::__ndk1::allocator<char> > const&, std::__ndk1::basic_string<char, std::__ndk1::char_traits<char>, std::__ndk1::allocator<char> > const&, std::__ndk1::basic_string<char, std::__ndk1::char_traits<char>, std::__ndk1::allocator<char> > const&, bool, int, int, int, bool, int, bool, int, double, bool, bool, bool, std::__ndk1::basic_string<char, std::__ndk1::char_traits<char>, std::__ndk1::allocator<char> > const&)[0x59494189b21d]
 frame #12      main[0x594941882aff]
 frame #13      __libc_init[0x7b699d08578d]
```

### what we get for os:linux
```
(base) [pavithran@devvm1803.vll0 /data/users/pavithran/fbsource] ./buck-out/gen/xplat/caffe2/fb/lite_predictor/lite_predictor --model ./detect.bc --input_dims "1,3,192,192" --input_type float --warmup 20 --iter 5 --report_pep true
Run with 24 threads
Run with 24 threads
Loading model...
terminate called after throwing an instance of 'c10::Error'
  what():  open file failed, file path: ./detect.bc
Exception raised from RAIIFile at xplat/caffe2/caffe2/serialize/file_adapter.cc:13 (most recent call first):
frame #0: ./buck-out/gen/xplat/caffe2/fb/lite_predictor/lite_predictor() [0x20cb7fe]
frame #1: ./buck-out/gen/xplat/caffe2/fb/lite_predictor/lite_predictor() [0x20cb6c6]
frame #2: std::function<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > ()>::operator()() const + 0x54 (0x20ca4e4 in ./buck-out/gen/xplat/caffe2/fb/lite_predictor/lite_predictor)
frame #3: c10::Error::Error(c10::SourceLocation, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >) + 0x57 (0x20ca9a7 in ./buck-out/gen/xplat/caffe2/fb/lite_predictor/lite_predictor)
frame #4: c10::detail::torchCheckFail(char const*, char const*, unsigned int, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&) + 0x7a (0x20c823a in ./buck-out/gen/xplat/caffe2/fb/lite_predictor/lite_predictor)
frame #5: caffe2::serialize::FileAdapter::RAIIFile::RAIIFile(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&) + 0x96 (0x206f3d6 in ./buck-out/gen/xplat/caffe2/fb/lite_predictor/lite_predictor)
frame #6: caffe2::serialize::FileAdapter::FileAdapter(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&) + 0x42 (0x206f502 in ./buck-out/gen/xplat/caffe2/fb/lite_predictor/lite_predictor)
frame #7: torch::jit::_load_for_mobile(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, c10::optional<c10::Device>, std::unordered_map<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::hash<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::equal_to<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, std::allocator<std::pair<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > > >&) + 0x30 (0x1be826c in ./buck-out/gen/xplat/caffe2/fb/lite_predictor/lite_predictor)
frame #8: torch::jit::_load_for_mobile(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, c10::optional<c10::Device>) + 0x35 (0x1be8214 in ./buck-out/gen/xplat/caffe2/fb/lite_predictor/lite_predictor)
frame #9: benchmark(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, int, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, bool, int, int, int, bool, int, bool, int, double, bool, bool, bool, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&) + 0x16d (0x12093ad in ./buck-out/gen/xplat/caffe2/fb/lite_predictor/lite_predictor)
frame #10: main + 0x25c (0x11f933c in ./buck-out/gen/xplat/caffe2/fb/lite_predictor/lite_predictor)
frame #11: __libc_start_main + 0x105 (0x7fc7b9f2ed95 in /usr/local/fbcode/platform009/lib/libc.so.6)
frame #12: _start + 0x2a (0x11f902a in ./buck-out/gen/xplat/caffe2/fb/lite_predictor/lite_predictor)

Aborted (core dumped)
````

Reviewed By: dhruvbird

Differential Revision: D30135947

fbshipit-source-id: f50c634ef4545843305cad4b4a14a8776b1aec76

c10/util/Backtrace.cpp

index d978f32..2c5e2e4 100644 (file)
 
 #if SUPPORTS_BACKTRACE
 #include <cxxabi.h>
+#ifdef C10_ANDROID
+#include <dlfcn.h>
+#include <unwind.h>
+#else
 #include <execinfo.h>
 #endif
+#endif
 
 #ifdef FBCODE_CAFFE2
 #include <common/process/StackTrace.h>
 
 namespace c10 {
 
+#if SUPPORTS_BACKTRACE && defined(C10_ANDROID)
+
+struct AndroidBacktraceState {
+  std::vector<void*> buffer;
+};
+
+_Unwind_Reason_Code android_unwind_callback(
+    struct _Unwind_Context* context,
+    void* arg) {
+  AndroidBacktraceState* state = (AndroidBacktraceState*)arg;
+  uintptr_t pc = _Unwind_GetIP(context);
+  if (pc) {
+    state->buffer.emplace_back(reinterpret_cast<void*>(pc));
+  }
+  return _URC_NO_REASON;
+}
+
+void dump_stack(
+    std::ostream& os,
+    size_t frames_to_skip,
+    size_t maximum_number_of_frames) {
+  AndroidBacktraceState state;
+
+  _Unwind_Backtrace(android_unwind_callback, &state);
+
+  int idx = 0;
+  char* demangled = nullptr;
+  size_t length = 0;
+
+  for (const void* addr : state.buffer) {
+    const char* symbol = "";
+
+    Dl_info info;
+    if (dladdr(addr, &info) && info.dli_sname) {
+      symbol = info.dli_sname;
+    }
+
+    int status = 0;
+    demangled = __cxxabiv1::__cxa_demangle(
+        /*mangled_name*/ symbol,
+        /*output_buffer*/ demangled,
+        /*length*/ &length,
+        /*status*/ &status);
+
+    os << " frame #" << idx++ << "\t"
+       << ((demangled != NULL && status == 0) ? demangled : symbol) << "["
+       << addr << "]\t" << std::endl;
+  }
+  free(demangled);
+}
+
+#endif /* SUPPORTS_BACKTRACE && defined(C10_ANDROID) */
+
 #if SUPPORTS_BACKTRACE
 namespace {
 
@@ -42,6 +100,7 @@ struct FrameInformation {
   std::string object_file;
 };
 
+#ifndef C10_ANDROID
 bool is_python_frame(const FrameInformation& frame) {
   return frame.object_file == "python" || frame.object_file == "python3" ||
       (frame.object_file.find("libpython") != std::string::npos);
@@ -113,6 +172,7 @@ c10::optional<FrameInformation> parse_frame_information(
   frame.function_name = demangle(mangled_function_name.c_str());
   return frame;
 }
+#endif /* !defined(C10_ANDROID) */
 } // anonymous namespace
 #elif defined(_MSC_VER)
 namespace {
@@ -178,7 +238,7 @@ std::string get_backtrace(
   facebook::process::StackTrace st;
   return st.toString();
 
-#elif SUPPORTS_BACKTRACE
+#elif SUPPORTS_BACKTRACE && !defined(C10_ANDROID)
 
   // We always skip this frame (backtrace).
   frames_to_skip += 1;
@@ -249,6 +309,13 @@ std::string get_backtrace(
   }
 
   return stream.str();
+
+#elif SUPPORTS_BACKTRACE && defined(C10_ANDROID)
+
+  std::ostringstream oss;
+  dump_stack(oss, frames_to_skip, maximum_number_of_frames);
+  return oss.str().c_str();
+
 #elif defined(_MSC_VER) // !SUPPORTS_BACKTRACE
   // This backtrace retrieval is implemented on Windows via the Windows
   // API using `CaptureStackBackTrace`, `SymFromAddr` and