_(ATenNativeBatchNorm) \
_(NoneSchemaMatch) \
_(ClassParser) \
- _(PeepholeOptimize)
+ _(PeepholeOptimize) \
+ _(RecordFunction)
#define TH_FORALL_TESTS_CUDA(_) \
_(ArgumentSpec) \
}
}
+// Runs a tensor op inside a RecordFunction scope whose inputs are packed
+// lazily via a callback, so a registered observer can inspect them.
+void invokeTestRecordFunction(at::Tensor& t) {
+  autograd::profiler::GetPackedInputsCallback inputs_cb =
+      [t]() {
+        Stack st;
+        pack(st, t);
+        return st;
+      };
+  // Fires start callbacks now and end callbacks when the guard leaves scope.
+  autograd::profiler::RecordFunction guard("test", inputs_cb);
+  t.add_(torch::ones_like(t));
+}
+
+// Joins the names along the parent() chain, outermost first, separated by
+// "::" (e.g. "outer::inner"). Returns "" for a null pointer.
+std::string getFullName(const autograd::profiler::RecordFunction* fn_ptr) {
+  std::string full_name;
+  for (; fn_ptr != nullptr; fn_ptr = fn_ptr->parent()) {
+    full_name = full_name.empty()
+        ? std::string(fn_ptr->name().str())
+        : std::string(fn_ptr->name().str()) + "::" + full_name;
+  }
+  return full_name;
+}
+
+// Helper for the nesting test: opens an "inner" range inside the caller's
+// "outer" range.
+void invokeTestRecordFunctionNested() {
+  autograd::profiler::RecordFunction guard("inner");
+}
+
+// Checks that (1) a start callback can observe the lazily-packed tensor
+// inputs of a RecordFunction, and (2) nested RecordFunctions are linked
+// through parent() so full names can be reconstructed.
+void testRecordFunction() {
+  // Part 1: record the sizes of every tensor input seen by the callback.
+  std::vector<std::vector<int64_t>> input_sizes;
+  autograd::profiler::pushCallback([&input_sizes](
+      const autograd::profiler::RecordFunction& fn) {
+    for (const auto& input : fn.inputs()) {
+      if (input.isTensor()) {
+        std::vector<int64_t> t = input.toTensor().sizes().vec();
+        input_sizes.push_back(t);
+      }
+    }
+  });
+
+  auto t = torch::randn({1, 2, 3}, at::kCPU);
+  invokeTestRecordFunction(t);
+
+  autograd::profiler::popCallback();
+
+  AT_CHECK(input_sizes.size() == 1);
+  AT_CHECK(input_sizes[0] == at::IntArrayRef({1, 2, 3}));
+
+  // Part 2: test nested RecordFunctions.
+  std::vector<std::string> nested_names;
+  autograd::profiler::pushCallback([&nested_names](
+      const autograd::profiler::RecordFunction& fn) {
+    nested_names.push_back(getFullName(&fn));
+  });
+
+  {
+    autograd::profiler::RecordFunction guard("outer");
+    invokeTestRecordFunctionNested();
+  }
+
+  autograd::profiler::popCallback();
+  AT_CHECK(nested_names.size() == 2);
+  AT_CHECK(nested_names[0] == "outer");
+  AT_CHECK(nested_names[1] == "outer::inner");
+}
+
void testAutogradProfiler() {
constexpr int batch_size = 4;
constexpr int input_size = 256;
"torch/csrc/autograd/grad_mode.cpp",
"torch/csrc/autograd/input_buffer.cpp",
"torch/csrc/autograd/profiler.cpp",
+ "torch/csrc/autograd/record_function.cpp",
"torch/csrc/autograd/saved_variable.cpp",
"torch/csrc/autograd/variable.cpp",
"torch/csrc/Exceptions.cpp",
${TORCH_SRC_DIR}/csrc/autograd/grad_mode.cpp
${TORCH_SRC_DIR}/csrc/autograd/input_buffer.cpp
${TORCH_SRC_DIR}/csrc/autograd/profiler.cpp
+ ${TORCH_SRC_DIR}/csrc/autograd/record_function.cpp
${TORCH_SRC_DIR}/csrc/autograd/saved_variable.cpp
${TORCH_SRC_DIR}/csrc/autograd/variable.cpp
${TORCH_SRC_DIR}/csrc/autograd/VariableTypeManual.cpp
}
 void mark(std::string name, bool include_cuda /* = true */) {
-  if (state == ProfilerState::Disabled) {
-    return;
-  }
+  // NOTE(review): the Disabled early-return was removed -- this now assumes
+  // it is only reached while profiling is active (e.g. via the callbacks
+  // installed in enableProfiler). Confirm no direct callers relied on the
+  // old no-op behavior when disabled.
   if (state == ProfilerState::NVTX) {
     cuda_stubs->nvtxMarkA(name.c_str());
   } else {
     getEventList().record(
         EventKind::Mark,
-        std::move(name),
+        StringView(std::move(name)),
         thread_id,
         include_cuda && state == ProfilerState::CUDA);
   }
 }
-const char* c_str(const char *str) { return str; }
-// NB: non-const to disallow temporaries (lifetime issues)
-const char* c_str(std::string& str) { return str.c_str(); }
-
-template<typename T>
-void pushRangeImpl(T name, const char* msg="", int64_t sequence_nr=-1) {
-  if (state == ProfilerState::Disabled) {
-    return;
-  }
+// StringView replaces the old template + c_str() overload pair: it carries
+// either a borrowed const char* or an owned std::string, so one non-template
+// function covers both name kinds.
+void pushRangeImpl(const StringView& name, const char* msg="", int64_t sequence_nr=-1) {
   if (state == ProfilerState::NVTX) {
     if(sequence_nr >= 0) {
       std::stringstream s;
-      s << name << msg << sequence_nr;
+      s << name.str() << msg << sequence_nr;
       cuda_stubs->nvtxRangePushA(s.str().c_str());
     } else {
-      cuda_stubs->nvtxRangePushA(c_str(name));
+      cuda_stubs->nvtxRangePushA(name.str());
     }
   } else {
     getEventList().record(
         EventKind::PushRange,
-        std::move(name),
+        name,
         thread_id,
         state == ProfilerState::CUDA);
   }
 }
 void pushRange(std::string name) {
-  pushRangeImpl(std::move(name));
+  // StringView(std::string) takes ownership of the moved name.
+  pushRangeImpl(StringView(std::move(name)));
 }
 void popRange() {
-  if (state == ProfilerState::Disabled) {
-    return;
-  }
+  // NOTE(review): Disabled early-return removed, mirroring mark()/pushRange;
+  // reached only while profiling is active -- confirm against direct callers.
   if (state == ProfilerState::NVTX) {
     cuda_stubs->nvtxRangePop();
   } else {
     getEventList().record(
         EventKind::PopRange,
-        "",
+        StringView(""),
         thread_id,
         state == ProfilerState::CUDA);
   }
 }
-RecordFunction::RecordFunction(Function* fn) {
- // typeid(*fn).name() would avoid an additional string allocation.
- // However, typeid(*fn).name() would cause nvtx annotations for all user-defined
- // (Python-side) custom autograd function backward() methods to have the same name,
- // because they route through the same C++ side class.
- // fn->name() ensures that nvtx annotations for custom function backward() methods
- // receive a relevant, demangled name.
- pushRangeImpl(fn->name(), ", stashed seq=", fn->sequence_nr());
-}
-
-RecordFunction::RecordFunction(std::string name) {
- pushRangeImpl(std::move(name));
-}
-
-RecordFunction::RecordFunction(const char* name) {
- pushRangeImpl<const char*>(name);
-}
-
-RecordFunction::RecordFunction(const char* name, int64_t current_sequence_nr)
-{
- pushRangeImpl<const char*>(name, ", seq=", current_sequence_nr);
-}
-
void enableProfiler(ProfilerState new_state) {
AT_ASSERT(new_state != ProfilerState::Disabled);
if (new_state == ProfilerState::NVTX && !cuda_stubs->enabled())
if (state != ProfilerState::Disabled && new_state != state) {
throw std::runtime_error("can't change kind of profiling (e.g. NVTX to CPU) while profiler is running");
}
+
+ pushCallback([](const RecordFunction& fn) {
+ auto* msg = (fn.seqNr() >= 0) ? ", seq = " : "";
+ pushRangeImpl(fn.name(), msg, fn.seqNr());
+ },
+ [](const RecordFunction& /* unused */) {
+ popRange();
+ });
state = new_state;
if(state == ProfilerState::CUDA) {
}
ProfilerState old_state = state;
mark("__stop_profile");
+
+ popCallback();
state = ProfilerState::Disabled;
+
if (old_state == ProfilerState::NVTX) {
return thread_event_lists();
} else {
#include <ctime>
#endif
+#include <torch/csrc/autograd/record_function.h>
#include <torch/csrc/jit/code_template.h>
typedef struct CUevent_st* CUDAEventStub;
};
struct TORCH_API Event final {
- Event(EventKind kind, std::string name, uint16_t thread_id, bool record_cuda)
- : owned_name_(new std::string(std::move(name)))
- , name_ptr_(owned_name_->c_str())
- , kind_(kind)
- , thread_id_(thread_id) { record(record_cuda); }
- Event(EventKind kind, const char* name, uint16_t thread_id, bool record_cuda)
- : name_ptr_(name)
+ Event(EventKind kind, StringView name, uint16_t thread_id, bool record_cuda)
+ : name_(std::move(name))
, kind_(kind)
, thread_id_(thread_id) { record(record_cuda); }
throw std::runtime_error("unknown EventKind");
}
const char* name() const {
- return name_ptr_;
+ return name_.str();
}
uint16_t thread_id() const {
return thread_id_;
}
private:
int64_t cpu_ns_ = 0; // signed to allow for negative intervals, initialized for safety.
- // std::string is a very large object (usually around 32B),
- // and this field is used only for user-created ranges, so
- // it's better to save on size of Events.
- std::unique_ptr<std::string> owned_name_;
- const char * name_ptr_;
+ StringView name_;
EventKind kind_;
uint16_t thread_id_;
int device_ = -1;
TORCH_API void pushRange(std::string name);
TORCH_API void popRange();
-struct TORCH_API RecordFunction {
- explicit RecordFunction(Function* fn);
-
- explicit RecordFunction(std::string name);
-
- explicit RecordFunction(const char* name);
-
- explicit RecordFunction(const char* name, int64_t current_sequence_nr);
-
- ~RecordFunction() {
- popRange();
- }
-};
-
using thread_event_lists = std::vector<std::vector<Event>>;
// NOTE: changing profiler modes is **NOT THREAD SAFE**. You should ensure that
// there no autograd functions are being executed when these function are used.
--- /dev/null
+#include <torch/csrc/autograd/record_function.h>
+#include <torch/csrc/autograd/function.h>
+
+namespace torch { namespace autograd { namespace profiler {
+
+namespace {
+bool has_callbacks = false;
+std::vector<RecordFunctionCallback> start_callbacks;
+std::vector<RecordFunctionCallback> end_callbacks;
+thread_local RecordFunction* thread_local_func_ = nullptr;
+}
+
+// Registers a start/end callback pair on the (non-thread-safe) global stacks.
+// Parameters are taken by value and moved in to avoid copying std::function
+// state twice.
+void pushCallback(RecordFunctionCallback start, RecordFunctionCallback end) {
+  start_callbacks.push_back(std::move(start));
+  end_callbacks.push_back(std::move(end));
+  has_callbacks = true;
+}
+
+// Convenience overload: registers a no-op end callback so the two stacks
+// stay in lockstep. Moves the start callback instead of copying it.
+void pushCallback(RecordFunctionCallback start) {
+  pushCallback(std::move(start), [](const RecordFunction&){});
+}
+
+// Removes the most recently pushed start/end callback pair. Popping with an
+// empty stack is a caller bug and throws.
+void popCallback() {
+  if (start_callbacks.empty()) {
+    throw std::runtime_error("Empty callbacks stack");
+  }
+  start_callbacks.pop_back();
+  end_callbacks.pop_back();
+  has_callbacks = !start_callbacks.empty();
+}
+
+// Observes an autograd Function: name and sequence number are taken from fn.
+RecordFunction::RecordFunction(Function* fn, GetPackedInputsCallback cb) {
+  // Fast path: with no observers registered the guard is inert -- fields
+  // keep their defaults and no callbacks run (see ~RecordFunction).
+  if (!has_callbacks) {
+    return;
+  }
+  fn_ = fn;
+  name_ = StringView(fn->name());
+  sequence_nr_ = fn->sequence_nr();
+  inputs_cb_ = cb;
+  processCallbacks();
+}
+
+// Named range with an explicit sequence number; takes ownership of the name.
+RecordFunction::RecordFunction(
+    std::string name, int64_t sequence_nr, GetPackedInputsCallback cb) {
+  if (!has_callbacks) {
+    return;
+  }
+  name_ = StringView(std::move(name));
+  sequence_nr_ = sequence_nr;
+  inputs_cb_ = cb;
+  processCallbacks();
+}
+
+// Named range borrowing a C string: StringView(const char*) does not copy,
+// so the pointer must stay valid for the guard's lifetime.
+RecordFunction::RecordFunction(
+    const char* name, int64_t sequence_nr, GetPackedInputsCallback cb) {
+  if (!has_callbacks) {
+    return;
+  }
+  name_ = StringView(name);
+  sequence_nr_ = sequence_nr;
+  inputs_cb_ = cb;
+  processCallbacks();
+}
+
+// Links this guard into the per-thread stack of active RecordFunctions
+// (observers reconstruct nesting via parent()), then runs start callbacks.
+void RecordFunction::processCallbacks() {
+  parent_ = thread_local_func_;
+  thread_local_func_ = this;
+
+  for (const auto& cb : start_callbacks) {
+    cb(*this);
+  }
+}
+
+// Runs end callbacks and unlinks from the per-thread stack.
+// NOTE(review): if pushCallback/popCallback executes between this guard's
+// construction and destruction, has_callbacks can differ at the two points,
+// leaving start/end callbacks unpaired and thread_local_func_ restored
+// inconsistently. Relies on the header's rule that callback registration
+// never overlaps other execution -- confirm.
+RecordFunction::~RecordFunction() {
+  if (has_callbacks) {
+    for (const auto& cb : end_callbacks) {
+      cb(*this);
+    }
+    thread_local_func_ = parent_;
+  }
+}
+
+}}}
--- /dev/null
+#pragma once
+
+#include <ATen/core/ivalue.h>
+#include <c10/util/SmallVector.h>
+#include <torch/csrc/WindowsTorchApiMacro.h>
+
+namespace torch { namespace autograd {
+
+struct Function;
+
+namespace profiler {
+
+// Lightweight name holder: either borrows a const char* (no ownership) or
+// owns a std::string via shared_ptr, so copies of the view stay cheap.
+struct TORCH_API StringView {
+  StringView() : StringView(nullptr) {}
+  explicit StringView(const char* str_ptr)
+      : owned_str_ptr_(nullptr), str_ptr_(str_ptr) {}
+  explicit StringView(std::string str)
+      : owned_str_ptr_(std::make_shared<std::string>(std::move(str))),
+        str_ptr_(owned_str_ptr_->c_str()) {}
+
+  // NOTE(review): returns nullptr for a default-constructed StringView;
+  // callers must not feed it to std::string / printf-style sinks unchecked.
+  inline const char* str() const {
+    return str_ptr_;
+  }
+ private:
+  std::shared_ptr<std::string> owned_str_ptr_;
+  const char* str_ptr_;
+};
+
+using GetPackedInputsCallback = std::function<std::vector<c10::IValue>()>;
+
+// RAII scope observed by the pushCallback/popCallback machinery: start
+// callbacks fire at construction, end callbacks at destruction. When no
+// callbacks are registered the guard is inert.
+struct TORCH_API RecordFunction {
+  // Observes an autograd Function; name/sequence number come from fn.
+  explicit RecordFunction(Function* fn, GetPackedInputsCallback cb = nullptr);
+
+  explicit RecordFunction(
+      std::string name,
+      int64_t current_sequence_nr = -1,
+      GetPackedInputsCallback cb = nullptr);
+
+  explicit RecordFunction(
+      const char* name,
+      int64_t current_sequence_nr = -1,
+      GetPackedInputsCallback cb = nullptr);
+
+  // Convenience overloads (name + inputs callback, default sequence number).
+  // std::move avoids copying the string / std::function into the delegated
+  // constructor.
+  explicit RecordFunction(
+      std::string name,
+      GetPackedInputsCallback cb)
+      : RecordFunction(std::move(name), -1, std::move(cb)) {}
+
+  explicit RecordFunction(
+      const char* name,
+      GetPackedInputsCallback cb)
+      : RecordFunction(name, -1, std::move(cb)) {}
+
+  virtual ~RecordFunction();
+
+  // The observed autograd Function, or nullptr for plain named ranges.
+  inline Function* func() const {
+    return fn_;
+  }
+
+  inline const StringView& name() const {
+    return name_;
+  }
+
+  inline int64_t seqNr() const {
+    return sequence_nr_;
+  }
+
+  // Lazily materializes (and caches) the packed inputs via inputs_cb_.
+  const std::vector<c10::IValue>& inputs() const {
+    if (inputs_cb_ && !inputs_initialized_) {
+      inputs_ = inputs_cb_();
+      inputs_initialized_ = true;
+    }
+    return inputs_;
+  }
+
+  // Enclosing RecordFunction on this thread, nullptr at the top level.
+  inline const RecordFunction* parent() const {
+    return parent_;
+  }
+
+ private:
+  void processCallbacks();
+
+  Function* fn_ = nullptr;
+  StringView name_;
+  int64_t sequence_nr_ = -1;
+
+  RecordFunction* parent_ = nullptr;
+
+  GetPackedInputsCallback inputs_cb_ = nullptr;
+  mutable bool inputs_initialized_ = false;
+  // initialized lazily by inputs_cb_
+  mutable std::vector<c10::IValue> inputs_;
+};
+
+// WARNING: all calls to pushCallback/popCallback are not thread safe and
+// must not overlap with other code execution
+using RecordFunctionCallback = std::function<void(const RecordFunction&)>;
+TORCH_API void pushCallback(RecordFunctionCallback, RecordFunctionCallback);
+TORCH_API void pushCallback(RecordFunctionCallback);
+TORCH_API void popCallback();
+
+} // namespace profiler
+}} // namespace torch::autograd