Surface the Glow traces to C2 (#19087)
Author: Yinghai Lu <yinghai@fb.com>
Tue, 23 Apr 2019 19:17:59 +0000 (12:17 -0700)
Committer: Facebook Github Bot <facebook-github-bot@users.noreply.github.com>
Tue, 23 Apr 2019 19:27:49 +0000 (12:27 -0700)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/19087

As titled: when tracing is enabled, OnnxifiOp now allocates an `onnxTraceEventList` (with a custom deleter that calls the backend's `onnxReleaseTraceEventsFunction`), passes it to `onnxSetIOAndRunGraph` instead of a null `traceEvents` pointer, and exposes the collected trace events to Caffe2 via a new `traces()` accessor.

Reviewed By: jackm321

Differential Revision: D14863112

fbshipit-source-id: 2680161b9f05391e73bb8dac4fbbeabb87a82c05

caffe2/operators/onnxifi_op.cc
caffe2/operators/onnxifi_op.h

index 9c9097d..6887281 100644 (file)
@@ -289,6 +289,18 @@ bool OnnxifiOp<CPUContext>::RunOnDevice() {
     ext_supported = true;
     output_fence.tag = ONNXIFI_TAG_MEMORY_FENCE_V1;
     output_fence.type = ONNXIFI_SYNCHRONIZATION_EVENT;
+    if (enable_tracing_) {
+      traces_.reset();
+      traces_ = std::shared_ptr<onnxTraceEventList>(
+          new onnxTraceEventList(), [this](onnxTraceEventList* p) {
+            if (p && onnxReleaseTraceEventsPointer_) {
+              CAFFE_ENFORCE_EQ(
+                  (*onnxReleaseTraceEventsPointer_)(p), ONNXIFI_STATUS_SUCCESS);
+            }
+            delete p;
+          });
+      traces_->numEvents = 0;
+    }
     CAFFE_ENFORCE_EQ(
         (*onnxSetIOAndRunGraphPointer_)(
             graph_,
@@ -297,7 +309,7 @@ bool OnnxifiOp<CPUContext>::RunOnDevice() {
             output_desc_.size(),
             output_desc_.data(),
             &output_fence,
-            /* traceEvents */ nullptr),
+            traces_.get()),
         ONNXIFI_STATUS_SUCCESS);
     output_batch_sizes = extractOutputBatchSizes();
     CAFFE_ENFORCE_EQ(
@@ -345,6 +357,7 @@ bool OnnxifiOp<CPUContext>::RunOnDevice() {
   if (adjust_output_batch_) {
     maybeAdjustOutputBatchSizes(output_batch_sizes);
   }
+  enable_tracing_ = false;
   return true;
 }
 
index e899444..3497610 100644 (file)
@@ -98,10 +98,22 @@ class OnnxifiOp final : public Operator<Context> {
   ~OnnxifiOp() {
     backend_graph_shared_ptr_.reset();
     backend_graph_map_ptr_->remove(op_id_string_);
+#ifdef ONNXIFI_ENABLE_EXT
+    traces_.reset();
+#endif
   }
 
   bool RunOnDevice() override;
 
+  void setEnableTracing(bool b) {
+    enable_tracing_ = b;
+  }
+
+#ifdef ONNXIFI_ENABLE_EXT
+  std::shared_ptr<onnxTraceEventList> traces() const {
+    return traces_;
+  }
+#endif
  private:
   uint64_t SetOutputShapeAndType(int output_idx, std::vector<size_t>* dims) {
     uint64_t type = ONNXIFI_DATATYPE_FLOAT32;
@@ -204,17 +216,29 @@ class OnnxifiOp final : public Operator<Context> {
     backend_ = backend_graph_shared_ptr_->backend;
     graph_ = backend_graph_shared_ptr_->graph;
 
-// Set up function pointer if onnxifi_ext is enabled
+    getExtFunctionPointers();
+  }
+
+  /// Set up function pointer if onnxifi_ext is enabled
+  void getExtFunctionPointers() {
 #ifdef ONNXIFI_ENABLE_EXT
     onnxExtensionFunctionPointer p;
     if (lib_->onnxGetExtensionFunctionAddress(
             backend_id_, "onnxSetIOAndRunGraphFunction", &p) !=
         ONNXIFI_STATUS_SUCCESS) {
       onnxSetIOAndRunGraphPointer_ = nullptr;
-      return;
+    } else {
+      onnxSetIOAndRunGraphPointer_ =
+          reinterpret_cast<decltype(onnxSetIOAndRunGraphPointer_)>(p);
+    }
+    if (lib_->onnxGetExtensionFunctionAddress(
+            backend_id_, "onnxReleaseTraceEventsFunction", &p) !=
+        ONNXIFI_STATUS_SUCCESS) {
+      onnxReleaseTraceEventsPointer_ = nullptr;
+    } else {
+      onnxReleaseTraceEventsPointer_ =
+          reinterpret_cast<decltype(onnxReleaseTraceEventsPointer_)>(p);
     }
-    onnxSetIOAndRunGraphPointer_ =
-        reinterpret_cast<decltype(onnxSetIOAndRunGraphPointer_)>(p);
 #endif
   }
 
@@ -253,6 +277,10 @@ class OnnxifiOp final : public Operator<Context> {
       const onnxTensorDescriptorV1*,
       onnxMemoryFenceV1*,
       onnxTraceEventList*);
+
+  onnxStatus (*onnxReleaseTraceEventsPointer_)(onnxTraceEventList*);
+
+  std::shared_ptr<onnxTraceEventList> traces_{nullptr};
 #endif
   bool use_onnx_{false};
 
@@ -277,6 +305,8 @@ class OnnxifiOp final : public Operator<Context> {
   // value: position of the input where the real batch size can be extracted
   // from its first dimension
   std::unordered_map<int, int> batch_pos_map_;
+  // Whether we enable tracing in one run of inference
+  bool enable_tracing_{false};
 };
 
 } // namespace caffe2