ext_supported = true;
output_fence.tag = ONNXIFI_TAG_MEMORY_FENCE_V1;
output_fence.type = ONNXIFI_SYNCHRONIZATION_EVENT;
+ if (enable_tracing_) {
+ traces_.reset();
+ traces_ = std::shared_ptr<onnxTraceEventList>(
+ new onnxTraceEventList(), [this](onnxTraceEventList* p) {
+ if (p && onnxReleaseTraceEventsPointer_) {
+ CAFFE_ENFORCE_EQ(
+ (*onnxReleaseTraceEventsPointer_)(p), ONNXIFI_STATUS_SUCCESS);
+ }
+ delete p;
+ });
+ traces_->numEvents = 0;
+ }
CAFFE_ENFORCE_EQ(
(*onnxSetIOAndRunGraphPointer_)(
graph_,
output_desc_.size(),
output_desc_.data(),
&output_fence,
- /* traceEvents */ nullptr),
+ traces_.get()),
ONNXIFI_STATUS_SUCCESS);
output_batch_sizes = extractOutputBatchSizes();
CAFFE_ENFORCE_EQ(
if (adjust_output_batch_) {
maybeAdjustOutputBatchSizes(output_batch_sizes);
}
+ enable_tracing_ = false;
return true;
}
// Destructor. Releases this op's reference to the shared backend graph, then
// asks the backend graph map to remove the entry keyed by op_id_string_
// (presumably the map decides whether the graph itself is torn down — confirm
// against the map's implementation).
~OnnxifiOp() {
backend_graph_shared_ptr_.reset();
backend_graph_map_ptr_->remove(op_id_string_);
+#ifdef ONNXIFI_ENABLE_EXT
// Drop our reference to the trace list explicitly, while the rest of the
// object is still intact: the deleter installed when traces_ is created
// captures `this` and reads onnxReleaseTraceEventsPointer_.
// NOTE(review): reset() only runs that deleter if no other owner holds a copy
// (traces() hands out copies) — confirm callers release theirs first.
+ traces_.reset();
+#endif
}
bool RunOnDevice() override;
/// Sets the enable_tracing_ flag to `b`.
/// The flag is checked on the run path before the graph is executed and is
/// set back to false at the end of that path, so it has to be set again for
/// every run that should be traced.
+ void setEnableTracing(bool b) {
+ enable_tracing_ = b;
+ }
+
+#ifdef ONNXIFI_ENABLE_EXT
/// Returns a copy of the traces_ shared pointer (shared ownership of the
/// trace event list). The result is null when traces_ has not been populated.
/// Only available when ONNXIFI_ENABLE_EXT is defined.
/// NOTE(review): the deleter attached to traces_ captures `this`; a copy that
/// outlives this op would run that deleter against a destroyed object —
/// confirm callers drop the returned pointer before the op is destroyed.
+ std::shared_ptr<onnxTraceEventList> traces() const {
+ return traces_;
+ }
+#endif
private:
uint64_t SetOutputShapeAndType(int output_idx, std::vector<size_t>* dims) {
uint64_t type = ONNXIFI_DATATYPE_FLOAT32;
backend_ = backend_graph_shared_ptr_->backend;
graph_ = backend_graph_shared_ptr_->graph;
-// Set up function pointer if onnxifi_ext is enabled
+ getExtFunctionPointers();
+ }
+
+ /// Set up the extension function pointers if onnxifi_ext is enabled.
/// With ONNXIFI_ENABLE_EXT defined, asks lib_ for the addresses of
/// "onnxSetIOAndRunGraphFunction" and "onnxReleaseTraceEventsFunction" and
/// stores each one in its member pointer; a lookup that does not return
/// ONNXIFI_STATUS_SUCCESS leaves the corresponding pointer as nullptr.
/// Without the macro the body is empty.
+ void getExtFunctionPointers() {
#ifdef ONNXIFI_ENABLE_EXT
onnxExtensionFunctionPointer p;
// Lookup 1: the combined set-IO-and-run entry point.
if (lib_->onnxGetExtensionFunctionAddress(
backend_id_, "onnxSetIOAndRunGraphFunction", &p) !=
ONNXIFI_STATUS_SUCCESS) {
onnxSetIOAndRunGraphPointer_ = nullptr;
- return;
// The early return is replaced by an else-branch so that the second lookup
// below still runs when the first one fails.
+ } else {
// The generic extension pointer is cast to the member's own function type.
+ onnxSetIOAndRunGraphPointer_ =
+ reinterpret_cast<decltype(onnxSetIOAndRunGraphPointer_)>(p);
+ }
// Lookup 2: the entry point used to release trace events; handled
// independently of the result of lookup 1.
+ if (lib_->onnxGetExtensionFunctionAddress(
+ backend_id_, "onnxReleaseTraceEventsFunction", &p) !=
+ ONNXIFI_STATUS_SUCCESS) {
+ onnxReleaseTraceEventsPointer_ = nullptr;
+ } else {
+ onnxReleaseTraceEventsPointer_ =
+ reinterpret_cast<decltype(onnxReleaseTraceEventsPointer_)>(p);
}
- onnxSetIOAndRunGraphPointer_ =
- reinterpret_cast<decltype(onnxSetIOAndRunGraphPointer_)>(p);
#endif
}
const onnxTensorDescriptorV1*,
onnxMemoryFenceV1*,
onnxTraceEventList*);
+
+ onnxStatus (*onnxReleaseTraceEventsPointer_)(onnxTraceEventList*);
+
+ std::shared_ptr<onnxTraceEventList> traces_{nullptr};
#endif
bool use_onnx_{false};
// value: position of the input where the real batch size can be extracted
// from its first dimension
std::unordered_map<int, int> batch_pos_map_;
+ // Whether tracing is enabled for the next run of inference; the run path
+ // sets this back to false once the run completes
+ bool enable_tracing_{false};
};
} // namespace caffe2