Move weight offload inside backend construction functor (#18385)
authorYinghai Lu <yinghai@fb.com>
Wed, 27 Mar 2019 03:57:18 +0000 (20:57 -0700)
committerFacebook Github Bot <facebook-github-bot@users.noreply.github.com>
Wed, 27 Mar 2019 04:03:17 +0000 (21:03 -0700)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/18385

By moving the weight offload into the backend initialization function, we can instantiate the backend once (by creating the OnnxifiOp once) and then clean up the parameter workspace. We need to keep that instantiated net (OnnxifiOp) alive rather than cleaning it up. Subsequent constructions of an OnnxifiOp for the same model will hit the cached backend and will not attempt weight offloading again, which is safe because the weights are already gone from the workspace.

Reviewed By: ipiszy

Differential Revision: D14590379

fbshipit-source-id: f7f34016e09777ad3df0af487885cd14658e1044

caffe2/operators/onnxifi_op.cc
caffe2/operators/onnxifi_op.h

index 88e3662..e8b582f 100644 (file)
@@ -91,9 +91,11 @@ template <>
 std::vector<onnxTensorDescriptorV1>
 OnnxifiOp<float, CPUContext>::buildInitializationList(
     Workspace* ws,
-    std::unordered_set<std::string>* initialization_list,
+    const std::vector<std::string>& initializers,
     std::vector<std::string>* weight_names,
     std::vector<std::vector<uint64_t>>* weight_shapes) {
+  std::unordered_set<std::string> initialization_list(
+      initializers.begin(), initializers.end());
   const std::vector<string>& ws_blobs = ws->Blobs();
   // Since onnxTensorDescriptorV1.name will point into the memory in
   // weight_names, we need to prevent weight_names from reallocating by
@@ -101,18 +103,17 @@ OnnxifiOp<float, CPUContext>::buildInitializationList(
   weight_names->reserve(ws_blobs.size());
   std::vector<onnxTensorDescriptorV1> descs;
   for (const auto& s : ws_blobs) {
-    auto it = initialization_list->find(s);
-    if (it != initialization_list->end()) {
+    auto it = initialization_list.find(s);
+    if (it != initialization_list.end()) {
       weight_names->emplace_back(s);
       onnxTensorDescriptorV1 tensor_desc;
       tensor_desc.name = weight_names->back().c_str();
       BlobToTensorDescriptor(s, ws, &tensor_desc, weight_shapes);
       descs.push_back(tensor_desc);
-      initialization_list->erase(it);
+      initialization_list.erase(it);
     }
   }
-  CAFFE_ENFORCE(
-      initialization_list->empty(), "Unfulfilled initialization list");
+  CAFFE_ENFORCE(initialization_list.empty(), "Unfulfilled initialization list");
   return descs;
 }
 
index eb9c44a..3965aaf 100644 (file)
@@ -68,24 +68,15 @@ class OnnxifiOp final : public Operator<Context> {
     std::vector<uint64_t> property_pointers;
     std::vector<int64_t> int_args;
     std::vector<float> float_args;
-    BuildPropertyList(operator_def, &property_pointers, &int_args, &float_args);
-
-    // Pull the weights from workspace and feed it to the backend through
-    // setGraphIO. Notice that since we may have rewritten the net, we need to
-    // map the weight names
-    auto initializers =
-        this->template GetRepeatedArgument<std::string>("initializers");
-    std::unordered_set<std::string> initializer_set;
-    for (auto it = initializers.begin(); it != initializers.end(); ++it) {
-      auto key = *it;
-      initializer_set.emplace(key);
-    }
-    std::vector<std::string> weight_names;
-    std::vector<std::vector<uint64_t>> weight_shapes;
-    auto weight_descs = buildInitializationList(
-        ws, &initializer_set, &weight_names, &weight_shapes);
-
-    BuildBackendAndGraph(property_pointers, onnx_model_str, weight_descs);
+    buildPropertyList(operator_def, &property_pointers, &int_args, &float_args);
+
+    // Initialize the backend if it has not been already created. When we
+    // initialized the backend, we will get the weights (initializers) from the
+    // workspace and offload onto the backend. This should be done only once.
+    // Subsequent call of this function with the same model id should find a
+    // cached backend and therefore there is no need to repeat the above
+    // process.
+    buildBackendAndGraph(ws, property_pointers, onnx_model_str);
   }
 
   ~OnnxifiOp() {
@@ -109,7 +100,7 @@ class OnnxifiOp final : public Operator<Context> {
     return type;
   }
 
-  void BuildPropertyList(
+  void buildPropertyList(
       const OperatorDef& /* unused */,
       std::vector<uint64_t>* property_list,
       std::vector<int64_t>* /* unused */,
@@ -117,26 +108,28 @@ class OnnxifiOp final : public Operator<Context> {
     property_list->push_back(ONNXIFI_BACKEND_PROPERTY_NONE);
   }
 
-  void BuildBackendAndGraph(
+  void buildBackendAndGraph(
+      Workspace* ws,
       const std::vector<uint64_t>& property_pointers,
-      const std::string& onnx_model_str,
-      const std::vector<onnxTensorDescriptorV1>& weight_descs) {
+      const std::string& onnx_model_str) {
     op_id_string_ =
         this->template GetSingleArgument<std::string>("model_id", "") + ":" +
         this->template GetSingleArgument<std::string>("net_pos", "");
 
+    auto initializers =
+        this->template GetRepeatedArgument<std::string>("initializers");
     // Build the Onnxifi engine
     auto backend_index = this->template GetSingleArgument<int>("backend_id", 0);
-    onnxifi_library* lib = lib_;
-    auto creator = [lib,
+    auto creator = [this,
+                    ws,
                     property_pointers,
                     backend_index,
                     &onnx_model_str,
-                    &weight_descs]() {
+                    &initializers]() {
       std::vector<onnxBackendID> backend_ids;
       size_t num_backends{0};
       CAFFE_ENFORCE_EQ(
-          lib->onnxGetBackendIDs(nullptr, &num_backends),
+          lib_->onnxGetBackendIDs(nullptr, &num_backends),
           ONNXIFI_STATUS_FALLBACK);
       CAFFE_ENFORCE_GT(
           num_backends, 0, "At least 1 onnxifi backend should be available");
@@ -149,14 +142,14 @@ class OnnxifiOp final : public Operator<Context> {
           num_backends);
       backend_ids.resize(num_backends);
       CAFFE_ENFORCE_EQ(
-          lib->onnxGetBackendIDs(backend_ids.data(), &num_backends),
+          lib_->onnxGetBackendIDs(backend_ids.data(), &num_backends),
           ONNXIFI_STATUS_SUCCESS);
 
       onnxBackendID backend_id = backend_ids[backend_index];
       onnxBackend backend{nullptr};
 
       CAFFE_ENFORCE_EQ(
-          lib->onnxInitBackend(backend_id, property_pointers.data(), &backend),
+          lib_->onnxInitBackend(backend_id, property_pointers.data(), &backend),
           ONNXIFI_STATUS_SUCCESS);
 
       // Release unused backend ids.
@@ -164,11 +157,18 @@ class OnnxifiOp final : public Operator<Context> {
         if (i == backend_index) {
           continue;
         }
-        lib->onnxReleaseBackendID(backend_ids[i]);
+        lib_->onnxReleaseBackendID(backend_ids[i]);
       }
+
+      // Get weights
+      std::vector<std::string> weight_names;
+      std::vector<std::vector<uint64_t>> weight_shapes;
+      auto weight_descs = buildInitializationList(
+          ws, initializers, &weight_names, &weight_shapes);
+
       onnxGraph graph{nullptr};
       CAFFE_ENFORCE_EQ(
-          lib->onnxInitGraph(
+          lib_->onnxInitGraph(
               backend,
               nullptr,
               onnx_model_str.size(),
@@ -179,7 +179,7 @@ class OnnxifiOp final : public Operator<Context> {
           ONNXIFI_STATUS_SUCCESS);
 
       return std::make_shared<onnx::BackendGraphInfo>(
-          backend_id, backend, graph, lib);
+          backend_id, backend, graph, lib_);
     };
     backend_graph_shared_ptr_ =
         backend_graph_map_ptr_->insert(op_id_string_, creator);
@@ -204,7 +204,7 @@ class OnnxifiOp final : public Operator<Context> {
 
   std::vector<onnxTensorDescriptorV1> buildInitializationList(
       Workspace* ws,
-      std::unordered_set<std::string>* initialization_list,
+      const std::vector<std::string>& initializers,
       std::vector<std::string>* weight_names,
       std::vector<std::vector<uint64_t>>* weight_shapes);