[manager] Do not allocate Adam for inference
author Parichay Kapoor <pk.kapoor@samsung.com>
Mon, 4 Jan 2021 10:12:42 +0000 (19:12 +0900)
committer Jijoong Moon <jijoong.moon@samsung.com>
Mon, 25 Jan 2021 02:59:21 +0000 (11:59 +0900)
Do not allocate Adam and gradient memory for weights
when the model is executed for inference.

V2:
Separate the memory allocation for weights and gradients.
Gradient memory allocation is decided based on training/inference.
However, weight memory must always be allocated and be in place
before readModel(), so the two allocations need to be separated.
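
A minimal sketch of the resulting call flow, assuming the surrounding
NeuralNetwork setup from this patch (illustrative only):

    // NeuralNetwork::initialize(): weights are always allocated so that
    // readModel() can load them regardless of training or inference.
    manager->initializeWeights();

    // Later, when execution memory is assigned (assignMem(trainable)):
    //  - trainable == true  -> gradients and optimizer (Adam) tensors are allocated
    //  - trainable == false -> only inference buffers are set up
    manager->initializeTensors(trainable);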

**Self evaluation:**
1. Build test: [x]Passed [ ]Failed [ ]Skipped
2. Run test: [x]Passed [ ]Failed [ ]Skipped

Signed-off-by: Parichay Kapoor <pk.kapoor@samsung.com>
nntrainer/models/neuralnet.cpp
nntrainer/tensor/manager.cpp
nntrainer/tensor/manager.h
nntrainer/tensor/weight.cpp
nntrainer/tensor/weight.h
test/unittest/unittest_nntrainer_layers.cpp

index 16e8228..fd7d8d8 100644 (file)
@@ -258,13 +258,13 @@ int NeuralNetwork::initialize() {
     }
   }
   setBatchSize(batch_size);
+  // Allocate and initialize weights
+  manager->initializeWeights();
 
   if (in_place_optimization) {
     model_graph.inPlaceOptimize(*manager);
   }
 
-  manager->initialize();
-
   initialized = true;
   return status;
 }
@@ -505,7 +505,7 @@ sharedConstTensors NeuralNetwork::inference(sharedConstTensors X) {
 
 int NeuralNetwork::assignMem(bool trainable) {
   // TODO: directly replace this
-  manager->initializeInOuts(trainable);
+  manager->initializeTensors(trainable);
   return ML_ERROR_NONE;
 }
 
index e0c4875..710ccf3 100644 (file)
@@ -174,22 +174,12 @@ void Manager::trackWeights(std::vector<Weight> &ws) {
   max_grad_size = std::max(max_grad_size, grad_size);
 }
 
-/**
- * @brief Allocate and initialize the weight variable
- */
-void Manager::initialize() {
-  if (total_weight_size == 0) {
-    ml_logw("Nothing done on initialize because there is no weight registered");
-    return;
-  }
-  using AllocFunc = std::function<Tensor(const TensorDim &, size_t)>;
-
+Manager::AllocFunc Manager::getAllocFunc(bool is_weight) {
   AllocFunc allocate_none = [](const TensorDim &dim, size_t) {
     return Tensor();
   };
 
-  AllocFunc allocate_weight = allocate_none;
-  AllocFunc allocate_grad = allocate_none;
+  AllocFunc allocate_func = allocate_none;
 
   if (use_shared_memory) {
 
@@ -207,24 +197,67 @@ void Manager::initialize() {
       };
     };
 
-    allocate_weight = get_allocfunc(total_weight_size, weight_mmaped_memory);
-
-    size_t grad_size =
-      enable_gradient_memory_opt ? max_grad_size : total_grad_size;
-    allocate_grad = get_allocfunc(grad_size, grad_mmaped_memory);
-
-  } else {
+    if (is_weight) {
+      /** For weights */
+      allocate_func = get_allocfunc(total_weight_size, weight_mmaped_memory);
+    } else {
+      /** For gradients */
+      size_t grad_size =
+        enable_gradient_memory_opt ? max_grad_size : total_grad_size;
+      allocate_func = get_allocfunc(grad_size, grad_mmaped_memory);
+    }
+  } else if (!is_weight) {
+    /** only for gradients */
     if (max_grad_size > 0 && enable_gradient_memory_opt) {
       std::shared_ptr<float> window(new float[max_grad_size],
                                     std::default_delete<float[]>());
 
-      allocate_grad = [window](const TensorDim &dim, size_t offset) {
+      allocate_func = [window](const TensorDim &dim, size_t offset) {
         return Tensor::Map(window, dim, offset);
       };
     }
   }
 
+  return allocate_func;
+}
+
+/**
+ * @brief Allocate and initialize the weight variable
+ */
+void Manager::initializeWeights() {
+  if (total_weight_size == 0) {
+    ml_logw("Nothing done on initialize because there is no weight registered");
+    return;
+  }
+
+  AllocFunc allocate_weight = getAllocFunc(true);
+
   size_t weight_offset = 0;
+
+  for (auto &l_w : weights) {
+    for (auto &w : l_w) {
+      Weight &weight = w.get();
+      auto dim = weight.getDim();
+      Tensor weight_prealloc = allocate_weight(dim, weight_offset);
+      Tensor grad_prealloc = Tensor();
+
+      weight_offset += dim.getDataLen();
+      weight.initialize(weight_prealloc, Tensor(), false);
+    }
+  }
+}
+
+/**
+ * @brief Allocate and initialize the weight gradients
+ */
+void Manager::initializeGradients() {
+  if (total_weight_size == 0) {
+    ml_logw("Nothing done on initialize because there is no weight registered");
+    return;
+  }
+
+  AllocFunc allocate_grad = getAllocFunc(false);
+
   size_t grad_offset = 0;
 
   for (auto &l_w : weights) {
@@ -234,13 +267,12 @@ void Manager::initialize() {
     for (auto &w : l_w) {
       Weight &weight = w.get();
       auto dim = weight.getDim();
-      Tensor weight_prealloc = allocate_weight(dim, weight_offset);
-      Tensor grad_prealloc =
-        weight.getTrainable() ? allocate_grad(dim, grad_offset) : Tensor();
+      Tensor grad_prealloc = Tensor();
+      if (weight.getTrainable())
+        grad_prealloc = allocate_grad(dim, grad_offset);
 
-      weight_offset += dim.getDataLen();
       grad_offset += dim.getDataLen();
-      weight.initialize(weight_prealloc, grad_prealloc);
+      weight.initializeGrad(grad_prealloc, true);
     }
   }
 }
@@ -340,9 +372,13 @@ void Manager::untrackLayerInOuts(const std::string &layer_name) {
 }
 
 /**
- * @brief Initialize the inputs/outputs for the layer
+ * @brief Initialize the inputs/outputs/gradients/derivatives for the layer
  */
-void Manager::initializeInOuts(bool trainable) {
+void Manager::initializeTensors(bool trainable) {
+  // Allocate gradients
+  if (trainable)
+    initializeGradients();
+
   // Allocate shared derivative memory
   Tensor shared_deriv;
   if (max_derivative_size > 0 && enable_activation_memory_opt && trainable)
index 79196d2..05393ac 100644 (file)
@@ -162,8 +162,10 @@ public:
 
   /**
    * @brief Allocate and initialize the weight variable
+   * @note This only allocates weights and does not handle training-related
+   * memory for the weights
    */
-  void initialize();
+  void initializeWeights();
 
   /**
    * @brief Reset the manager state
@@ -219,11 +221,14 @@ public:
   void untrackLayerInOuts(const std::string &layer_name);
 
   /**
-   * @brief Initialize the inputs/outputs for the layers
-   * @todo Make initialize() and initializeInOuts() coherent but still separated
-   * @param[in] trainable If true, initialize derivates, else, do not.
+   * @brief Initialize the inputs/outputs/derivatives/gradients for the layers
+   * @param[in] trainable If true, initialize derivatives/gradients, else do not.
+   * @note The memory allocation strategy varies based on trainable. The
+   * memory allocated for inference mode is not compatible with training and
+   * requires a full allocation rather than reusing the memory allocated in
+   * inference mode.
    */
-  void initializeInOuts(bool trainable);
+  void initializeTensors(bool trainable);
 
   /**
    * @brief Set the batch size for the inputs/outputs of the layers
@@ -273,6 +278,8 @@ private:
   std::unique_ptr<MMapedMemory> weight_mmaped_memory;
   std::unique_ptr<MMapedMemory> grad_mmaped_memory;
 
+  using AllocFunc = std::function<Tensor(const TensorDim &, size_t)>;
+
   /**
    * @brief Track the inputs/ouputs of the layer
    * @param[in] layer_type Type of the layer
@@ -289,6 +296,19 @@ private:
    * @param[in] var_name Name of the variable
    */
   void untrackVariable(const std::string &var_name);
+
+  /**
+   * @brief Allocate and initialize the weight gradients
+   * @note This only allocates the weight gradients and assumes that the
+   * weights are pre-allocated.
+   */
+  void initializeGradients();
+
+  /**
+   * @brief Get helper allocator function to use for weight or gradient
+   * @param[in] is_weight true to get the allocator for weights, false for gradients
+   */
+  AllocFunc getAllocFunc(bool is_weight);
 };
 
 } // namespace nntrainer
index 562c42f..4edd65a 100644 (file)
@@ -25,8 +25,16 @@ Weight::Weight(const TensorDim &dim, const WeightInitializer init, bool train,
 }
 
 void Weight::initialize(const Tensor &weights_preallocated,
-                        const Tensor &grad_preallocated) {
-  Var_Grad::initialize(weights_preallocated, grad_preallocated);
+                        const Tensor &grad_preallocated, bool gtrain) {
+  Var_Grad::initialize(weights_preallocated, grad_preallocated, gtrain);
+
+  if (gtrain) {
+    // If trainable, allocate optimizer parameters
+    for (auto const &dim : opt_vars_dim) {
+      opt_vars.emplace_back(dim);
+      opt_vars.back().setZero();
+    }
+  }
 
   Tensor &var_ref = getVariableRef();
   const TensorDim dim = var_ref.getDim();
@@ -65,4 +73,17 @@ void Weight::initialize(const Tensor &weights_preallocated,
   }
 }
 
+void Weight::initializeGrad(const Tensor &grad_preallocated, bool gtrain) {
+  // Use self variable to initialize itself
+  Var_Grad::initialize(this->getVariableRef(), grad_preallocated, gtrain);
+
+  if (gtrain) {
+    // If trainable, allocate optimizer parameters
+    for (auto const &dim : opt_vars_dim) {
+      opt_vars.emplace_back(dim);
+      opt_vars.back().setZero();
+    }
+  }
+}
+
 } // namespace nntrainer
index d0d0943..737e74f 100644 (file)
@@ -82,7 +82,16 @@ public:
    * @copydoc var_grad::initialize(const Tensor &, const Tensor &)
    */
   void initialize(const Tensor &weight_preallocated = Tensor(),
-                  const Tensor &grad_preallocated = Tensor());
+                  const Tensor &grad_preallocated = Tensor(),
+                  bool gtrain = true);
+
+  /**
+   * @brief Initialize the gradient for the weight
+   * @param grad_preallocated if initialized, use this tensor for grad
+   * @param gtrain If true, also allocate the gradient and optimizer variables
+   */
+  void initializeGrad(const Tensor &grad_preallocated = Tensor(),
+                      bool gtrain = true);
 
   /**
    * @brief Swap for weight
@@ -159,16 +168,18 @@ public:
   /**
    * @brief Clear optimizer variables
    */
-  void clearOptimizerVariables() { opt_vars.clear(); }
+  void clearOptimizerVariables() {
+    opt_vars.clear();
+    opt_vars_dim.clear();
+  }
 
   /**
    * @brief Add optimizer variables
    * @param dim Optimizer variable dimension
    */
   void addOptimizerVariable(const TensorDim &dim) {
-    opt_vars.emplace_back(dim);
+    opt_vars_dim.emplace_back(dim);
     // TODO: Move this out when an optimizer does not initialize with 0.
-    opt_vars.back().setZero();
   }
 
   /**
@@ -181,7 +192,8 @@ public:
 private:
   WeightInitializer initializer; /**< initializer for this variable */
 
-  std::vector<Tensor> opt_vars; /**< optimizer variables */
+  std::vector<Tensor> opt_vars;        /**< optimizer variables */
+  std::vector<TensorDim> opt_vars_dim; /**< optimizer variables dimensions */
 };
 
 } // namespace nntrainer
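
The weight.h change above defers optimizer-variable allocation: addOptimizerVariable()
now only records dimensions in opt_vars_dim, and the actual tensors are created inside
Weight::initialize() / Weight::initializeGrad() when gtrain is true. A rough sketch of
the intended usage (weight, dim, and the preallocated tensors are assumed to come from
the manager/optimizer setup shown elsewhere in this patch):

    // weight: an existing nntrainer::Weight, e.g. from a layer's getWeightsRef()
    weight.clearOptimizerVariables();
    weight.addOptimizerVariable(dim); // Adam first moment: dimension recorded only
    weight.addOptimizerVariable(dim); // Adam second moment: dimension recorded only

    // Inference path: weight variable only, no gradient or Adam tensors allocated
    weight.initialize(weight_prealloc, nntrainer::Tensor(), /*gtrain=*/false);

    // Training path: gradient plus the recorded optimizer tensors are allocated
    weight.initializeGrad(grad_prealloc, /*gtrain=*/true);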
index b80ecb3..420fc2b 100644 (file)
@@ -68,8 +68,7 @@ protected:
     layer.setOutputBuffers(manager.trackLayerOutputs(
       layer.getType(), layer.getName(), layer.getOutputDimension()));
 
-    manager.initializeInOuts(true);
-    manager.initialize();
+    manager.initializeTensors(true);
 
     return status;
   }
@@ -160,6 +159,7 @@ protected:
     EXPECT_EQ(status, ML_ERROR_NONE);
 
     EXPECT_NO_THROW(opt->addOptimizerVariable(layer.getWeightsRef()));
+    manager.initializeTensors(true);
 
     return status;
   }
@@ -472,15 +472,6 @@ protected:
     label =
       MAKE_SHARED_TENSOR(nntrainer::Tensor(layer.getOutputDimension()[0]));
 
-    std::vector<nntrainer::Tensor> v;
-
-    for (unsigned int i = 0; i < layer.getNumWeights(); ++i) {
-      v.push_back(layer.weightAt(i).getVariable());
-    }
-
-    loadFile("tc_fc_1_FCLayer.in", in);
-    loadFile("tc_fc_1_FCKernel.in", v);
-    loadFile("tc_fc_1_FCLabel.in", *label);
     layers.clear();
 
     return status;
@@ -506,7 +497,7 @@ protected:
       manager.trackLayerOutputs(act_layer->getType(), act_layer->getName(),
                                 act_layer->getOutputDimension()));
 
-    manager.initializeInOuts(true);
+    manager.initializeTensors(true);
     layers.push_back(act_layer);
   }
 
@@ -534,7 +525,7 @@ protected:
       manager.trackLayerOutputs(loss_layer->getType(), loss_layer->getName(),
                                 loss_layer->getOutputDimension()));
 
-    manager.initializeInOuts(true);
+    manager.initializeTensors(true);
     layers.push_back(loss_layer);
 
     if (type == nntrainer::LossType::LOSS_ENTROPY_SOFTMAX) {
@@ -546,6 +537,15 @@ protected:
   }
 
   void matchForwarding(const char *file) {
+    std::vector<nntrainer::Tensor> v;
+    for (unsigned int i = 0; i < layer.getNumWeights(); ++i) {
+      v.push_back(layer.weightAt(i).getVariable());
+    }
+
+    loadFile("tc_fc_1_FCLayer.in", in);
+    loadFile("tc_fc_1_FCKernel.in", v);
+    loadFile("tc_fc_1_FCLabel.in", *label);
+
     sharedConstTensor out;
     EXPECT_NO_THROW(out =
                       layer.forwarding_with_val({MAKE_SHARED_TENSOR(in)})[0]);
@@ -1692,7 +1692,7 @@ TEST(nntrainer_LossLayer, forward_loss_unknown_n) {
   layer.setOutputBuffers(manager.trackLayerOutputs(
     layer.getType(), layer.getName(), layer.getOutputDimension()));
 
-  manager.initializeInOuts(true);
+  manager.initializeTensors(true);
   EXPECT_THROW(
     layer.forwarding_with_val({MAKE_SHARED_TENSOR(a)}, {MAKE_SHARED_TENSOR(b)}),
     std::runtime_error);
@@ -1709,7 +1709,7 @@ TEST(nntrainer_LossLayer, backward_loss_unknown_n) {
   layer.setOutputBuffers(manager.trackLayerOutputs(
     layer.getType(), layer.getName(), layer.getOutputDimension()));
 
-  manager.initializeInOuts(true);
+  manager.initializeTensors(true);
   EXPECT_THROW(layer.backwarding_with_val({MAKE_SHARED_TENSOR(a)}),
                std::runtime_error);
 }
@@ -1727,7 +1727,7 @@ TEST(nntrainer_LossLayer, forward_loss_forward_entropy_n) {
   layer.setOutputBuffers(manager.trackLayerOutputs(
     layer.getType(), layer.getName(), layer.getOutputDimension()));
 
-  manager.initializeInOuts(true);
+  manager.initializeTensors(true);
   EXPECT_THROW(
     layer.forwarding_with_val({MAKE_SHARED_TENSOR(a)}, {MAKE_SHARED_TENSOR(b)}),
     std::runtime_error);
@@ -1745,7 +1745,7 @@ TEST(nntrainer_LossLayer, backward_loss_backward_entropy_n) {
   layer.setOutputBuffers(manager.trackLayerOutputs(
     layer.getType(), layer.getName(), layer.getOutputDimension()));
 
-  manager.initializeInOuts(true);
+  manager.initializeTensors(true);
   EXPECT_THROW(layer.backwarding_with_val({MAKE_SHARED_TENSOR(a)}),
                std::runtime_error);
 }
@@ -1843,7 +1843,7 @@ TEST(nntrainer_ActivationLayer, forward_backward_01_p) {
     layer.getType(), layer.getName(), layer.getInputDimension()));
   layer.setOutputBuffers(manager.trackLayerOutputs(
     layer.getType(), layer.getName(), layer.getOutputDimension()));
-  manager.initializeInOuts(true);
+  manager.initializeTensors(true);
 
   nntrainer::Tensor result;
   EXPECT_NO_THROW(result =
@@ -1916,7 +1916,7 @@ TEST_F(nntrainer_AdditionLayer, forwarding_01_n) {
   layer.setOutputBuffers(manager.trackLayerOutputs(
     layer.getType(), layer.getName(), layer.getOutputDimension()));
 
-  manager.initializeInOuts(true);
+  manager.initializeTensors(true);
 
   EXPECT_THROW(layer.forwarding_with_val({input}), std::invalid_argument);
 }
@@ -1941,7 +1941,7 @@ TEST_F(nntrainer_AdditionLayer, DISABLED_forwarding_02_n) {
   layer.setOutputBuffers(manager.trackLayerOutputs(
     layer.getType(), layer.getName(), layer.getOutputDimension()));
 
-  manager.initializeInOuts(true);
+  manager.initializeTensors(true);
 
   EXPECT_THROW(layer.forwarding_with_val({input}), std::runtime_error);
 }