[manager] Do not allocate Adam for inference
author Parichay Kapoor <pk.kapoor@samsung.com>
Mon, 4 Jan 2021 10:12:42 +0000 (19:12 +0900)
committer Jijoong Moon <jijoong.moon@samsung.com>
Mon, 25 Jan 2021 02:59:21 +0000 (11:59 +0900)
Do not allocate Adam and gradient memory for weights
when the model is executed for inference.

V2:
Separate the memory allocation for weights and gradients.
Gradient memory allocation is decided based on training/inference.
However, weight memory must always be allocated and be in place
before readModel(), so the two allocations need to be separated.
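
A minimal sketch of the resulting call flow, assuming the surrounding
NeuralNetwork setup from this patch (illustrative only):

    // NeuralNetwork::initialize(): weights are always allocated so that
    // readModel() can load them regardless of training or inference.
    manager->initializeWeights();

    // Later, when execution memory is assigned (assignMem(trainable)):
    //  - trainable == true  -> gradients and optimizer (Adam) tensors are allocated
    //  - trainable == false -> only inference buffers are set up
    manager->initializeTensors(trainable);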

**Self evaluation:**
1. Build test: [x]Passed [ ]Failed [ ]Skipped
2. Run test: [x]Passed [ ]Failed [ ]Skipped

Signed-off-by: Parichay Kapoor <pk.kapoor@samsung.com>
nntrainer/models/neuralnet.cpp
nntrainer/tensor/manager.cpp
nntrainer/tensor/manager.h
nntrainer/tensor/weight.cpp
nntrainer/tensor/weight.h
test/unittest/unittest_nntrainer_layers.cpp

index 16e8228..fd7d8d8 100644 (file)
@@ -258,13 +258,13 @@ int NeuralNetwork::initialize() {
     }
   }
   setBatchSize(batch_size);
+  // Allocate and initialize weights
+  manager->initializeWeights();
 
   if (in_place_optimization) {
     model_graph.inPlaceOptimize(*manager);
   }
 
-  manager->initialize();
-
   initialized = true;
   return status;
 }
@@ -505,7 +505,7 @@ sharedConstTensors NeuralNetwork::inference(sharedConstTensors X) {
 
 int NeuralNetwork::assignMem(bool trainable) {
   // TODO: directly replace this
-  manager->initializeInOuts(trainable);
+  manager->initializeTensors(trainable);
   return ML_ERROR_NONE;
 }
 
index e0c4875..710ccf3 100644 (file)
@@ -174,22 +174,12 @@ void Manager::trackWeights(std::vector<Weight> &ws) {
   max_grad_size = std::max(max_grad_size, grad_size);
 }
 
-/**
- * @brief Allocate and initialize the weight variable
- */
-void Manager::initialize() {
-  if (total_weight_size == 0) {
-    ml_logw("Nothing done on initialize because there is no weight registered");
-    return;
-  }
-  using AllocFunc = std::function<Tensor(const TensorDim &, size_t)>;
-
+Manager::AllocFunc Manager::getAllocFunc(bool is_weight) {
   AllocFunc allocate_none = [](const TensorDim &dim, size_t) {
     return Tensor();
   };
 
-  AllocFunc allocate_weight = allocate_none;
-  AllocFunc allocate_grad = allocate_none;
+  AllocFunc allocate_func = allocate_none;
 
   if (use_shared_memory) {
 
@@ -207,24 +197,67 @@ void Manager::initialize() {
       };
     };
 
-    allocate_weight = get_allocfunc(total_weight_size, weight_mmaped_memory);
-
-    size_t grad_size =
-      enable_gradient_memory_opt ? max_grad_size : total_grad_size;
-    allocate_grad = get_allocfunc(grad_size, grad_mmaped_memory);
-
-  } else {
+    if (is_weight) {
+      /** For weights */
+      allocate_func = get_allocfunc(total_weight_size, weight_mmaped_memory);
+    } else {
+      /** For gradients */
+      size_t grad_size =
+        enable_gradient_memory_opt ? max_grad_size : total_grad_size;
+      allocate_func = get_allocfunc(grad_size, grad_mmaped_memory);
+    }
+  } else if (!is_weight) {
+    /** only for gradients */
     if (max_grad_size > 0 && enable_gradient_memory_opt) {
       std::shared_ptr<float> window(new float[max_grad_size],
                                     std::default_delete<float[]>());
 
-      allocate_grad = [window](const TensorDim &dim, size_t offset) {
+      allocate_func = [window](const TensorDim &dim, size_t offset) {
         return Tensor::Map(window, dim, offset);
       };
     }
   }
 
+  return allocate_func;
+}
+
+/**
+ * @brief Allocate and initialize the weight variable
+ */
+void Manager::initializeWeights() {
+  if (total_weight_size == 0) {
+    ml_logw("Nothing done on initialize because there is no weight registered");
+    return;
+  }
+
+  AllocFunc allocate_weight = getAllocFunc(true);
+
   size_t weight_offset = 0;
+
+  for (auto &l_w : weights) {
+    for (auto &w : l_w) {
+      Weight &weight = w.get();
+      auto dim = weight.getDim();
+      Tensor weight_prealloc = allocate_weight(dim, weight_offset);
+      Tensor grad_prealloc = Tensor();
+
+      weight_offset += dim.getDataLen();
+      weight.initialize(weight_prealloc, Tensor(), false);
+    }
+  }
+}
+
+/**
+ * @brief Allocate and initialize the weight gradients
+ */
+void Manager::initializeGradients() {
+  if (total_weight_size == 0) {
+    ml_logw("Nothing done on initialize because there is no weight registered");
+    return;
+  }
+
+  AllocFunc allocate_grad = getAllocFunc(false);
+
   size_t grad_offset = 0;
 
   for (auto &l_w : weights) {
@@ -234,13 +267,12 @@ void Manager::initialize() {
     for (auto &w : l_w) {
       Weight &weight = w.get();
       auto dim = weight.getDim();
-      Tensor weight_prealloc = allocate_weight(dim, weight_offset);
-      Tensor grad_prealloc =
-        weight.getTrainable() ? allocate_grad(dim, grad_offset) : Tensor();
+      Tensor grad_prealloc = Tensor();
+      if (weight.getTrainable())
+        grad_prealloc = allocate_grad(dim, grad_offset);
 
-      weight_offset += dim.getDataLen();
       grad_offset += dim.getDataLen();
-      weight.initialize(weight_prealloc, grad_prealloc);
+      weight.initializeGrad(grad_prealloc, true);
     }
   }
 }
@@ -340,9 +372,13 @@ void Manager::untrackLayerInOuts(const std::string &layer_name) {
 }
 
 /**
- * @brief Initialize the inputs/outputs for the layer
+ * @brief Initialize the inputs/outputs/gradients/derivatives for the layer
  */
-void Manager::initializeInOuts(bool trainable) {
+void Manager::initializeTensors(bool trainable) {
+  // Allocate gradients
+  if (trainable)
+    initializeGradients();
+
   // Allocate shared derivative memory
   Tensor shared_deriv;
   if (max_derivative_size > 0 && enable_activation_memory_opt && trainable)
index 79196d2..05393ac 100644 (file)
@@ -162,8 +162,10 @@ public:
 
   /**
    * @brief Allocate and initialize the weight variable
+   * @note This only allocates weights and does not handle training-related
+   * memory for the weights
    */
-  void initialize();
+  void initializeWeights();
 
   /**
    * @brief Reset the manager state
@@ -219,11 +221,14 @@ public:
   void untrackLayerInOuts(const std::string &layer_name);
 
   /**
-   * @brief Initialize the inputs/outputs for the layers
-   * @todo Make initialize() and initializeInOuts() coherent but still separated
-   * @param[in] trainable If true, initialize derivates, else, do not.
+   * @brief Initialize the inputs/outputs/derivatives/gradients for the layers
+   * @param[in] trainable If true, initialize derivatives/gradients, else do not.
+   * @note The memory allocation strategy varies based on trainable. The
+   * memory allocated for inference mode is not compatible with training and
+   * requires a full allocation rather than reusing the memory allocated in
+   * inference mode.
    */
-  void initializeInOuts(bool trainable);
+  void initializeTensors(bool trainable);
 
   /**
    * @brief Set the batch size for the inputs/outputs of the layers
@@ -273,6 +278,8 @@ private:
   std::unique_ptr<MMapedMemory> weight_mmaped_memory;
   std::unique_ptr<MMapedMemory> grad_mmaped_memory;
 
+  using AllocFunc = std::function<Tensor(const TensorDim &, size_t)>;
+
   /**
    * @brief Track the inputs/ouputs of the layer
    * @param[in] layer_type Type of the layer
@@ -289,6 +296,19 @@ private:
    * @param[in] var_name Name of the variable
    */
   void untrackVariable(const std::string &var_name);
+
+  /**
+   * @brief Allocate and initialize the weight gradients
+   * @note This only allocates the weight gradients and assumes that the
+   * weights are pre-allocated.
+   */
+  void initializeGradients();
+
+  /**
+   * @brief Get helper allocator function to use for weight or gradient
+   * @param[in] is_weight true to get the allocator for weights, false for gradients
+   */
+  AllocFunc getAllocFunc(bool is_weight);
 };
 
 } // namespace nntrainer
index 562c42f..4edd65a 100644 (file)
@@ -25,8 +25,16 @@ Weight::Weight(const TensorDim &dim, const WeightInitializer init, bool train,
 }
 
 void Weight::initialize(const Tensor &weights_preallocated,
-                        const Tensor &grad_preallocated) {
-  Var_Grad::initialize(weights_preallocated, grad_preallocated);
+                        const Tensor &grad_preallocated, bool gtrain) {
+  Var_Grad::initialize(weights_preallocated, grad_preallocated, gtrain);
+
+  if (gtrain) {
+    // If trainable, allocate optimizer parameters
+    for (auto const &dim : opt_vars_dim) {
+      opt_vars.emplace_back(dim);
+      opt_vars.back().setZero();
+    }
+  }
 
   Tensor &var_ref = getVariableRef();
   const TensorDim dim = var_ref.getDim();
@@ -65,4 +73,17 @@ void Weight::initialize(const Tensor &weights_preallocated,
   }
 }
 
+void Weight::initializeGrad(const Tensor &grad_preallocated, bool gtrain) {
+  // Use self variable to initialize itself
+  Var_Grad::initialize(this->getVariableRef(), grad_preallocated, gtrain);
+
+  if (gtrain) {
+    // If trainable, allocate optimizer parameters
+    for (auto const &dim : opt_vars_dim) {
+      opt_vars.emplace_back(dim);
+      opt_vars.back().setZero();
+    }
+  }
+}
+
 } // namespace nntrainer
index d0d0943..737e74f 100644 (file)
@@ -82,7 +82,16 @@ public:
    * @copydoc var_grad::initialize(const Tensor &, const Tensor &)
    */
   void initialize(const Tensor &weight_preallocated = Tensor(),
-                  const Tensor &grad_preallocated = Tensor());
+                  const Tensor &grad_preallocated = Tensor(),
+                  bool gtrain = true);
+
+  /**
+   * @brief Initialize the gradient for the weight
+   * @param grad_preallocated if initialized, use this tensor for grad
+   * @param gtrain If true, also allocate the gradient and optimizer variables
+   */
+  void initializeGrad(const Tensor &grad_preallocated = Tensor(),
+                      bool gtrain = true);
 
   /**
    * @brief Swap for weight
@@ -159,16 +168,18 @@ public:
   /**
    * @brief Clear optimizer variables
    */
-  void clearOptimizerVariables() { opt_vars.clear(); }
+  void clearOptimizerVariables() {
+    opt_vars.clear();
+    opt_vars_dim.clear();
+  }
 
   /**
    * @brief Add optimizer variables
    * @param dim Optimizer variable dimension
    */
   void addOptimizerVariable(const TensorDim &dim) {
-    opt_vars.emplace_back(dim);
+    opt_vars_dim.emplace_back(dim);
     // TODO: Move this out when an optimizer does not initialize with 0.
-    opt_vars.back().setZero();
   }
 
   /**
@@ -181,7 +192,8 @@ public:
 private:
   WeightInitializer initializer; /**< initializer for this variable */
 
-  std::vector<Tensor> opt_vars; /**< optimizer variables */
+  std::vector<Tensor> opt_vars;        /**< optimizer variables */
+  std::vector<TensorDim> opt_vars_dim; /**< optimizer variables dimensions */
 };
 
 } // namespace nntrainer
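
The weight.h change above defers optimizer-variable allocation: addOptimizerVariable()
now only records dimensions in opt_vars_dim, and the actual tensors are created inside
Weight::initialize() / Weight::initializeGrad() when gtrain is true. A rough sketch of
the intended usage (weight, dim, and the preallocated tensors are assumed to come from
the manager/optimizer setup shown elsewhere in this patch):

    // weight: an existing nntrainer::Weight, e.g. from a layer's getWeightsRef()
    weight.clearOptimizerVariables();
    weight.addOptimizerVariable(dim); // Adam first moment: dimension recorded only
    weight.addOptimizerVariable(dim); // Adam second moment: dimension recorded only

    // Inference path: weight variable only, no gradient or Adam tensors allocated
    weight.initialize(weight_prealloc, nntrainer::Tensor(), /*gtrain=*/false);

    // Training path: gradient plus the recorded optimizer tensors are allocated
    weight.initializeGrad(grad_prealloc, /*gtrain=*/true);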
index b80ecb3..420fc2b 100644 (file)
@@ -68,8 +68,7 @@ protected:
     layer.setOutputBuffers(manager.trackLayerOutputs(
       layer.getType(), layer.getName(), layer.getOutputDimension()));
 
-    manager.initializeInOuts(true);
-    manager.initialize();
+    manager.initializeTensors(true);
 
     return status;
   }
@@ -160,6 +159,7 @@ protected:
     EXPECT_EQ(status, ML_ERROR_NONE);
 
     EXPECT_NO_THROW(opt->addOptimizerVariable(layer.getWeightsRef()));
+    manager.initializeTensors(true);
 
     return status;
   }
@@ -472,15 +472,6 @@ protected:
     label =
       MAKE_SHARED_TENSOR(nntrainer::Tensor(layer.getOutputDimension()[0]));
 
-    std::vector<nntrainer::Tensor> v;
-
-    for (unsigned int i = 0; i < layer.getNumWeights(); ++i) {
-      v.push_back(layer.weightAt(i).getVariable());
-    }
-
-    loadFile("tc_fc_1_FCLayer.in", in);
-    loadFile("tc_fc_1_FCKernel.in", v);
-    loadFile("tc_fc_1_FCLabel.in", *label);
     layers.clear();
 
     return status;
@@ -506,7 +497,7 @@ protected:
       manager.trackLayerOutputs(act_layer->getType(), act_layer->getName(),
                                 act_layer->getOutputDimension()));
 
-    manager.initializeInOuts(true);
+    manager.initializeTensors(true);
     layers.push_back(act_layer);
   }
 
@@ -534,7 +525,7 @@ protected:
       manager.trackLayerOutputs(loss_layer->getType(), loss_layer->getName(),
                                 loss_layer->getOutputDimension()));
 
-    manager.initializeInOuts(true);
+    manager.initializeTensors(true);
     layers.push_back(loss_layer);
 
     if (type == nntrainer::LossType::LOSS_ENTROPY_SOFTMAX) {
@@ -546,6 +537,15 @@ protected:
   }
 
   void matchForwarding(const char *file) {
+    std::vector<nntrainer::Tensor> v;
+    for (unsigned int i = 0; i < layer.getNumWeights(); ++i) {
+      v.push_back(layer.weightAt(i).getVariable());
+    }
+
+    loadFile("tc_fc_1_FCLayer.in", in);
+    loadFile("tc_fc_1_FCKernel.in", v);
+    loadFile("tc_fc_1_FCLabel.in", *label);
+
     sharedConstTensor out;
     EXPECT_NO_THROW(out =
                       layer.forwarding_with_val({MAKE_SHARED_TENSOR(in)})[0]);
@@ -1692,7 +1692,7 @@ TEST(nntrainer_LossLayer, forward_loss_unknown_n) {
   layer.setOutputBuffers(manager.trackLayerOutputs(
     layer.getType(), layer.getName(), layer.getOutputDimension()));
 
-  manager.initializeInOuts(true);
+  manager.initializeTensors(true);
   EXPECT_THROW(
     layer.forwarding_with_val({MAKE_SHARED_TENSOR(a)}, {MAKE_SHARED_TENSOR(b)}),
     std::runtime_error);
@@ -1709,7 +1709,7 @@ TEST(nntrainer_LossLayer, backward_loss_unknown_n) {
   layer.setOutputBuffers(manager.trackLayerOutputs(
     layer.getType(), layer.getName(), layer.getOutputDimension()));
 
-  manager.initializeInOuts(true);
+  manager.initializeTensors(true);
   EXPECT_THROW(layer.backwarding_with_val({MAKE_SHARED_TENSOR(a)}),
                std::runtime_error);
 }
@@ -1727,7 +1727,7 @@ TEST(nntrainer_LossLayer, forward_loss_forward_entropy_n) {
   layer.setOutputBuffers(manager.trackLayerOutputs(
     layer.getType(), layer.getName(), layer.getOutputDimension()));
 
-  manager.initializeInOuts(true);
+  manager.initializeTensors(true);
   EXPECT_THROW(
     layer.forwarding_with_val({MAKE_SHARED_TENSOR(a)}, {MAKE_SHARED_TENSOR(b)}),
     std::runtime_error);
@@ -1745,7 +1745,7 @@ TEST(nntrainer_LossLayer, backward_loss_backward_entropy_n) {
   layer.setOutputBuffers(manager.trackLayerOutputs(
     layer.getType(), layer.getName(), layer.getOutputDimension()));
 
-  manager.initializeInOuts(true);
+  manager.initializeTensors(true);
   EXPECT_THROW(layer.backwarding_with_val({MAKE_SHARED_TENSOR(a)}),
                std::runtime_error);
 }
@@ -1843,7 +1843,7 @@ TEST(nntrainer_ActivationLayer, forward_backward_01_p) {
     layer.getType(), layer.getName(), layer.getInputDimension()));
   layer.setOutputBuffers(manager.trackLayerOutputs(
     layer.getType(), layer.getName(), layer.getOutputDimension()));
-  manager.initializeInOuts(true);
+  manager.initializeTensors(true);
 
   nntrainer::Tensor result;
   EXPECT_NO_THROW(result =
@@ -1916,7 +1916,7 @@ TEST_F(nntrainer_AdditionLayer, forwarding_01_n) {
   layer.setOutputBuffers(manager.trackLayerOutputs(
     layer.getType(), layer.getName(), layer.getOutputDimension()));
 
-  manager.initializeInOuts(true);
+  manager.initializeTensors(true);
 
   EXPECT_THROW(layer.forwarding_with_val({input}), std::invalid_argument);
 }
@@ -1941,7 +1941,7 @@ TEST_F(nntrainer_AdditionLayer, DISABLED_forwarding_02_n) {
   layer.setOutputBuffers(manager.trackLayerOutputs(
     layer.getType(), layer.getName(), layer.getOutputDimension()));
 
-  manager.initializeInOuts(true);
+  manager.initializeTensors(true);
 
   EXPECT_THROW(layer.forwarding_with_val({input}), std::runtime_error);
 }