[weight] Updated weights to be a vector
authorParichay Kapoor <pk.kapoor@samsung.com>
Tue, 1 Dec 2020 12:07:49 +0000 (21:07 +0900)
committerJijoong Moon <jijoong.moon@samsung.com>
Mon, 7 Dec 2020 00:32:10 +0000 (09:32 +0900)
Updated the weights of a layer to be a vector rather than a shared_ptr
array. This makes the weights easier to manage and simplifies updating
them internally once gradients share memory.

See also #774 #766
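
A rough sketch of the storage change (declarations from layer_internal.h,
loop from Layer::read in the diff below):

    // before: a raw array behind a shared_ptr with manual length bookkeeping
    std::shared_ptr<Weight> weight_list;
    unsigned int num_weights;

    // after: a single container that owns its weights and knows its size
    std::vector<Weight> weights;

    // iteration no longer needs the explicit count
    for (auto &weight : weights)
      weight.getVariableRef().read(file);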

**Self evaluation:**
1. Build test: [x]Passed [ ]Failed [ ]Skipped
2. Run test: [x]Passed [ ]Failed [ ]Skipped

Signed-off-by: Parichay Kapoor <pk.kapoor@samsung.com>
13 files changed:
nntrainer/layers/bn_layer.cpp
nntrainer/layers/conv2d_layer.cpp
nntrainer/layers/fc_layer.cpp
nntrainer/layers/layer.cpp
nntrainer/layers/layer_internal.h
nntrainer/manager.h
nntrainer/optimizers/adam.cpp
nntrainer/optimizers/adam.h
nntrainer/optimizers/optimizer.cpp
nntrainer/optimizers/optimizer_internal.h
nntrainer/tensor/var_grad.h
nntrainer/tensor/weight.h
test/unittest/unittest_nntrainer_layers.cpp

index 579876e39c1a541caa6121221b89e1ddca6d248d..18263dcc2ebd84e9ba894e3e48d4f726095b6680 100644 (file)
@@ -59,19 +59,21 @@ int BatchNormalizationLayer::initialize(Manager &manager) {
       axes_to_reduce.push_back(i);
   }
 
-  setNumWeights(4);
-  weightAt(BNParams::mu) =
-    std::move(Weight(dim, initializers[BNParams::mu], false, "BN:moving_mean"));
-  ///@todo shift var to std to save computation
-  weightAt(BNParams::var) = std::move(
-    Weight(dim, initializers[BNParams::var], false, "BN:moving_variance"));
-  weightAt(BNParams::gamma) =
-    std::move(Weight(dim, initializers[BNParams::gamma], true, "BN:gamma"));
-  weightAt(BNParams::beta) =
-    std::move(Weight(dim, initializers[BNParams::beta], true, "BN:beta"));
-
-  manager.trackWeights({weightAt(BNParams::mu), weightAt(BNParams::var),
-                        weightAt(BNParams::gamma), weightAt(BNParams::beta)});
+  if (weights.empty()) {
+    weights.reserve(4);
+    weights.push_back(createWeight(manager, dim, initializers[BNParams::mu],
+                                   false, "BN::moving_mean"));
+    ///@todo shift var to std to save computation
+    weights.push_back(createWeight(manager, dim, initializers[BNParams::var],
+                                   false, "BN::moving_variance"));
+    weights.push_back(createWeight(manager, dim, initializers[BNParams::gamma],
+                                   true, "BN::gamma"));
+    weights.push_back(createWeight(manager, dim, initializers[BNParams::beta],
+                                   true, "BN::beta"));
+  } else {
+    for (unsigned int idx = 0; idx < weights.size(); ++idx)
+      weights[idx].reset(dim, initializers[idx], weights[idx].getTrainable());
+  }
 
   return status;
 }
index 0d2f4c7414416a61133b0ef2e24bfe257db8f1c2..4e62a4f4baf82c52d8a45d7bb7370a63c8a5da3f 100644 (file)
@@ -50,13 +50,16 @@ int Conv2DLayer::initialize(Manager &manager) {
     TensorDim(filter_size, in_dim.channel(), kernel_size[0], kernel_size[1]);
   TensorDim bias_dim = TensorDim(1, filter_size, 1, 1);
 
-  setNumWeights(2);
-  weightAt(ConvParams::weight) =
-    Weight(dim, weight_initializer, true, kernelPrefix);
-  weightAt(ConvParams::bias) =
-    Weight(bias_dim, bias_initializer, true, biasPrefix);
-  manager.trackWeights(
-    {weightAt(ConvParams::weight), weightAt(ConvParams::bias)});
+  if (weights.empty()) {
+    weights.reserve(2);
+    weights.push_back(
+      createWeight(manager, dim, weight_initializer, true, kernelPrefix));
+    weights.push_back(
+      createWeight(manager, bias_dim, bias_initializer, true, biasPrefix));
+  } else {
+    weights[ConvParams::weight].reset(dim, weight_initializer, true);
+    weights[ConvParams::bias].reset(bias_dim, bias_initializer, true);
+  }
 
   // this output_dim should be the same with dimension of hidden
   out_dim.batch(in_dim.batch());
index 355fd6c94b1f08595e1a1505f07d04b9fc6c3bf1..b21647077b72005e3b3f926b45ef04989d353a78 100644 (file)
@@ -52,12 +52,16 @@ int FullyConnectedLayer::initialize(Manager &manager) {
   dim.height(input_dim[0].width());
   dim.batch(1);
 
-  setNumWeights(2);
-  weightAt(FCParams::weight) =
-    Weight(dim, weight_initializer, true, "FC:weight");
-  weightAt(FCParams::bias) =
-    Weight(bias_dim, bias_initializer, true, "FC::bias");
-  manager.trackWeights({weightAt(FCParams::weight), weightAt(FCParams::bias)});
+  if (weights.empty()) {
+    weights.reserve(2);
+    weights.push_back(
+      createWeight(manager, dim, weight_initializer, true, "FC:weight"));
+    weights.push_back(
+      createWeight(manager, bias_dim, bias_initializer, true, "FC:bias"));
+  } else {
+    weights[FCParams::weight].reset(dim, weight_initializer, true);
+    weights[FCParams::bias].reset(bias_dim, bias_initializer, true);
+  }
 
   return status;
 }
index b3f9d6dd4c3c0de3f8524d757cda2210ec6d4d3d..cd0b0eb4f2258a0640ab37bcdbe91e17e18f527c 100644 (file)
@@ -39,7 +39,7 @@ void Layer::setActivation(ActivationType acti) {
 
 int Layer::setOptimizer(std::shared_ptr<Optimizer> opt) {
   this->opt = createOptimizer(opt->getType(), *opt);
-  return this->opt->initialize(weight_list, num_weights, true);
+  return this->opt->initialize(weights, true);
 }
 
 int Layer::checkValidation() {
@@ -78,10 +78,9 @@ std::vector<Tensor> Layer::getDerivatives() {
 }
 
 void Layer::copy(std::shared_ptr<Layer> l) {
-  setNumWeights(l->num_weights);
-  for (unsigned int i = 0; i < num_weights; ++i) {
-    weightAt(i) = l->weightAt(i);
-  }
+  weights.clear();
+  for (auto const &w : l->weights)
+    weights.push_back(w.clone());
 
   // TODO: fix this #630
   this->opt = l->opt;
@@ -149,24 +147,24 @@ sharedConstTensors Layer::backwarding_with_val(int iteration,
 }
 
 void Layer::read(std::ifstream &file) {
-  for (unsigned int i = 0; i < num_weights; ++i) {
-    weightAt(i).getVariableRef().read(file);
+  for (auto &weight : weights) {
+    weight.getVariableRef().read(file);
   }
   if (opt)
     opt->read(file);
 }
 
 void Layer::save(std::ofstream &file) {
-  for (unsigned int i = 0; i < num_weights; ++i) {
-    weightAt(i).getVariableRef().save(file);
+  for (auto &weight : weights) {
+    weight.getVariableRef().save(file);
   }
   if (opt)
     opt->save(file);
 }
 
 void Layer::applyGradient(unsigned int iteration) {
-  if (trainable && num_weights > 0) {
-    opt->apply_gradients(weight_list, num_weights, iteration);
+  if (trainable && !weights.empty()) {
+    opt->apply_gradients(weights, iteration);
   }
 }
 
@@ -326,7 +324,7 @@ void Layer::printIfValid(std::ostream &out, const PropertyType type,
 void Layer::printShapeInfo(std::ostream &out) {
   for (unsigned int idx = 0; idx < num_inputs; ++idx) {
     out << "input " << input_dim[idx];
-    for (unsigned int i = 0; i < num_weights; i++)
+    for (unsigned int i = 0; i < weights.size(); i++)
       out << "inner" << i << " " << weightAt(i).getVariable().getDim();
   }
   for (unsigned int idx = 0; idx < num_outputs; ++idx) {
@@ -403,9 +401,9 @@ void Layer::print(std::ostream &out, unsigned int flags) {
 
   if (flags & PRINT_WEIGHTS) {
     out << "======weights: " << std::endl;
-    for (unsigned int i = 0; i < num_weights; ++i) {
-      out << '[' << weightAt(i).getName() << ']' << std::endl;
-      out << weightAt(i).var;
+    for (auto const &weight : weights) {
+      out << '[' << weight.getName() << ']' << std::endl;
+      out << weight.getVariable();
     }
   }
 
index 64452d2663e22a10e9f44b631764664b274b28af..88eaa75fc6baaed4eb28a32b46f4343b4447d7ca 100644 (file)
@@ -90,7 +90,6 @@ public:
     bias_initializer(bias_initializer_),
     flatten(flatten_),
     trainable(trainable_),
-    num_weights(0),
     num_inputs(1),
     num_outputs(1) {
     input_dim.resize(1);
@@ -257,7 +256,7 @@ public:
    * @brief     get all weights of the layer
    * @retval    vector of all params
    */
-  std::shared_ptr<Weight> getWeights() { return weight_list; }
+  std::vector<Weight> getWeights() { return weights; }
 
   /**
    * @brief     get if the output of this layer must be flatten
@@ -289,11 +288,7 @@ public:
    * @exception std::out_of_range for index out of range
    */
   Weight &weightAt(const unsigned int position) {
-    if (position >= num_weights) {
-      throw std::out_of_range("index out of range");
-    }
-
-    return weight_list.get()[position];
+    return weights.at(position);
   }
 
   /**
@@ -301,7 +296,7 @@ public:
    *
    * @return unsigned int number of weights
    */
-  unsigned int getNumWeights() { return num_weights; }
+  unsigned int getNumWeights() { return weights.size(); }
 
   /**
    * @brief Set the batch for the layer
@@ -448,36 +443,10 @@ protected:
   bool trainable;
 
   /**
-   * @brief     reserve memory for @a weight_list and set @a num_weights
-   * @exception std::invalid_argument when num_weights is already set and
-   * shouldn't be changed again.
+   * @brief     weights in this layer. This contains all the weights of the
+   * layer.
    */
-  void setNumWeights(unsigned int psize) {
-    if (psize == num_weights)
-      return;
-
-    if (num_weights > 0) {
-      throw std::invalid_argument("param size can't be set once it is set");
-    }
-
-    num_weights = psize;
-    weight_list = std::shared_ptr<Weight>(new Weight[num_weights],
-                                          std::default_delete<Weight[]>());
-  }
-
-  /**
-   * @brief     weight_list in this layer. This contains trainable weights of
-   * layers.
-   */
-  std::shared_ptr<Weight> weight_list;
-
-  unsigned int num_weights; /**< length of weights.
-                                This shouldn't be changed
-                                after initiation
-                                use setNumWeights() to avoid
-                                setting parameters twice */
-
-  std::vector<std::shared_ptr<Weight>> weights;
+  std::vector<Weight> weights;
 
   /**
    * @brief   Number of inputs this layer will requries/will operate on
index b1a7e42c9e6d1d25da3fd86dd2e91ff7f02602a4..b6c51d47837db691c4d32a6ca806a334e899b34b 100644 (file)
@@ -50,7 +50,7 @@ public:
    *
    * @param ws  Weights to be tracked
    */
-  void trackWeights(std::vector<Weight> ws) {
+  void trackWeights(std::vector<Weight> &ws) {
     weights.reserve(weights.size() + ws.size());
     weights.insert(weights.end(), ws.begin(), ws.end());
   }
index 728c6f6567fad71fa2afe9950e9c910158a86320..4a0e80815a4d8f0f3a45f1fb069a6f4a828ee70c 100644 (file)
@@ -24,15 +24,12 @@ namespace nntrainer {
 
 const std::string Adam::type = "adam";
 
-int Adam::initialize(std::shared_ptr<Weight> weight_list,
-                     unsigned int num_weights, bool set_tensor) {
+int Adam::initialize(std::vector<Weight> &weight_list, bool set_tensor) {
   int status = ML_ERROR_NONE;
   weight_mv.clear();
 
   if (set_tensor) {
-    for (unsigned int i = 0; i < num_weights; ++i) {
-      Weight &w = weight_list.get()[i];
-
+    for (auto const &w : weight_list) {
       // TODO: only trainable weights must be sent to optimizer
       if (!w.getTrainable())
         continue;
index a9837bace2e1a29b83de7944cbe6b14ac7a87f97..a43c57cd1ef77669780b57580b24e9d6712184dc 100644 (file)
@@ -59,11 +59,9 @@ public:
   void setProperty(const PropertyType type, const std::string &value = "");
 
   /**
-   * @copydoc Optimizer::initialize(std::shared_ptr<Weight> params, unsigned int
-   num_weights, bool setTensor)
+   * @copydoc Optimizer::initialize(std::vector<Weight> &params, bool setTensor)
    */
-  int initialize(std::shared_ptr<Weight> params, unsigned int num_weights,
-                 bool setTensor);
+  int initialize(std::vector<Weight> &params, bool setTensor);
 
   /**
    * @copydoc read(std::ifstream &file)
index e16b428712071f193ae2ba45a661051ea03091b3..6ef1ededefab90a2ea7e0b13bf385e1d7a9eaf32 100644 (file)
@@ -34,8 +34,7 @@
 
 namespace nntrainer {
 
-int Optimizer::initialize(std::shared_ptr<Weight> weight_list,
-                          unsigned int num_weights, bool set_tensor) {
+int Optimizer::initialize(std::vector<Weight> &weight_list, bool set_tensor) {
   return ML_ERROR_NONE;
 }
 
@@ -49,15 +48,13 @@ double Optimizer::getLearningRate(int iteration) {
   return ll;
 }
 
-void Optimizer::apply_gradients(std::shared_ptr<Weight> weight_list,
-                                unsigned int num_weights, int iteration) {
+void Optimizer::apply_gradients(std::vector<Weight> &weight_list,
+                                int iteration) {
 
   double ll = getLearningRate(iteration);
 
   int idx = 0;
-  for (unsigned int i = 0; i < num_weights; ++i) {
-    Weight &weight = weight_list.get()[i];
-
+  for (auto &weight : weight_list) {
     if (!weight.getTrainable())
       continue;
 
index 1b3f9e56e1299d9dbb07d9e8ccbb8f54b954d1ec..1e51a18c5261de343b54b26a4249663836caee80 100644 (file)
@@ -105,11 +105,9 @@ public:
   /**
    * @brief     apply gradient to weight_list
    * @param[in] params Weight list
-   * @param[in] num_weights size of the array
    * @param[in] iteration nth epoch number
    */
-  void apply_gradients(std::shared_ptr<Weight> params, unsigned int num_weights,
-                       int iteration);
+  void apply_gradients(std::vector<Weight> &params, int iteration);
 
   /**
    * @brief     Read Training optimizer paramters from file
@@ -165,8 +163,7 @@ private:
    * @retval #ML_ERROR_NONE Successful.
    * @retval #ML_ERROR_INVALID_PARAMETER invalid parameter.
    */
-  virtual int initialize(std::shared_ptr<Weight> params,
-                         unsigned int num_weights, bool setTensor);
+  virtual int initialize(std::vector<Weight> &params, bool setTensor);
 
   /**
    * @brief     apply gradient to the given weight
index d139bec42aed7015d48d5fd0b6f637fa37d3dcba..8d25722b1acb83bb0ca3acf9abc04f4cf4b244f1 100644 (file)
@@ -144,7 +144,7 @@ public:
    *
    * @return Cloned copy
    */
-  virtual Var_Grad clone() const {
+  Var_Grad clone() const {
     Var_Grad vg(*this);
     vg.var = std::make_shared<Tensor>(this->var->clone());
     vg.grad = std::make_shared<Tensor>(this->grad->clone());
@@ -152,6 +152,21 @@
     return vg;
   };
 
+  /**
+   * @brief Reset the variable and gradient
+   *
+   * @param dim Variable and gradient tensor dimension
+   * @param train If the variable is trainable
+   *
+   * @note The new dimension must preserve the variable's element count
+   */
+  void reset(const TensorDim &dim, bool train) {
+    var->reshape(dim);
+    grad->reshape(dim);
+    trainable = train;
+    resetGradient();
+  }
+
 protected:
   /**
    * @brief Get the variable tensor (by reference)
index 9ae5e309418b4a5e6a3dde1eff20c7d435bf1f9b..4f11629d5991f933fd5b251c1caa13bbcf2fefd9 100644 (file)
@@ -127,11 +127,11 @@ public:
   Weight &operator=(Weight &&rhs) = default;
 
   /**
-   * @bried Clone the currnet object
+   * @brief Clone the current object
    *
    * @return Cloned copy
    */
-  Weight clone() {
+  Weight clone() const {
     Weight w(*this);
     if (!var->uninitialized())
       w.var = std::make_shared<Tensor>(this->var->clone());
@@ -141,6 +141,21 @@ public:
     return w;
   }
 
+  /**
+   * @brief Reset the weight
+   *
+   * @param dim Variable and gradient tensor dimension
+   * @param init Initializer for the tensor
+   * @param train If the variable is trainable
+   *
+   * @note The new dimension must preserve the variable's element count
+   */
+  void reset(const TensorDim &dim, const WeightInitializer init, bool train) {
+    initializer = init;
+    Var_Grad::reset(dim, train);
+    initializeWeight();
+  }
+
 private:
   WeightInitializer initializer; /**< initializer for this variable */
 };
index af11820d0789996430e2eb1d21bcf1b479cfe948..e0ac52eb248c2936a293db08fabca823f8992d34 100644 (file)
@@ -640,12 +640,12 @@ protected:
   }
 
   void matchUpdatedWeightsGradients() {
-    std::shared_ptr<nntrainer::Weight> params = layer.getWeights();
+    std::vector<nntrainer::Weight> params = layer.getWeights();
 
     /** Match gradients and updated weights */
     for (int idx = 0; idx < 2; ++idx) {
-      matchOutput(params.get()[idx].getGradient(), grad[idx]);
-      matchOutput(params.get()[idx].getVariable(), new_w[idx]);
+      matchOutput(params[idx].getGradient(), grad[idx]);
+      matchOutput(params[idx].getVariable(), new_w[idx]);
     }
   }
 
@@ -680,7 +680,7 @@ TEST_F(nntrainer_FullyConnectedLayer_TFmatch, forwarding_backwarding_00_p) {
 
   matchOutput(result, "tc_fc_1_goldenFCGradientAdam.out");
 
-  nntrainer::Weight *param_data = layer.getWeights().get();
+  auto param_data = layer.getWeights();
 
   nntrainer::Weight &param = param_data[0];
   nntrainer::Tensor weight = param.getVariable();
@@ -1181,7 +1181,7 @@ TEST_F(nntrainer_Conv2DLayer, backwarding_01_p) {
   EXPECT_NO_THROW(result = *layer.backwarding_with_val(
                     1, {MAKE_SHARED_TENSOR(derivatives)})[0]);
 
-  nntrainer::Weight *param_data = layer.getWeights().get();
+  auto param_data = layer.getWeights();
   const float *weight_grad = param_data[0].getGradient().getData();
   const float *bias_grad = param_data[1].getGradient().getData();
 
@@ -1218,7 +1218,7 @@ TEST_F(nntrainer_Conv2DLayer, backwarding_02_p) {
   EXPECT_NO_THROW(result = *layer.backwarding_with_val(
                     1, {MAKE_SHARED_TENSOR(derivatives)})[0]);
 
-  nntrainer::Weight *param_data = layer.getWeights().get();
+  auto param_data = layer.getWeights();
   const float *weight_grad = param_data[0].getGradient().getData();
   const float *bias_grad = param_data[1].getGradient().getData();
 
@@ -1328,7 +1328,7 @@ TEST_F(nntrainer_Conv2DLayer, DISABLED_backwarding_03_p) {
     result = *layer1.backwarding_with_val(1, {MAKE_SHARED_TENSOR(result2)})[0]);
 
   /** Compare second conv */
-  nntrainer::Weight *param_data = layer2.getWeights().get();
+  auto param_data = layer2.getWeights();
   const float *weight_grad = param_data[0].getGradient().getData();
   const float *bias_grad = param_data[1].getGradient().getData();
 
@@ -1336,7 +1336,7 @@ TEST_F(nntrainer_Conv2DLayer, DISABLED_backwarding_03_p) {
   matchOutput(bias_grad, "tc_conv2d_int_goldenBias2Grad.out");
 
   /** Compare first conv */
-  param_data = layer1.getWeights().get();
+  param_data = layer1.getWeights();
   weight_grad = param_data[0].getGradient().getData();
   bias_grad = param_data[1].getGradient().getData();
 
@@ -1371,7 +1371,7 @@ TEST_F(nntrainer_Conv2DLayer, backwarding_04_p) {
   EXPECT_NO_THROW(result = *layer.backwarding_with_val(
                     1, {MAKE_SHARED_TENSOR(derivatives)})[0]);
 
-  nntrainer::Weight *param_data = layer.getWeights().get();
+  auto param_data = layer.getWeights();
   const float *weight_grad = param_data[0].getGradient().getData();
   const float *bias_grad = param_data[1].getGradient().getData();