[weight/gradients] Initialize Weights once only
author    Parichay Kapoor <pk.kapoor@samsung.com>
Fri, 3 Jul 2020 07:32:46 +0000 (16:32 +0900)
committer Jijoong Moon <jijoong.moon@samsung.com>
Mon, 6 Jul 2020 01:23:56 +0000 (10:23 +0900)
Since layers only store references to their weights, store them once at initialization rather than rebuilding the list on every iteration.

Signed-off-by: Parichay Kapoor <pk.kapoor@samsung.com>
nntrainer/include/layer.h
nntrainer/src/conv2d_layer.cpp
nntrainer/src/fc_layer.cpp

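The same pattern is applied in both layers below. The following is a minimal sketch of that pattern, not the actual nntrainer classes: SketchLayer and the placeholder Tensor/Optimizer types are hypothetical, and only the member names (weights, gradients) and the opt.apply_gradients(weights, gradients, iteration) call mirror the diff. Weight references are collected once in initialize() and reused by backwarding(), while gradients are still rebuilt on every pass.

// Minimal sketch (hypothetical types): weight references are stored once in
// initialize() and reused by backwarding(), instead of being cleared and
// repopulated on every training iteration.
#include <functional>
#include <vector>

struct Tensor { /* placeholder for nntrainer's Tensor */ };

struct Optimizer {
  // Stand-in for the optimizer's apply_gradients(weights, gradients, iteration)
  void apply_gradients(std::vector<std::reference_wrapper<Tensor>> &weights,
                       std::vector<std::reference_wrapper<Tensor>> &gradients,
                       int iteration) { /* update weights[i] with gradients[i] */ }
};

class SketchLayer {
public:
  int initialize() {
    weights.clear();          // built once, in the order {W, B}
    weights.push_back(weight);
    weights.push_back(bias);
    return 0;
  }

  Tensor backwarding(Tensor derivative, int iteration) {
    gradients.clear();        // gradients are still recomputed each pass
    gradients.push_back(djdw);
    gradients.push_back(djdb);
    // weights is no longer cleared/rebuilt here; the references stay valid
    // because the layer owns the underlying tensors.
    opt.apply_gradients(weights, gradients, iteration);
    return derivative;
  }

private:
  Tensor weight, bias, djdw, djdb;
  Optimizer opt;
  std::vector<std::reference_wrapper<Tensor>> weights;
  std::vector<std::reference_wrapper<Tensor>> gradients;
};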
diff --git a/nntrainer/include/layer.h b/nntrainer/include/layer.h
index 8988e543943047ead207e12cd3def6bd5e22a5dd..dc9d87613b9b510da6aec23a66267c714d223168 100644
@@ -436,11 +436,15 @@ protected:
 
   /**
    * @brief     Gradient for the weights in this layer
+   * @note      The order of gradients should match the order in weights
    */
   std::vector<std::reference_wrapper<Tensor>> gradients;
 
   /**
    * @brief     weights in this layer
+   * @note      Each weight is stored together with its corresponding bias.
+   *            For example, with W0, W1, B0 and B1, weights are ordered as
+   *            {W0, B0, W1, B1}.
    */
   std::vector<std::reference_wrapper<Tensor>> weights;
 
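To make the ordering in the note above concrete, here is a small hedged sketch (the standalone main() and placeholder Tensor are hypothetical; the names filters, bias, djdw and delBias are borrowed from the Conv2D diff below) showing how pushing per filter produces weights = {W0, B0, W1, B1} with gradients interleaved the same way, so index i in both vectors refers to the same parameter.

#include <functional>
#include <vector>

struct Tensor { /* placeholder for nntrainer's Tensor */ };

int main() {
  std::vector<Tensor> filters(2), bias(2);   // W0, W1 and B0, B1
  std::vector<Tensor> djdw(2), delBias(2);   // matching gradients

  std::vector<std::reference_wrapper<Tensor>> weights, gradients;
  for (unsigned int i = 0; i < filters.size(); ++i) {
    weights.push_back(filters[i]);   // W_i
    weights.push_back(bias[i]);      // B_i
    gradients.push_back(djdw[i]);    // dJ/dW_i
    gradients.push_back(delBias[i]); // dJ/dB_i
  }
  // weights[k] and gradients[k] now always name the same parameter, which is
  // what the optimizer's positional update relies on.
  return 0;
}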
diff --git a/nntrainer/src/conv2d_layer.cpp b/nntrainer/src/conv2d_layer.cpp
index 4150c29e502d6a85a1a2aed91aba02a56755d900..a684248d04a7c39b0a4c2846e13627768386f34f 100644
@@ -35,6 +35,7 @@ int Conv2DLayer::initialize(bool last) {
   Kdim.height(kernel_size[0]);
   Kdim.width(kernel_size[1]);
 
+  weights.clear();
   for (unsigned int i = 0; i < filter_size; ++i) {
     Tensor Knl = initializeWeight(Kdim, weight_ini_type, status);
     NN_RETURN_STATUS();
@@ -43,12 +44,14 @@ int Conv2DLayer::initialize(bool last) {
       Tensor(input_dim.batch(), Kdim.channel(), Kdim.height(), Kdim.width()));
     delBias.push_back(Tensor(input_dim.batch(), 1, 1, 1));
     filters.push_back(Knl);
+    weights.push_back(Knl);
 
     Tensor B(input_dim.batch(), 1, 1, 1);
     if (!bias_init_zero) {
       B.apply([&](float x) { return random(); });
     }
     bias.push_back(B);
+    weights.push_back(B);
   }
   // this output_dim should be the same with dimension of hidden
   output_dim.batch(input_dim.batch());
@@ -187,7 +190,6 @@ Tensor Conv2DLayer::backwarding(Tensor derivative, int iteration) {
   }
 
   gradients.clear();
-  weights.clear();
 
   //  Update K / bias
   for (unsigned int i = 0; i < filter_size; ++i) {
@@ -197,9 +199,6 @@ Tensor Conv2DLayer::backwarding(Tensor derivative, int iteration) {
                              filters[i], weight_decay.lambda)
                     .run();
 
-    weights.push_back(filters[i]);
-    weights.push_back(bias[i]);
-
     gradients.push_back(djdw);
     gradients.push_back(delBias[i]);
   }
diff --git a/nntrainer/src/fc_layer.cpp b/nntrainer/src/fc_layer.cpp
index 7b75e62715de9e3c218966a5583628a95c043ede..305d77470a044817e2a55cdd06bf98fee8d79e7a 100644
@@ -52,6 +52,11 @@ int FullyConnectedLayer::initialize(bool last) {
   } else {
     bias.setRandUniform(-0.5, 0.5);
   }
+
+  weights.clear();
+  weights.push_back(weight);
+  weights.push_back(bias);
+
   return status;
 }
 
@@ -177,10 +182,6 @@ Tensor FullyConnectedLayer::backwarding(Tensor derivative, int iteration) {
   gradients.push_back(djdw);
   gradients.push_back(djdb);
 
-  weights.clear();
-  weights.push_back(weight);
-  weights.push_back(bias);
-
   opt.apply_gradients(weights, gradients, iteration);
 
   return ret;