From 4cec0a7857a373f3252fdf62d2123c9234b2ea25 Mon Sep 17 00:00:00 2001
From: Parichay Kapoor <pk.kapoor@samsung.com>
Date: Fri, 3 Jul 2020 16:32:46 +0900
Subject: [PATCH] [weight/gradients] Initialize Weights once only

Since layers only store references to the weights, register them once at
initialization rather than rebuilding the list on every iteration.

Signed-off-by: Parichay Kapoor <pk.kapoor@samsung.com>
---
 nntrainer/include/layer.h      | 4 ++++
 nntrainer/src/conv2d_layer.cpp | 7 +++----
 nntrainer/src/fc_layer.cpp     | 9 +++++----
 3 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/nntrainer/include/layer.h b/nntrainer/include/layer.h
index 8988e543..dc9d8761 100644
--- a/nntrainer/include/layer.h
+++ b/nntrainer/include/layer.h
@@ -436,11 +436,15 @@ protected:
 
   /**
    * @brief Gradient for the weights in this layer
+   * @note The order of gradients should match the order in weights
    */
  std::vector<std::reference_wrapper<Tensor>> gradients;
 
   /**
    * @brief weights in this layer
+   * @note The weights are combined with their corresponding bias
+   * For example- with W0, W1, B0 and B1, weights would be of format
+   * {W0, B0, W1, B1}.
    */
  std::vector<std::reference_wrapper<Tensor>> weights;
 
diff --git a/nntrainer/src/conv2d_layer.cpp b/nntrainer/src/conv2d_layer.cpp
index 4150c29e..a684248d 100644
--- a/nntrainer/src/conv2d_layer.cpp
+++ b/nntrainer/src/conv2d_layer.cpp
@@ -35,6 +35,7 @@ int Conv2DLayer::initialize(bool last) {
   Kdim.height(kernel_size[0]);
   Kdim.width(kernel_size[1]);
 
+  weights.clear();
   for (unsigned int i = 0; i < filter_size; ++i) {
     Tensor Knl = initializeWeight(Kdim, weight_ini_type, status);
     NN_RETURN_STATUS();
@@ -43,12 +44,14 @@ int Conv2DLayer::initialize(bool last) {
       Tensor(input_dim.batch(), Kdim.channel(), Kdim.height(), Kdim.width()));
     delBias.push_back(Tensor(input_dim.batch(), 1, 1, 1));
     filters.push_back(Knl);
+    weights.push_back(Knl);
 
     Tensor B(input_dim.batch(), 1, 1, 1);
     if (!bias_init_zero) {
       B.apply([&](float x) { return random(); });
     }
     bias.push_back(B);
+    weights.push_back(B);
   }
   // this output_dim should be the same with dimension of hidden
   output_dim.batch(input_dim.batch());
@@ -187,7 +190,6 @@ Tensor Conv2DLayer::backwarding(Tensor derivative, int iteration) {
   }
 
   gradients.clear();
-  weights.clear();
 
   // Update K / bias
   for (unsigned int i = 0; i < filter_size; ++i) {
@@ -197,9 +199,6 @@ Tensor Conv2DLayer::backwarding(Tensor derivative, int iteration) {
                    filters[i], weight_decay.lambda)
           .run();
 
-    weights.push_back(filters[i]);
-    weights.push_back(bias[i]);
-
     gradients.push_back(djdw);
     gradients.push_back(delBias[i]);
   }
diff --git a/nntrainer/src/fc_layer.cpp b/nntrainer/src/fc_layer.cpp
index 7b75e627..305d7747 100644
--- a/nntrainer/src/fc_layer.cpp
+++ b/nntrainer/src/fc_layer.cpp
@@ -52,6 +52,11 @@ int FullyConnectedLayer::initialize(bool last) {
   } else {
     bias.setRandUniform(-0.5, 0.5);
   }
+
+  weights.clear();
+  weights.push_back(weight);
+  weights.push_back(bias);
+
   return status;
 }
 
@@ -177,10 +182,6 @@ Tensor FullyConnectedLayer::backwarding(Tensor derivative, int iteration) {
   gradients.push_back(djdw);
   gradients.push_back(djdb);
 
-  weights.clear();
-  weights.push_back(weight);
-  weights.push_back(bias);
-
   opt.apply_gradients(weights, gradients, iteration);
 
   return ret;
-- 
2.34.1
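
Illustrative note (not part of the patch): the sketch below is a minimal,
self-contained C++ example of the registration pattern this change adopts.
The Tensor, DemoLayer, applyGradients, and learning_rate names are simplified
stand-ins invented for illustration, not the nntrainer classes or API; the
point is only that weight/bias references are pushed into weights exactly once
at initialization, interleaved as {W0, B0, W1, B1}, and that gradients must
follow the same order, so backwarding no longer clears and rebuilds the list
on every iteration.

// Simplified stand-ins; the real nntrainer types and optimizer API differ.
#include <cstddef>
#include <functional>
#include <iostream>
#include <vector>

struct Tensor {
  std::vector<float> data;
  explicit Tensor(std::size_t n, float v = 0.0f) : data(n, v) {}
};

// Mimics opt.apply_gradients(weights, gradients, iteration):
// weights[i] and gradients[i] must refer to matching tensors.
void applyGradients(std::vector<std::reference_wrapper<Tensor>> &weights,
                    const std::vector<std::reference_wrapper<Tensor>> &gradients,
                    float learning_rate) {
  for (std::size_t i = 0; i < weights.size(); ++i) {
    Tensor &w = weights[i].get();
    const Tensor &g = gradients[i].get();
    for (std::size_t j = 0; j < w.data.size(); ++j)
      w.data[j] -= learning_rate * g.data[j];
  }
}

struct DemoLayer {
  Tensor w0{4}, b0{1}, w1{4}, b1{1};
  std::vector<std::reference_wrapper<Tensor>> weights;

  // Register references once, interleaved as {W0, B0, W1, B1}.
  void initialize() {
    weights.clear();
    weights.push_back(w0);
    weights.push_back(b0);
    weights.push_back(w1);
    weights.push_back(b1);
  }

  // Backwarding only rebuilds gradients; weights stay registered.
  void backwarding(float lr) {
    Tensor dw0{4, 0.1f}, db0{1, 0.1f}, dw1{4, 0.1f}, db1{1, 0.1f};
    std::vector<std::reference_wrapper<Tensor>> gradients;
    gradients.push_back(dw0);
    gradients.push_back(db0);
    gradients.push_back(dw1);
    gradients.push_back(db1);
    applyGradients(weights, gradients, lr);
  }
};

int main() {
  DemoLayer layer;
  layer.initialize();
  layer.backwarding(0.01f);
  std::cout << "w0[0] after one step: " << layer.w0.data[0] << '\n';
  return 0;
}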