[weight/gradients] Initialize Weights once only
author    Parichay Kapoor <pk.kapoor@samsung.com>
Fri, 3 Jul 2020 07:32:46 +0000 (16:32 +0900)
committer Jijoong Moon <jijoong.moon@samsung.com>
Mon, 6 Jul 2020 01:23:56 +0000 (10:23 +0900)
Since layers only store references to their weights, store them once at initialization rather than rebuilding the list on every iteration.

Signed-off-by: Parichay Kapoor <pk.kapoor@samsung.com>
nntrainer/include/layer.h
nntrainer/src/conv2d_layer.cpp
nntrainer/src/fc_layer.cpp

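The same pattern is applied in both layers below. The following is a minimal sketch of that pattern, not the actual nntrainer classes: SketchLayer and the placeholder Tensor/Optimizer types are hypothetical, and only the member names (weights, gradients) and the opt.apply_gradients(weights, gradients, iteration) call mirror the diff. Weight references are collected once in initialize() and reused by backwarding(), while gradients are still rebuilt on every pass.

// Minimal sketch (hypothetical types): weight references are stored once in
// initialize() and reused by backwarding(), instead of being cleared and
// repopulated on every training iteration.
#include <functional>
#include <vector>

struct Tensor { /* placeholder for nntrainer's Tensor */ };

struct Optimizer {
  // Stand-in for the optimizer's apply_gradients(weights, gradients, iteration)
  void apply_gradients(std::vector<std::reference_wrapper<Tensor>> &weights,
                       std::vector<std::reference_wrapper<Tensor>> &gradients,
                       int iteration) { /* update weights[i] with gradients[i] */ }
};

class SketchLayer {
public:
  int initialize() {
    weights.clear();          // built once, in the order {W, B}
    weights.push_back(weight);
    weights.push_back(bias);
    return 0;
  }

  Tensor backwarding(Tensor derivative, int iteration) {
    gradients.clear();        // gradients are still recomputed each pass
    gradients.push_back(djdw);
    gradients.push_back(djdb);
    // weights is no longer cleared/rebuilt here; the references stay valid
    // because the layer owns the underlying tensors.
    opt.apply_gradients(weights, gradients, iteration);
    return derivative;
  }

private:
  Tensor weight, bias, djdw, djdb;
  Optimizer opt;
  std::vector<std::reference_wrapper<Tensor>> weights;
  std::vector<std::reference_wrapper<Tensor>> gradients;
};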
diff --git a/nntrainer/include/layer.h b/nntrainer/include/layer.h
index 8988e543943047ead207e12cd3def6bd5e22a5dd..dc9d87613b9b510da6aec23a66267c714d223168 100644
@@ -436,11 +436,15 @@ protected:
 
   /**
    * @brief     Gradient for the weights in this layer
+   * @note      The order of gradients should match the order in weights
    */
   std::vector<std::reference_wrapper<Tensor>> gradients;
 
   /**
    * @brief     weights in this layer
+   * @note      Each weight is stored together with its corresponding bias.
+   *            For example, with W0, W1, B0 and B1, weights are ordered as
+   *            {W0, B0, W1, B1}.
    */
   std::vector<std::reference_wrapper<Tensor>> weights;
 
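To make the ordering in the note above concrete, here is a small hedged sketch (the standalone main() and placeholder Tensor are hypothetical; the names filters, bias, djdw and delBias are borrowed from the Conv2D diff below) showing how pushing per filter produces weights = {W0, B0, W1, B1} with gradients interleaved the same way, so index i in both vectors refers to the same parameter.

#include <functional>
#include <vector>

struct Tensor { /* placeholder for nntrainer's Tensor */ };

int main() {
  std::vector<Tensor> filters(2), bias(2);   // W0, W1 and B0, B1
  std::vector<Tensor> djdw(2), delBias(2);   // matching gradients

  std::vector<std::reference_wrapper<Tensor>> weights, gradients;
  for (unsigned int i = 0; i < filters.size(); ++i) {
    weights.push_back(filters[i]);   // W_i
    weights.push_back(bias[i]);      // B_i
    gradients.push_back(djdw[i]);    // dJ/dW_i
    gradients.push_back(delBias[i]); // dJ/dB_i
  }
  // weights[k] and gradients[k] now always name the same parameter, which is
  // what the optimizer's positional update relies on.
  return 0;
}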
diff --git a/nntrainer/src/conv2d_layer.cpp b/nntrainer/src/conv2d_layer.cpp
index 4150c29e502d6a85a1a2aed91aba02a56755d900..a684248d04a7c39b0a4c2846e13627768386f34f 100644
@@ -35,6 +35,7 @@ int Conv2DLayer::initialize(bool last) {
   Kdim.height(kernel_size[0]);
   Kdim.width(kernel_size[1]);
 
+  weights.clear();
   for (unsigned int i = 0; i < filter_size; ++i) {
     Tensor Knl = initializeWeight(Kdim, weight_ini_type, status);
     NN_RETURN_STATUS();
@@ -43,12 +44,14 @@ int Conv2DLayer::initialize(bool last) {
       Tensor(input_dim.batch(), Kdim.channel(), Kdim.height(), Kdim.width()));
     delBias.push_back(Tensor(input_dim.batch(), 1, 1, 1));
     filters.push_back(Knl);
+    weights.push_back(Knl);
 
     Tensor B(input_dim.batch(), 1, 1, 1);
     if (!bias_init_zero) {
       B.apply([&](float x) { return random(); });
     }
     bias.push_back(B);
+    weights.push_back(B);
   }
   // this output_dim should be the same with dimension of hidden
   output_dim.batch(input_dim.batch());
@@ -187,7 +190,6 @@ Tensor Conv2DLayer::backwarding(Tensor derivative, int iteration) {
   }
 
   gradients.clear();
-  weights.clear();
 
   //  Update K / bias
   for (unsigned int i = 0; i < filter_size; ++i) {
@@ -197,9 +199,6 @@ Tensor Conv2DLayer::backwarding(Tensor derivative, int iteration) {
                              filters[i], weight_decay.lambda)
                     .run();
 
-    weights.push_back(filters[i]);
-    weights.push_back(bias[i]);
-
     gradients.push_back(djdw);
     gradients.push_back(delBias[i]);
   }
diff --git a/nntrainer/src/fc_layer.cpp b/nntrainer/src/fc_layer.cpp
index 7b75e62715de9e3c218966a5583628a95c043ede..305d77470a044817e2a55cdd06bf98fee8d79e7a 100644
@@ -52,6 +52,11 @@ int FullyConnectedLayer::initialize(bool last) {
   } else {
     bias.setRandUniform(-0.5, 0.5);
   }
+
+  weights.clear();
+  weights.push_back(weight);
+  weights.push_back(bias);
+
   return status;
 }
 
@@ -177,10 +182,6 @@ Tensor FullyConnectedLayer::backwarding(Tensor derivative, int iteration) {
   gradients.push_back(djdw);
   gradients.push_back(djdb);
 
-  weights.clear();
-  weights.push_back(weight);
-  weights.push_back(bias);
-
   opt.apply_gradients(weights, gradients, iteration);
 
   return ret;