From 4cec0a7857a373f3252fdf62d2123c9234b2ea25 Mon Sep 17 00:00:00 2001
From: Parichay Kapoor <pk.kapoor@samsung.com>
Date: Fri, 3 Jul 2020 16:32:46 +0900
Subject: [PATCH] [weight/gradients] Initialize Weights once only

Since layers only store references to the weights, register them once at
initialization rather than rebuilding the list on every iteration.

Signed-off-by: Parichay Kapoor <pk.kapoor@samsung.com>
---
 nntrainer/include/layer.h      | 4 ++++
 nntrainer/src/conv2d_layer.cpp | 7 +++----
 nntrainer/src/fc_layer.cpp     | 9 +++++----
 3 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/nntrainer/include/layer.h b/nntrainer/include/layer.h
index 8988e543..dc9d8761 100644
--- a/nntrainer/include/layer.h
+++ b/nntrainer/include/layer.h
@@ -436,11 +436,15 @@ protected:
 
   /**
    * @brief Gradient for the weights in this layer
+   * @note The order of gradients should match the order in weights
    */
  std::vector<std::reference_wrapper<Tensor>> gradients;
 
   /**
    * @brief weights in this layer
+   * @note The weights are combined with their corresponding bias
+   * For example- with W0, W1, B0 and B1, weights would be of format
+   * {W0, B0, W1, B1}.
    */
  std::vector<std::reference_wrapper<Tensor>> weights;
 
diff --git a/nntrainer/src/conv2d_layer.cpp b/nntrainer/src/conv2d_layer.cpp
index 4150c29e..a684248d 100644
--- a/nntrainer/src/conv2d_layer.cpp
+++ b/nntrainer/src/conv2d_layer.cpp
@@ -35,6 +35,7 @@ int Conv2DLayer::initialize(bool last) {
   Kdim.height(kernel_size[0]);
   Kdim.width(kernel_size[1]);
 
+  weights.clear();
   for (unsigned int i = 0; i < filter_size; ++i) {
     Tensor Knl = initializeWeight(Kdim, weight_ini_type, status);
     NN_RETURN_STATUS();
@@ -43,12 +44,14 @@ int Conv2DLayer::initialize(bool last) {
       Tensor(input_dim.batch(), Kdim.channel(), Kdim.height(), Kdim.width()));
     delBias.push_back(Tensor(input_dim.batch(), 1, 1, 1));
     filters.push_back(Knl);
+    weights.push_back(Knl);
 
     Tensor B(input_dim.batch(), 1, 1, 1);
     if (!bias_init_zero) {
       B.apply([&](float x) { return random(); });
     }
     bias.push_back(B);
+    weights.push_back(B);
   }
   // this output_dim should be the same with dimension of hidden
   output_dim.batch(input_dim.batch());
@@ -187,7 +190,6 @@ Tensor Conv2DLayer::backwarding(Tensor derivative, int iteration) {
   }
 
   gradients.clear();
-  weights.clear();
 
   // Update K / bias
   for (unsigned int i = 0; i < filter_size; ++i) {
@@ -197,9 +199,6 @@ Tensor Conv2DLayer::backwarding(Tensor derivative, int iteration) {
                    filters[i], weight_decay.lambda)
           .run();
 
-    weights.push_back(filters[i]);
-    weights.push_back(bias[i]);
-
     gradients.push_back(djdw);
     gradients.push_back(delBias[i]);
   }
diff --git a/nntrainer/src/fc_layer.cpp b/nntrainer/src/fc_layer.cpp
index 7b75e627..305d7747 100644
--- a/nntrainer/src/fc_layer.cpp
+++ b/nntrainer/src/fc_layer.cpp
@@ -52,6 +52,11 @@ int FullyConnectedLayer::initialize(bool last) {
   } else {
     bias.setRandUniform(-0.5, 0.5);
   }
+
+  weights.clear();
+  weights.push_back(weight);
+  weights.push_back(bias);
+
   return status;
 }
 
@@ -177,10 +182,6 @@ Tensor FullyConnectedLayer::backwarding(Tensor derivative, int iteration) {
   gradients.push_back(djdw);
   gradients.push_back(djdb);
 
-  weights.clear();
-  weights.push_back(weight);
-  weights.push_back(bias);
-
   opt.apply_gradients(weights, gradients, iteration);
 
   return ret;
-- 
2.34.1
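
Illustrative note (not part of the patch): the sketch below is a minimal,
self-contained C++ example of the registration pattern this change adopts.
The Tensor, DemoLayer, applyGradients, and learning_rate names are simplified
stand-ins invented for illustration, not the nntrainer classes or API; the
point is only that weight/bias references are pushed into weights exactly once
at initialization, interleaved as {W0, B0, W1, B1}, and that gradients must
follow the same order, so backwarding no longer clears and rebuilds the list
on every iteration.

// Simplified stand-ins; the real nntrainer types and optimizer API differ.
#include <cstddef>
#include <functional>
#include <iostream>
#include <vector>

struct Tensor {
  std::vector<float> data;
  explicit Tensor(std::size_t n, float v = 0.0f) : data(n, v) {}
};

// Mimics opt.apply_gradients(weights, gradients, iteration):
// weights[i] and gradients[i] must refer to matching tensors.
void applyGradients(std::vector<std::reference_wrapper<Tensor>> &weights,
                    const std::vector<std::reference_wrapper<Tensor>> &gradients,
                    float learning_rate) {
  for (std::size_t i = 0; i < weights.size(); ++i) {
    Tensor &w = weights[i].get();
    const Tensor &g = gradients[i].get();
    for (std::size_t j = 0; j < w.data.size(); ++j)
      w.data[j] -= learning_rate * g.data[j];
  }
}

struct DemoLayer {
  Tensor w0{4}, b0{1}, w1{4}, b1{1};
  std::vector<std::reference_wrapper<Tensor>> weights;

  // Register references once, interleaved as {W0, B0, W1, B1}.
  void initialize() {
    weights.clear();
    weights.push_back(w0);
    weights.push_back(b0);
    weights.push_back(w1);
    weights.push_back(b1);
  }

  // Backwarding only rebuilds gradients; weights stay registered.
  void backwarding(float lr) {
    Tensor dw0{4, 0.1f}, db0{1, 0.1f}, dw1{4, 0.1f}, db1{1, 0.1f};
    std::vector<std::reference_wrapper<Tensor>> gradients;
    gradients.push_back(dw0);
    gradients.push_back(db0);
    gradients.push_back(dw1);
    gradients.push_back(db1);
    applyGradients(weights, gradients, lr);
  }
};

int main() {
  DemoLayer layer;
  layer.initialize();
  layer.backwarding(0.01f);
  std::cout << "w0[0] after one step: " << layer.w0.data[0] << '\n';
  return 0;
}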