From 735b39d43ca5f9d808a6902e4e89f5103e124f7f Mon Sep 17 00:00:00 2001
From: Parichay Kapoor
Date: Wed, 2 Dec 2020 11:52:44 +0900
Subject: [PATCH] [layers/manager] Register weights with manager

All the weights of a layer are now registered with the manager.
The manager allocates memory for these weights and will, in the future,
also handle their updates.

See also #774 #766

**Self evaluation:**
1. Build test: [x]Passed [ ]Failed [ ]Skipped
2. Run test: [x]Passed [ ]Failed [ ]Skipped

Signed-off-by: Parichay Kapoor
---
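Notes (kept below the cut so they do not become part of the commit message):

The sketch below illustrates the ownership model this patch moves to: a layer
constructs its Weight objects with their tensors still unallocated, hands
references to the Manager through trackWeights(), and the model later triggers
a single allocation pass via Manager::initialize(). The classes in the sketch
are simplified stand-ins written only for this note, not the real nntrainer
Weight/Manager types, so members such as MiniWeight::data are illustrative
assumptions.

```cpp
// Standalone sketch of the registration/allocation split introduced here.
// MiniWeight and MiniManager are hypothetical stand-ins for Weight/Manager.
#include <functional>
#include <iostream>
#include <string>
#include <vector>

struct MiniWeight {
  size_t dim;              // stands in for TensorDim
  std::vector<float> data; // stands in for the variable Tensor
  std::string name;

  MiniWeight(size_t dim, const std::string &name) : dim(dim), name(name) {}

  // Allocation is deferred: nothing is allocated until initialize().
  void initialize() { data.assign(dim, 0.0f); }
};

class MiniManager {
public:
  // Layers only register references; the manager never copies weights.
  void trackWeights(std::vector<MiniWeight> &ws) {
    for (auto &w : ws)
      weights.emplace_back(std::ref(w));
  }

  // Called once the whole model is known, e.g. from NeuralNetwork::initialize().
  void initialize() {
    for (auto &w : weights)
      w.get().initialize();
  }

  void reset() { weights.clear(); }

private:
  std::vector<std::reference_wrapper<MiniWeight>> weights;
};

int main() {
  MiniManager manager;

  // A layer creates its (still unallocated) weights and registers them.
  std::vector<MiniWeight> layer_weights;
  layer_weights.reserve(2); // reserve first so the tracked references stay valid
  layer_weights.emplace_back(16, "FC:weight");
  layer_weights.emplace_back(4, "FC:bias");
  manager.trackWeights(layer_weights);

  // Later, the model triggers a single allocation pass over all weights.
  manager.initialize();
  std::cout << layer_weights[0].name << " size: "
            << layer_weights[0].data.size() << '\n'; // prints 16
  return 0;
}
```

Because the manager stores std::reference_wrapper<Weight> instead of copies,
it can later re-lay-out or share gradient memory (see
enable_gradient_memory_opt) without the layers noticing that anything moved.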
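A second sketch, under the same caveats, focuses on the Var_Grad side of the
change: the dimension is stored at construction, getDim() answers from that
stored value, the tensors stay empty until initialize(), and reset() touches
only tensors that have already been allocated. MiniVarGrad is again a
hypothetical stand-in, not the real class.

```cpp
// Standalone sketch of the deferred-allocation behaviour of Var_Grad.
#include <cassert>
#include <vector>

class MiniVarGrad {
public:
  explicit MiniVarGrad(size_t dim) : dim(dim) {}

  // Deferred allocation, as in Var_Grad::initialize().
  void initialize() { var.assign(dim, 0.0f); }

  // As in Var_Grad::reset(): remember the new dim, but touch the tensor
  // only if it has already been allocated.
  void reset(size_t new_dim) {
    dim = new_dim;
    if (!var.empty())
      var.resize(dim);
  }

  size_t getDim() const { return dim; } // valid even before initialize()
  size_t allocated() const { return var.size(); }

private:
  size_t dim;
  std::vector<float> var; // stands in for the variable Tensor
};

int main() {
  MiniVarGrad w(8);
  assert(w.getDim() == 8 && w.allocated() == 0); // dim known, no memory yet

  w.reset(4);                                    // re-dimension before allocation
  assert(w.getDim() == 4 && w.allocated() == 0); // still nothing allocated

  w.initialize();
  assert(w.allocated() == 4);                    // memory appears only here
  return 0;
}
```

This is what allows the bn/conv/fc layers to call reset() with a new dimension
before the manager has allocated anything.
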
 nntrainer/layers/bn_layer.cpp               | 17 +++++---
 nntrainer/layers/conv2d_layer.cpp           |  9 +++--
 nntrainer/layers/fc_layer.cpp               |  9 +++--
 nntrainer/layers/layer.cpp                  |  2 +-
 nntrainer/layers/layer_internal.h           |  4 +-
 nntrainer/manager.h                         | 60 +++++++++++++++++++++--------
 nntrainer/models/neuralnet.cpp              |  2 +
 nntrainer/models/neuralnet.h                |  8 ++++
 nntrainer/optimizers/optimizer.cpp          |  3 +-
 nntrainer/tensor/var_grad.cpp               |  6 +++
 nntrainer/tensor/var_grad.h                 | 17 ++++++--
 nntrainer/tensor/weight.cpp                 |  6 ++-
 nntrainer/tensor/weight.h                   | 11 +++---
 test/unittest/unittest_nntrainer_layers.cpp |  7 +++-
 14 files changed, 113 insertions(+), 48 deletions(-)

diff --git a/nntrainer/layers/bn_layer.cpp b/nntrainer/layers/bn_layer.cpp
index 18263dc..093a532 100644
--- a/nntrainer/layers/bn_layer.cpp
+++ b/nntrainer/layers/bn_layer.cpp
@@ -62,13 +62,18 @@ int BatchNormalizationLayer::initialize(Manager &manager) {
   weights.clear();
   if (weights.empty()) {
     weights.reserve(4);
-    weights.push_back(createWeight(manager, dim, initializers[BNParams::mu], false, "BN::moving_mean"));
-    weights.push_back(createWeight(manager, dim, initializers[BNParams::var], false, "BN::moving_variance"));
-    weights.push_back(createWeight(manager, dim, initializers[BNParams::gamma], true, "BN::gamma"));
-    weights.push_back(createWeight(manager, dim, initializers[BNParams::beta], true, "BN::beta"));
+    weights.emplace_back(dim, initializers[BNParams::mu], false,
+                         "BN::moving_mean");
+    weights.emplace_back(dim, initializers[BNParams::var], false,
+                         "BN::moving_variance");
+    weights.emplace_back(dim, initializers[BNParams::gamma], true, "BN::gamma");
+    weights.emplace_back(dim, initializers[BNParams::beta], true, "BN::beta");
+    manager.trackWeights(weights);
   } else {
-    for (size_t idx = 0; idx < weights.size(); idx ++)
-      weights[idx].reset(dim, initializers[idx], weights[idx].getTrainable());
+    weights[BNParams::mu].reset(dim, initializers[BNParams::mu], false);
+    weights[BNParams::var].reset(dim, initializers[BNParams::var], false);
+    weights[BNParams::gamma].reset(dim, initializers[BNParams::gamma], true);
+    weights[BNParams::beta].reset(dim, initializers[BNParams::beta], true);
   }
 
   return status;
diff --git a/nntrainer/layers/conv2d_layer.cpp b/nntrainer/layers/conv2d_layer.cpp
index 4e62a4f..ed61169 100644
--- a/nntrainer/layers/conv2d_layer.cpp
+++ b/nntrainer/layers/conv2d_layer.cpp
@@ -52,11 +52,12 @@ int Conv2DLayer::initialize(Manager &manager) {
 
   if (weights.empty()) {
     weights.reserve(2);
-    weights.push_back(createWeight(manager, dim, weight_initializer, true, kernelPrefix));
-    weights.push_back(createWeight(manager, bias_dim, bias_initializer, true, biasPrefix));
+    weights.emplace_back(dim, weight_initializer, true, kernelPrefix);
+    weights.emplace_back(bias_dim, bias_initializer, true, biasPrefix);
+    manager.trackWeights(weights);
   } else {
-    for (auto &weight : weights)
-      weight.reset(weight.getVariable().getDim(), weight_initializer, true);
+    weights[ConvParams::weight].reset(dim, weight_initializer, true);
+    weights[ConvParams::bias].reset(bias_dim, bias_initializer, true);
   }
 
   // this output_dim should be the same with dimension of hidden
diff --git a/nntrainer/layers/fc_layer.cpp b/nntrainer/layers/fc_layer.cpp
index b216470..d971412 100644
--- a/nntrainer/layers/fc_layer.cpp
+++ b/nntrainer/layers/fc_layer.cpp
@@ -54,11 +54,12 @@ int FullyConnectedLayer::initialize(Manager &manager) {
 
   if (weights.empty()) {
     weights.reserve(2);
-    weights.push_back(createWeight(manager, dim, weight_initializer, true, "FC:weight"));
-    weights.push_back(createWeight(manager, bias_dim, bias_initializer, true, "FC:bias"));
+    weights.emplace_back(dim, weight_initializer, true, "FC:weight");
+    weights.emplace_back(bias_dim, bias_initializer, true, "FC:bias");
+    manager.trackWeights(weights);
   } else {
-    for (auto &weight : weights)
-      weight.reset(weight.getVariable().getDim(), weight_initializer, true);
+    weights[FCParams::weight].reset(dim, weight_initializer, true);
+    weights[FCParams::bias].reset(bias_dim, bias_initializer, true);
   }
 
   return status;
diff --git a/nntrainer/layers/layer.cpp b/nntrainer/layers/layer.cpp
index cd0b0eb..40f84c1 100644
--- a/nntrainer/layers/layer.cpp
+++ b/nntrainer/layers/layer.cpp
@@ -78,7 +78,7 @@ std::vector<Tensor> Layer::getDerivatives() {
 }
 
 void Layer::copy(std::shared_ptr<Layer> l) {
-  for(auto const &w : weights)
+  for (auto const &w : weights)
     weights.push_back(w.clone());
 
   // TODO: fix this #630
diff --git a/nntrainer/layers/layer_internal.h b/nntrainer/layers/layer_internal.h
index 88eaa75..ebdbdb7 100644
--- a/nntrainer/layers/layer_internal.h
+++ b/nntrainer/layers/layer_internal.h
@@ -287,9 +287,7 @@ public:
    * @brief get data alias at param position.
    * @exception std::out_of_range for index out of range
    */
-  Weight &weightAt(const unsigned int position) {
-    return weights[position];
-  }
+  Weight &weightAt(const unsigned int position) { return weights[position]; }
 
   /**
    * @brief Get the number of weights
diff --git a/nntrainer/manager.h b/nntrainer/manager.h
index b6c51d4..2503071 100644
--- a/nntrainer/manager.h
+++ b/nntrainer/manager.h
@@ -16,6 +16,7 @@
 #define __MANAGER_H__
 #ifdef __cplusplus
 
+#include <functional>
 #include <vector>
 
 #include <weight.h>
@@ -31,7 +32,7 @@ public:
   /**
    * @brief Constructor of Manager
   */
-  Manager() {}
+  Manager() : enable_gradient_memory_opt(true) {}
 
   /**
   * @brief Destructor of Manager
@@ -43,7 +44,9 @@ public:
   *
   * @param w Weight to be tracked
   */
-  void trackWeight(Weight w) { weights.push_back(w); }
+  void trackWeight(std::reference_wrapper<Weight> w) {
+    weights.emplace_back(w);
+  }
 
   /**
   * @brief Add weights to be tracked and updated with nntrainer
@@ -52,32 +55,55 @@
   */
   void trackWeights(std::vector<Weight> &ws) {
     weights.reserve(weights.size() + ws.size());
-    weights.insert(weights.end(), ws.begin(), ws.end());
+    for (auto &w : ws)
+      weights.emplace_back(std::ref(w));
   }
 
   /**
   * @brief Get weights tracked with nntrainer
   *
-  * @retval list of weights
+  * @retval list of weight references
+  */
+  std::vector<std::reference_wrapper<Weight>> getWeightRefs() {
+    return weights;
+  }
+
+  /**
+  * @brief Enable gradient memory sharing based optimization
+  * @param opt True to enable, else false
   */
-  std::vector<Weight> getWeights() { return weights; }
+  void setGradientMemoryOptimization(bool opt) {
+    enable_gradient_memory_opt = opt;
+  }
+
+  /**
+  * @brief Allocate and initialize the weight variable
+  */
+  void initialize() {
+    for (auto &weight : weights)
+      weight.get().initialize();
+  }
+
+  void reset() { weights.clear(); }
 
 private:
   // TODO: ensure that names of these weights are unique
-  std::vector<Weight> weights;
+  std::vector<std::reference_wrapper<Weight>> weights;
+
+  bool enable_gradient_memory_opt; /**< share memory among all the
+                                      gradients */
 };
 
-/**
- * @brief Helper func for weight creation which are tracked by nntrainer
- *
- * @retval create weight
- */
-template <typename... Args>
-Weight createWeight(Manager &manager, Args... args) {
-  Weight w = Weight(args...);
-  manager.trackWeight(w);
-  return w;
-}
+// /**
+//  * @brief Helper func for weight creation which are tracked by nntrainer
+//  *
+//  * @retval create weight
+//  */
+// template <typename... Args>
+// Weight createWeight(Manager &manager, Args... args) {
+//   Weight w = Weight(args...);
+//   manager.trackWeight(w);
+//   return w;
+// }
 
 } // namespace nntrainer
diff --git a/nntrainer/models/neuralnet.cpp b/nntrainer/models/neuralnet.cpp
index 940644d..3f41117 100644
--- a/nntrainer/models/neuralnet.cpp
+++ b/nntrainer/models/neuralnet.cpp
@@ -236,6 +236,8 @@ int NeuralNetwork::initialize() {
 
   setBatchSize(batch_size);
 
+  manager.initialize();
+
   initialized = true;
   return status;
 }
diff --git a/nntrainer/models/neuralnet.h b/nntrainer/models/neuralnet.h
index 7edcc88..57621ef 100644
--- a/nntrainer/models/neuralnet.h
+++ b/nntrainer/models/neuralnet.h
@@ -346,6 +346,14 @@ public:
   */
   void printPreset(std::ostream &out, unsigned int preset);
 
+  /**
+  * @brief Enable gradient memory sharing based optimization
+  * @param opt True to enable, else false
+  */
+  void setGradientMemoryOptimization(bool opt) {
+    manager.setGradientMemoryOptimization(opt);
+  }
+
 private:
   /**
   * @brief Print Options when printing layer info
diff --git a/nntrainer/optimizers/optimizer.cpp b/nntrainer/optimizers/optimizer.cpp
index 6ef1ede..98ae0a4 100644
--- a/nntrainer/optimizers/optimizer.cpp
+++ b/nntrainer/optimizers/optimizer.cpp
@@ -48,7 +48,8 @@ double Optimizer::getLearningRate(int iteration) {
   return ll;
 }
 
-void Optimizer::apply_gradients(std::vector<Weight> &weight_list, int iteration) {
+void Optimizer::apply_gradients(std::vector<Weight> &weight_list,
+                                int iteration) {
 
   double ll = getLearningRate(iteration);
 
diff --git a/nntrainer/tensor/var_grad.cpp b/nntrainer/tensor/var_grad.cpp
index 3395b3d..158c5cd 100644
--- a/nntrainer/tensor/var_grad.cpp
+++ b/nntrainer/tensor/var_grad.cpp
@@ -17,8 +17,14 @@
 namespace nntrainer {
 
 Var_Grad::Var_Grad(const TensorDim &dim, bool train, const std::string &name) :
+  dim(dim),
   trainable(train),
   name(name) {
+  var = std::make_shared<Tensor>();
+  grad = std::make_shared<Tensor>();
+}
+
+void Var_Grad::initialize() {
   var = std::make_shared<Tensor>(dim);
   grad = std::make_shared<Tensor>();
 
diff --git a/nntrainer/tensor/var_grad.h b/nntrainer/tensor/var_grad.h
index 8d25722..cf94c00 100644
--- a/nntrainer/tensor/var_grad.h
+++ b/nntrainer/tensor/var_grad.h
@@ -91,11 +91,16 @@ public:
   Var_Grad &operator=(Var_Grad &&rhs) = default;
 
   /**
+  * @brief Allocate and initialize the weight variable
+  */
+  virtual void initialize();
+
+  /**
   * @brief Get the TensorDim
   *
   * @return TensorDim Dimension
   */
-  TensorDim getDim() const { return var->getDim(); }
+  TensorDim getDim() const { return dim; }
 
   /**
   * @brief Get if the Var_Grad is trainable
@@ -161,9 +166,12 @@ public:
   *
   * @note New dimension must maintain the shape of the variable
   */
-  void reset (const TensorDim &dim, bool train) {
-    var->reshape(dim);
-    grad->reshape(dim);
+  void reset(const TensorDim &tdim, bool train) {
+    dim = tdim;
+    if (!var->uninitialized())
+      var->reshape(dim);
+    if (!grad->uninitialized())
+      grad->reshape(dim);
     trainable = train;
     resetGradient();
   }
@@ -183,6 +191,7 @@ protected:
   */
   Tensor &getGradientRef() { return *grad.get(); }
 
+  TensorDim dim;                /**< dimension of the tensor */
  std::shared_ptr<Tensor> var;  /**< variable to be updated and used */
  std::shared_ptr<Tensor> grad; /**< gradient for the variable */
  bool trainable;               /**< if this variable is trainable */
diff --git a/nntrainer/tensor/weight.cpp b/nntrainer/tensor/weight.cpp
index 3304c66..b095625 100644
--- a/nntrainer/tensor/weight.cpp
+++ b/nntrainer/tensor/weight.cpp
@@ -23,10 +23,12 @@ Weight::Weight(const TensorDim &dim, const WeightInitializer init, bool train,
   if (initializer == WeightInitializer::WEIGHT_UNKNOWN)
     throw std::invalid_argument("Weight initializer unknown");
 
-  initializeWeight();
+  // initialize();
 }
 
-void Weight::initializeWeight() {
+void Weight::initialize() {
+  Var_Grad::initialize();
+
   Tensor &var_ref = getVariableRef();
   const TensorDim dim = var_ref.getDim();
 
diff --git a/nntrainer/tensor/weight.h b/nntrainer/tensor/weight.h
index 4f11629..730579b 100644
--- a/nntrainer/tensor/weight.h
+++ b/nntrainer/tensor/weight.h
@@ -81,7 +81,7 @@ public:
   /**
   * @brief Allocate and initialize the weight variable
   */
-  void initializeWeight();
+  void initialize();
 
   /**
   * @brief Swap for weight
@@ -133,11 +133,12 @@ public:
   */
   Weight clone() const {
     Weight w(*this);
 
-    if (!var->uninitialized())
+    if (!this->var->uninitialized())
       w.var = std::make_shared<Tensor>(this->var->clone());
-    if (!grad->uninitialized())
+    if (!this->grad->uninitialized())
       w.grad = std::make_shared<Tensor>(this->grad->clone());
+
     return w;
   }
 
@@ -151,9 +152,9 @@ public:
   *
   * @note New dimension must maintain the shape of the variable
   */
-  void reset (const TensorDim &dim, const WeightInitializer init, bool train) {
+  void reset(const TensorDim &dim, const WeightInitializer init, bool train) {
+    initializer = init;
     Var_Grad::reset(dim, train);
-    initializeWeight();
   }
 
 private:
diff --git a/test/unittest/unittest_nntrainer_layers.cpp b/test/unittest/unittest_nntrainer_layers.cpp
index e0ac52e..02a74ce 100644
--- a/test/unittest/unittest_nntrainer_layers.cpp
+++ b/test/unittest/unittest_nntrainer_layers.cpp
@@ -77,6 +77,8 @@ protected:
       layer.setOutputBuffer(i, n_buffer);
     }
 
+    manager.initialize();
+
     return status;
   }
 
@@ -93,7 +95,10 @@ protected:
   // anchor point to prepare layer
   virtual void prepareLayer(){};
 
-  virtual void resetLayer() { layer = LayerType(); }
+  virtual void resetLayer() {
+    layer = LayerType();
+    manager.reset();
+  }
 
   virtual void setInputDim(const std::string &dimension) {
     ASSERT_EQ(layer.setProperty({"input_shape=" + dimension}), ML_ERROR_NONE);
-- 
2.7.4