From: Parichay Kapoor
Date: Tue, 1 Dec 2020 12:07:49 +0000 (+0900)
Subject: [weight] Updated weights to be vector
X-Git-Tag: submit/tizen/20201207.013927~3
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=e90f950e82f50e655dd02c3b69138535c16ca4ce;p=platform%2Fcore%2Fml%2Fnntrainer.git

[weight] Updated weights to be vector

Updated the weights of a layer to be a vector rather than a shared_ptr array.
This makes the weights easier to manage and to update internally when the
gradients start sharing memory.

See also #774 #766

**Self evaluation:**
1. Build test: [x]Passed [ ]Failed [ ]Skipped
2. Run test: [x]Passed [ ]Failed [ ]Skipped

Signed-off-by: Parichay Kapoor
---
diff --git a/nntrainer/layers/bn_layer.cpp b/nntrainer/layers/bn_layer.cpp
index 579876e3..18263dcc 100644
--- a/nntrainer/layers/bn_layer.cpp
+++ b/nntrainer/layers/bn_layer.cpp
@@ -59,19 +59,17 @@ int BatchNormalizationLayer::initialize(Manager &manager) {
     axes_to_reduce.push_back(i);
   }
 
-  setNumWeights(4);
-  weightAt(BNParams::mu) =
-    std::move(Weight(dim, initializers[BNParams::mu], false, "BN:moving_mean"));
-  ///@todo shift var to std to save computation
-  weightAt(BNParams::var) = std::move(
-    Weight(dim, initializers[BNParams::var], false, "BN:moving_variance"));
-  weightAt(BNParams::gamma) =
-    std::move(Weight(dim, initializers[BNParams::gamma], true, "BN:gamma"));
-  weightAt(BNParams::beta) =
-    std::move(Weight(dim, initializers[BNParams::beta], true, "BN:beta"));
-
-  manager.trackWeights({weightAt(BNParams::mu), weightAt(BNParams::var),
-                        weightAt(BNParams::gamma), weightAt(BNParams::beta)});
+  weights.clear();
+  if (weights.empty()) {
+    weights.reserve(4);
+    weights.push_back(createWeight(manager, dim, initializers[BNParams::mu], false, "BN::moving_mean"));
+    weights.push_back(createWeight(manager, dim, initializers[BNParams::var], false, "BN::moving_variance"));
+    weights.push_back(createWeight(manager, dim, initializers[BNParams::gamma], true, "BN::gamma"));
+    weights.push_back(createWeight(manager, dim, initializers[BNParams::beta], true, "BN::beta"));
+  } else {
+    for (size_t idx = 0; idx < weights.size(); idx ++)
+      weights[idx].reset(dim, initializers[idx], weights[idx].getTrainable());
+  }
 
   return status;
 }
diff --git a/nntrainer/layers/conv2d_layer.cpp b/nntrainer/layers/conv2d_layer.cpp
index 0d2f4c74..4e62a4f4 100644
--- a/nntrainer/layers/conv2d_layer.cpp
+++ b/nntrainer/layers/conv2d_layer.cpp
@@ -50,13 +50,14 @@ int Conv2DLayer::initialize(Manager &manager) {
     TensorDim(filter_size, in_dim.channel(), kernel_size[0], kernel_size[1]);
   TensorDim bias_dim = TensorDim(1, filter_size, 1, 1);
 
-  setNumWeights(2);
-  weightAt(ConvParams::weight) =
-    Weight(dim, weight_initializer, true, kernelPrefix);
-  weightAt(ConvParams::bias) =
-    Weight(bias_dim, bias_initializer, true, biasPrefix);
-  manager.trackWeights(
-    {weightAt(ConvParams::weight), weightAt(ConvParams::bias)});
+  if (weights.empty()) {
+    weights.reserve(2);
+    weights.push_back(createWeight(manager, dim, weight_initializer, true, kernelPrefix));
+    weights.push_back(createWeight(manager, bias_dim, bias_initializer, true, biasPrefix));
+  } else {
+    for (auto &weight : weights)
+      weight.reset(weight.getVariable().getDim(), weight_initializer, true);
+  }
 
   // this output_dim should be the same with dimension of hidden
   out_dim.batch(in_dim.batch());
diff --git a/nntrainer/layers/fc_layer.cpp b/nntrainer/layers/fc_layer.cpp
index 355fd6c9..b2164707 100644
--- a/nntrainer/layers/fc_layer.cpp
+++ b/nntrainer/layers/fc_layer.cpp
@@ -52,12 +52,14 @@ int FullyConnectedLayer::initialize(Manager &manager) {
   dim.height(input_dim[0].width());
   dim.batch(1);
 
-  setNumWeights(2);
-  weightAt(FCParams::weight) =
-    Weight(dim, weight_initializer, true, "FC:weight");
-  weightAt(FCParams::bias) =
-    Weight(bias_dim, bias_initializer, true, "FC::bias");
-  manager.trackWeights({weightAt(FCParams::weight), weightAt(FCParams::bias)});
+  if (weights.empty()) {
+    weights.reserve(2);
+    weights.push_back(createWeight(manager, dim, weight_initializer, true, "FC:weight"));
+    weights.push_back(createWeight(manager, bias_dim, bias_initializer, true, "FC:bias"));
+  } else {
+    for (auto &weight : weights)
+      weight.reset(weight.getVariable().getDim(), weight_initializer, true);
+  }
 
   return status;
 }
diff --git a/nntrainer/layers/layer.cpp b/nntrainer/layers/layer.cpp
index b3f9d6dd..cd0b0eb4 100644
--- a/nntrainer/layers/layer.cpp
+++ b/nntrainer/layers/layer.cpp
@@ -39,7 +39,7 @@ void Layer::setActivation(ActivationType acti) {
 
 int Layer::setOptimizer(std::shared_ptr<Optimizer> opt) {
   this->opt = createOptimizer(opt->getType(), *opt);
-  return this->opt->initialize(weight_list, num_weights, true);
+  return this->opt->initialize(weights, true);
 }
 
 int Layer::checkValidation() {
@@ -78,10 +78,8 @@ std::vector<Tensor> Layer::getDerivatives() {
 }
 
 void Layer::copy(std::shared_ptr<Layer> l) {
-  setNumWeights(l->num_weights);
-  for (unsigned int i = 0; i < num_weights; ++i) {
-    weightAt(i) = l->weightAt(i);
-  }
+  for(auto const &w : weights)
+    weights.push_back(w.clone());
 
   // TODO: fix this #630
   this->opt = l->opt;
@@ -149,24 +147,24 @@ sharedConstTensors Layer::backwarding_with_val(int iteration,
 }
 
 void Layer::read(std::ifstream &file) {
-  for (unsigned int i = 0; i < num_weights; ++i) {
-    weightAt(i).getVariableRef().read(file);
+  for (auto &weight : weights) {
+    weight.getVariableRef().read(file);
   }
   if (opt)
     opt->read(file);
 }
 
 void Layer::save(std::ofstream &file) {
-  for (unsigned int i = 0; i < num_weights; ++i) {
-    weightAt(i).getVariableRef().save(file);
+  for (auto &weight : weights) {
+    weight.getVariableRef().save(file);
   }
   if (opt)
     opt->save(file);
 }
 
 void Layer::applyGradient(unsigned int iteration) {
-  if (trainable && num_weights > 0) {
-    opt->apply_gradients(weight_list, num_weights, iteration);
+  if (trainable && !weights.empty()) {
+    opt->apply_gradients(weights, iteration);
   }
 }
 
@@ -326,7 +324,7 @@ void Layer::printIfValid(std::ostream &out, const PropertyType type,
 void Layer::printShapeInfo(std::ostream &out) {
   for (unsigned int idx = 0; idx < num_inputs; ++idx) {
     out << "input " << input_dim[idx];
-    for (unsigned int i = 0; i < num_weights; i++)
+    for (unsigned int i = 0; i < weights.size(); i++)
       out << "inner" << i << " " << weightAt(i).getVariable().getDim();
   }
   for (unsigned int idx = 0; idx < num_outputs; ++idx) {
@@ -403,9 +401,9 @@ void Layer::print(std::ostream &out, unsigned int flags) {
 
   if (flags & PRINT_WEIGHTS) {
     out << "======weights: " << std::endl;
-    for (unsigned int i = 0; i < num_weights; ++i) {
-      out << '[' << weightAt(i).getName() << ']' << std::endl;
-      out << weightAt(i).var;
+    for (auto const &weight : weights) {
+      out << '[' << weight.getName() << ']' << std::endl;
+      out << weight.getVariable();
     }
   }
 
diff --git a/nntrainer/layers/layer_internal.h b/nntrainer/layers/layer_internal.h
index 64452d26..88eaa75f 100644
--- a/nntrainer/layers/layer_internal.h
+++ b/nntrainer/layers/layer_internal.h
@@ -90,7 +90,6 @@ public:
     bias_initializer(bias_initializer_),
     flatten(flatten_),
     trainable(trainable_),
-    num_weights(0),
     num_inputs(1),
     num_outputs(1) {
     input_dim.resize(1);
@@ -257,7 +256,7 @@ public:
    * @brief get all weights of the layer
    * @retval vector of all params
    */
-  std::shared_ptr<Weight> getWeights() { return weight_list; }
+  std::vector<Weight> getWeights() { return weights; }
 
   /**
   * @brief get if the output of this layer must be flatten
@@ -289,11 +288,7 @@ public:
   * @exception std::out_of_range for index out of range
   */
   Weight &weightAt(const unsigned int position) {
-    if (position >= num_weights) {
-      throw std::out_of_range("index out of range");
-    }
-
-    return weight_list.get()[position];
+    return weights[position];
   }
 
   /**
@@ -301,7 +296,7 @@ public:
   *
   * @return unsigned int number of weights
   */
-  unsigned int getNumWeights() { return num_weights; }
+  unsigned int getNumWeights() { return weights.size(); }
 
   /**
   * @brief Set the batch for the layer
@@ -448,36 +443,10 @@ protected:
   bool trainable;
 
   /**
-   * @brief reserve memory for @a weight_list and set @a num_weights
-   * @exception std::invalid_argument when num_weights is already set and
-   * shouldn't be changed again.
+   * @brief weight_list in this layer. This contains all weights of the
+   * layer.
    */
-  void setNumWeights(unsigned int psize) {
-    if (psize == num_weights)
-      return;
-
-    if (num_weights > 0) {
-      throw std::invalid_argument("param size can't be set once it is set");
-    }
-
-    num_weights = psize;
-    weight_list = std::shared_ptr<Weight>(new Weight[num_weights],
-                                          std::default_delete<Weight[]>());
-  }
-
-  /**
-   * @brief weight_list in this layer. This contains trainable weights of
-   * layers.
-   */
-  std::shared_ptr<Weight> weight_list;
-
-  unsigned int num_weights; /**< length of weights.
-                                 This shouldn't be changed
-                                 after initiation
-                                 use setNumWeights() to avoid
-                                 setting parameters twice */
-
-  std::vector> weights;
+  std::vector<Weight> weights;
 
   /**
   * @brief Number of inputs this layer will requries/will operate on
diff --git a/nntrainer/manager.h b/nntrainer/manager.h
index b1a7e42c..b6c51d47 100644
--- a/nntrainer/manager.h
+++ b/nntrainer/manager.h
@@ -50,7 +50,7 @@ public:
   *
   * @param ws Weights to be tracked
   */
-  void trackWeights(std::vector<Weight> ws) {
+  void trackWeights(std::vector<Weight> &ws) {
     weights.reserve(weights.size() + ws.size());
     weights.insert(weights.end(), ws.begin(), ws.end());
   }
diff --git a/nntrainer/optimizers/adam.cpp b/nntrainer/optimizers/adam.cpp
index 728c6f65..4a0e8081 100644
--- a/nntrainer/optimizers/adam.cpp
+++ b/nntrainer/optimizers/adam.cpp
@@ -24,15 +24,12 @@ namespace nntrainer {
 
 const std::string Adam::type = "adam";
 
-int Adam::initialize(std::shared_ptr<Weight> weight_list,
-                     unsigned int num_weights, bool set_tensor) {
+int Adam::initialize(std::vector<Weight> &weight_list, bool set_tensor) {
   int status = ML_ERROR_NONE;
   weight_mv.clear();
 
   if (set_tensor) {
-    for (unsigned int i = 0; i < num_weights; ++i) {
-      Weight &w = weight_list.get()[i];
-
+    for (auto const &w : weight_list) {
       // TODO: only trainable weights must be sent to optimizer
       if (!w.getTrainable())
         continue;
diff --git a/nntrainer/optimizers/adam.h b/nntrainer/optimizers/adam.h
index a9837bac..a43c57cd 100644
--- a/nntrainer/optimizers/adam.h
+++ b/nntrainer/optimizers/adam.h
@@ -59,11 +59,9 @@ public:
   void setProperty(const PropertyType type, const std::string &value = "");
 
   /**
-   * @copydoc Optimizer::initialize(std::shared_ptr<Weight> params, unsigned int
-   num_weights, bool setTensor)
+   * @copydoc Optimizer::initialize(std::vector<Weight> params, bool setTensor)
    */
-  int initialize(std::shared_ptr<Weight> params, unsigned int num_weights,
-                 bool setTensor);
+  int initialize(std::vector<Weight> &params, bool setTensor);
 
   /**
   * @copydoc read(std::ifstream &file)
diff --git a/nntrainer/optimizers/optimizer.cpp b/nntrainer/optimizers/optimizer.cpp
index e16b4287..6ef1eded 100644
--- a/nntrainer/optimizers/optimizer.cpp
+++ b/nntrainer/optimizers/optimizer.cpp
@@ -34,8 +34,7 @@
 
 namespace nntrainer {
 
-int Optimizer::initialize(std::shared_ptr<Weight> weight_list,
-                          unsigned int num_weights, bool set_tensor) {
+int Optimizer::initialize(std::vector<Weight> &weight_list, bool set_tensor) {
   return ML_ERROR_NONE;
 }
 
@@ -49,15 +48,12 @@ double Optimizer::getLearningRate(int iteration) {
   return ll;
 }
 
-void Optimizer::apply_gradients(std::shared_ptr<Weight> weight_list,
-                                unsigned int num_weights, int iteration) {
+void Optimizer::apply_gradients(std::vector<Weight> &weight_list, int iteration) {
   double ll = getLearningRate(iteration);
 
   int idx = 0;
 
-  for (unsigned int i = 0; i < num_weights; ++i) {
-    Weight &weight = weight_list.get()[i];
-
+  for (auto &weight : weight_list) {
     if (!weight.getTrainable())
       continue;
diff --git a/nntrainer/optimizers/optimizer_internal.h b/nntrainer/optimizers/optimizer_internal.h
index 1b3f9e56..1e51a18c 100644
--- a/nntrainer/optimizers/optimizer_internal.h
+++ b/nntrainer/optimizers/optimizer_internal.h
@@ -105,11 +105,9 @@ public:
   /**
   * @brief apply gradient to weight_list
   * @param[in] params Weight list
-  * @param[in] num_weights size of the array
   * @param[in] iteration nth epoch number
   */
-  void apply_gradients(std::shared_ptr<Weight> params, unsigned int num_weights,
-                       int iteration);
+  void apply_gradients(std::vector<Weight> &params, int iteration);
 
   /**
   * @brief Read Training optimizer paramters from file
@@ -165,8 +163,7 @@ private:
   * @retval #ML_ERROR_NONE Successful.
   * @retval #ML_ERROR_INVALID_PARAMETER invalid parameter.
   */
-  virtual int initialize(std::shared_ptr<Weight> params,
-                         unsigned int num_weights, bool setTensor);
+  virtual int initialize(std::vector<Weight> &params, bool setTensor);
 
   /**
   * @brief apply gradient to the given weight
diff --git a/nntrainer/tensor/var_grad.h b/nntrainer/tensor/var_grad.h
index d139bec4..8d25722b 100644
--- a/nntrainer/tensor/var_grad.h
+++ b/nntrainer/tensor/var_grad.h
@@ -144,7 +144,7 @@ public:
   *
   * @return Cloned copy
   */
-  virtual Var_Grad clone() const {
+  Var_Grad clone() const {
     Var_Grad vg(*this);
     vg.var = std::make_shared<Tensor>(this->var->clone());
     vg.grad = std::make_shared<Tensor>(this->grad->clone());
@@ -152,6 +152,22 @@ public:
     return vg;
   };
 
+  /**
+   * @brief Reset the weight
+   *
+   * @param dim Variable and gradient tensor dimension
+   * @param train If the variable is trainable
+   *
+   * @note New dimension must maintain the shape of the variable
+   */
+
+  void reset (const TensorDim &dim, bool train) {
+    var->reshape(dim);
+    grad->reshape(dim);
+    trainable = train;
+    resetGradient();
+  }
+
 protected:
   /**
   * @brief Get the variable tensor (by reference)
diff --git a/nntrainer/tensor/weight.h b/nntrainer/tensor/weight.h
index 9ae5e309..4f11629d 100644
--- a/nntrainer/tensor/weight.h
+++ b/nntrainer/tensor/weight.h
@@ -127,11 +127,11 @@ public:
   Weight &operator=(Weight &&rhs) = default;
 
   /**
-   * @bried Clone the currnet object
+   * @brief Clone the currnet object
   *
   * @return Cloned copy
   */
-  Weight clone() {
+  Weight clone() const {
     Weight w(*this);
     if (!var->uninitialized())
       w.var = std::make_shared<Tensor>(this->var->clone());
@@ -141,6 +141,21 @@ public:
     return w;
   }
 
+  /**
+   * @brief Reset the weight
+   *
+   * @param dim Variable and gradient tensor dimension
+   * @param init Initializer for the tensor
+   * @param train If the variable is trainable
+   *
+   * @note New dimension must maintain the shape of the variable
+   */
+
+  void reset (const TensorDim &dim, const WeightInitializer init, bool train) {
+    Var_Grad::reset(dim, train);
+    initializeWeight();
+  }
+
 private:
   WeightInitializer initializer; /**< initializer for this variable */
 };
diff --git a/test/unittest/unittest_nntrainer_layers.cpp b/test/unittest/unittest_nntrainer_layers.cpp
index af11820d..e0ac52eb 100644
--- a/test/unittest/unittest_nntrainer_layers.cpp
+++ b/test/unittest/unittest_nntrainer_layers.cpp
@@ -640,12 +640,12 @@ protected:
   }
 
   void matchUpdatedWeightsGradients() {
-    std::shared_ptr<nntrainer::Weight> params = layer.getWeights();
+    std::vector<nntrainer::Weight> params = layer.getWeights();
 
     /** Match gradients and updated weights */
     for (int idx = 0; idx < 2; ++idx) {
-      matchOutput(params.get()[idx].getGradient(), grad[idx]);
-      matchOutput(params.get()[idx].getVariable(), new_w[idx]);
+      matchOutput(params[idx].getGradient(), grad[idx]);
+      matchOutput(params[idx].getVariable(), new_w[idx]);
     }
   }
 
@@ -680,7 +680,7 @@ TEST_F(nntrainer_FullyConnectedLayer_TFmatch, forwarding_backwarding_00_p) {
 
   matchOutput(result, "tc_fc_1_goldenFCGradientAdam.out");
 
-  nntrainer::Weight *param_data = layer.getWeights().get();
+  auto param_data = layer.getWeights();
   nntrainer::Weight &param = param_data[0];
   nntrainer::Tensor weight = param.getVariable();
 
@@ -1181,7 +1181,7 @@ TEST_F(nntrainer_Conv2DLayer, backwarding_01_p) {
   EXPECT_NO_THROW(result = *layer.backwarding_with_val(
                     1, {MAKE_SHARED_TENSOR(derivatives)})[0]);
 
-  nntrainer::Weight *param_data = layer.getWeights().get();
+  auto param_data = layer.getWeights();
   const float *weight_grad = param_data[0].getGradient().getData();
   const float *bias_grad = param_data[1].getGradient().getData();
 
@@ -1218,7 +1218,7 @@ TEST_F(nntrainer_Conv2DLayer, backwarding_02_p) {
   EXPECT_NO_THROW(result = *layer.backwarding_with_val(
                     1, {MAKE_SHARED_TENSOR(derivatives)})[0]);
 
-  nntrainer::Weight *param_data = layer.getWeights().get();
+  auto param_data = layer.getWeights();
   const float *weight_grad = param_data[0].getGradient().getData();
   const float *bias_grad = param_data[1].getGradient().getData();
 
@@ -1328,7 +1328,7 @@ TEST_F(nntrainer_Conv2DLayer, DISABLED_backwarding_03_p) {
     result = *layer1.backwarding_with_val(1, {MAKE_SHARED_TENSOR(result2)})[0]);
 
   /** Compare second conv */
-  nntrainer::Weight *param_data = layer2.getWeights().get();
+  auto param_data = layer2.getWeights();
   const float *weight_grad = param_data[0].getGradient().getData();
   const float *bias_grad = param_data[1].getGradient().getData();
 
@@ -1336,7 +1336,7 @@ TEST_F(nntrainer_Conv2DLayer, DISABLED_backwarding_03_p) {
   matchOutput(bias_grad, "tc_conv2d_int_goldenBias2Grad.out");
 
   /** Compare first conv */
-  param_data = layer1.getWeights().get();
+  param_data = layer1.getWeights();
   weight_grad = param_data[0].getGradient().getData();
   bias_grad = param_data[1].getGradient().getData();
 
@@ -1371,7 +1371,7 @@ TEST_F(nntrainer_Conv2DLayer, backwarding_04_p) {
  EXPECT_NO_THROW(result = *layer.backwarding_with_val(
                    1, {MAKE_SHARED_TENSOR(derivatives)})[0]);
 
-  nntrainer::Weight *param_data = layer.getWeights().get();
+  auto param_data = layer.getWeights();
  const float *weight_grad = param_data[0].getGradient().getData();
  const float *bias_grad = param_data[1].getGradient().getData();
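
As an illustration of the pattern this commit moves to, here is a minimal, self-contained C++ sketch (not part of the patch). Weight, Layer and apply_gradients below are simplified stand-ins for the nntrainer classes, and the createWeight()/Manager registration step is omitted; the sketch only shows layers owning a std::vector of weights that the optimizer iterates directly, instead of a shared_ptr array plus a separate count.

    // weight_vector_sketch.cpp -- illustrative stand-ins only, not nntrainer code
    #include <iostream>
    #include <string>
    #include <vector>

    struct Weight {     // stand-in for nntrainer::Weight
      std::string name;
      bool trainable;
      float var;        // stands in for the variable tensor
      float grad;       // stands in for the gradient tensor
    };

    struct Layer {      // stand-in for the layer side of the change
      std::vector<Weight> weights;

      void initialize() {
        if (weights.empty()) {   // first initialize(): create the weights
          weights.reserve(2);
          weights.push_back({"FC:weight", true, 0.0f, 0.0f});
          weights.push_back({"FC:bias", true, 0.0f, 0.0f});
        } else {                 // re-initialize: reset the entries in place
          for (auto &w : weights)
            w.grad = 0.0f;
        }
      }
    };

    // Mirrors the shape of the new Optimizer::apply_gradients(std::vector<Weight> &, int):
    // a range-for over the vector, skipping non-trainable weights.
    void apply_gradients(std::vector<Weight> &weight_list, float lr) {
      for (auto &w : weight_list) {
        if (!w.trainable)
          continue;
        w.var -= lr * w.grad;
      }
    }

    int main() {
      Layer fc;
      fc.initialize();
      fc.weights[0].grad = 0.5f; // pretend backwarding produced a gradient
      apply_gradients(fc.weights, 0.1f);
      for (const auto &w : fc.weights)
        std::cout << w.name << ": " << w.var << '\n';
      return 0;
    }

Keeping the weights in a std::vector means the element count travels with the container, range-for replaces the manual index loops, and each entry can later be reset in place (as the new Weight::reset in the diff does) or handed to the manager once gradients share memory.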