[optimizer] Simplify optimizer initialization
author    Parichay Kapoor <pk.kapoor@samsung.com>
          Thu, 3 Dec 2020 06:19:02 +0000 (15:19 +0900)
committer Jijoong Moon <jijoong.moon@samsung.com>
          Thu, 10 Dec 2020 10:20:41 +0000 (19:20 +0900)
As there is just one optimizer, shared by all layers, it must be initialized just once, by the neural network.
Also, addOptimizerVariable() has been moved out of initialize(), as initialize() should work
on the optimizer's own parameters and should not need the list of weights.

Also, remove the set_tensor argument, which was redundant.
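
For illustration, the resulting setup flow looks roughly like this (a sketch inferred from the hunks below; `layers` and the explicit loop are hypothetical stand-ins, not code from this patch):

```cpp
// Sketch of the new optimizer lifecycle: the network initializes the
// shared optimizer exactly once, then each layer merely registers its
// weights so the optimizer can attach per-weight variables to them.
auto opt = std::make_shared<Adam>();

opt->initialize();          // called once, from NeuralNetwork::initialize()

for (auto &layer : layers)  // hypothetical loop over the model's layers
  layer->setOptimizer(opt); // internally calls opt->addOptimizerVariable(weights)
```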

**Self evaluation:**
1. Build test: [x]Passed [ ]Failed [ ]Skipped
2. Run test: [x]Passed [ ]Failed [ ]Skipped

Signed-off-by: Parichay Kapoor <pk.kapoor@samsung.com>
nntrainer/layers/layer.cpp
nntrainer/models/neuralnet.cpp
nntrainer/optimizers/adam.cpp
nntrainer/optimizers/adam.h
nntrainer/optimizers/optimizer.cpp
nntrainer/optimizers/optimizer_internal.h
nntrainer/optimizers/sgd.cpp
nntrainer/optimizers/sgd.h

diff --git a/nntrainer/layers/layer.cpp b/nntrainer/layers/layer.cpp
index 4342b7a..15089f2 100644
@@ -39,7 +39,8 @@ void Layer::setActivation(ActivationType acti) {
 
 int Layer::setOptimizer(std::shared_ptr<Optimizer> opt) {
   this->opt = opt;
-  return this->opt->initialize(weights, true);
+  this->opt->addOptimizerVariable(weights);
+  return ML_ERROR_NONE;
 }
 
 int Layer::checkValidation() {
diff --git a/nntrainer/models/neuralnet.cpp b/nntrainer/models/neuralnet.cpp
index 4b26813..d1bb54d 100644
@@ -175,6 +175,7 @@ int NeuralNetwork::initialize() {
   ml_logd("initializing neural network, layer size: %d", n_layers);
 
   model_graph.setNumNetBufferSize();
+  opt->initialize();
 
   for (unsigned int idx = 0; idx < n_layers; ++idx) {
     bool first = idx == 0;
diff --git a/nntrainer/optimizers/adam.cpp b/nntrainer/optimizers/adam.cpp
index df955ea..43b9715 100644
@@ -26,22 +26,17 @@ const std::string Adam::type = "adam";
 
 enum AdamParams { wm, wv };
 
-int Adam::initialize(std::vector<Weight> &weight_list, bool set_tensor) {
-  int status = ML_ERROR_NONE;
-
-  if (set_tensor) {
-    for (auto &w : weight_list) {
-      w.clearOptimizerVariables();
+void Adam::addOptimizerVariable(std::vector<Weight> &weight_list) {
+  for (auto &w : weight_list) {
+    w.clearOptimizerVariables();
 
-      // TODO: only trainable weights must be sent to optimizer
-      if (!w.getTrainable())
-        continue;
+    // TODO: only trainable weights must be sent to optimizer
+    if (!w.getTrainable())
+      continue;
 
-      w.addOptimizerVariable(w.getDim()); /** Add wm */
-      w.addOptimizerVariable(w.getDim()); /** Add wv */
-    }
+    w.addOptimizerVariable(w.getDim()); /** Add wm */
+    w.addOptimizerVariable(w.getDim()); /** Add wv */
   }
-  return status;
 }
 
 double Adam::getLearningRate(int iteration) {
@@ -56,8 +51,7 @@ double Adam::getLearningRate(int iteration) {
   return ll;
 }
 
-void Adam::apply_gradient(Weight &weight, int tensor_idx, double updated_lr,
-                          int iteration) {
+void Adam::apply_gradient(Weight &weight, double updated_lr, int iteration) {
 
   Tensor &x = weight.getVariableRef();
   const Tensor &x_grad = weight.getGradientRef();
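
Since wm/wv now live on the Weight itself, apply_gradient no longer needs an index into an optimizer-owned tensor list. As a reference for what these two variables compute, here is a minimal standalone scalar sketch of the Adam step (plain C++ on doubles, not the nntrainer Tensor API; the bias correction is folded into the learning rate, in the spirit of Adam::getLearningRate):

```cpp
#include <cmath>
#include <cstdio>

int main() {
  double w = 1.0, wm = 0.0, wv = 0.0; // weight plus its two optimizer variables
  const double lr = 0.001, beta1 = 0.9, beta2 = 0.999, epsilon = 1.0e-7;

  for (int iter = 1; iter <= 3; ++iter) {
    double grad = 2.0 * w;                         // gradient of the toy loss w^2

    wm = beta1 * wm + (1.0 - beta1) * grad;        // first moment (wm)
    wv = beta2 * wv + (1.0 - beta2) * grad * grad; // second moment (wv)

    // bias correction folded into the learning rate
    double ll = lr * std::sqrt(1.0 - std::pow(beta2, iter)) /
                (1.0 - std::pow(beta1, iter));

    w -= ll * wm / (std::sqrt(wv) + epsilon);      // apply the gradient
    std::printf("iter %d: w = %f\n", iter, w);
  }
  return 0;
}
```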
diff --git a/nntrainer/optimizers/adam.h b/nntrainer/optimizers/adam.h
index 2590fa5..42abeea 100644
@@ -39,7 +39,7 @@ public:
-   * @copydoc apply_gradient(Weight &weight, int tensor_idx, double updated_lr,
+   * @copydoc apply_gradient(Weight &weight, double updated_lr,
    * int iteration)
    */
-  void apply_gradient(Weight &weight, int tensor_idx, double updated_lr,
+  void apply_gradient(Weight &weight, double updated_lr,
                       int iteration);
 
   /**
@@ -59,9 +59,9 @@ public:
   void setProperty(const PropertyType type, const std::string &value = "");
 
   /**
-   * @copydoc Optimizer::initialize(std::vector<Weight> params, bool setTensor)
+   * @copydoc Optimizer::addOptimizerVariable(std::vector<Weight> &params)
    */
-  int initialize(std::vector<Weight> &params, bool setTensor);
+  void addOptimizerVariable(std::vector<Weight> &params);
 
   /**
    * @brief get beta1
diff --git a/nntrainer/optimizers/optimizer.cpp b/nntrainer/optimizers/optimizer.cpp
index 98ae0a4..8cabef3 100644
@@ -34,7 +34,7 @@
 
 namespace nntrainer {
 
-int Optimizer::initialize(std::vector<Weight> &weight_list, bool set_tensor) {
+int Optimizer::initialize() {
   return ML_ERROR_NONE;
 }
 
@@ -53,13 +53,11 @@ void Optimizer::apply_gradients(std::vector<Weight> &weight_list,
 
   double ll = getLearningRate(iteration);
 
-  int idx = 0;
   for (auto &weight : weight_list) {
     if (!weight.getTrainable())
       continue;
 
-    apply_gradient(weight, idx, ll, iteration);
-    idx += 1;
+    apply_gradient(weight, ll, iteration);
   }
 }
 
diff --git a/nntrainer/optimizers/optimizer_internal.h b/nntrainer/optimizers/optimizer_internal.h
index 1e51a18..a23e5d3 100644
@@ -38,6 +38,7 @@ class Optimizer : public ml::train::Optimizer {
 
-  /** Allow layer to initialize optimizer with itself */
+  /** Allow layer and network to initialize the optimizer */
   friend class Layer;
+  friend class NeuralNetwork;
 
 public:
   /**
@@ -154,26 +155,26 @@ protected:
 
 private:
   /**
-   * @brief     initialize optimizer. Initialize Weight if it is adam
+   * @brief     initialize optimizer.
+   * @retval #ML_ERROR_NONE Successful.
+   * @retval #ML_ERROR_INVALID_PARAMETER invalid parameter.
+   */
+  virtual int initialize();
+
+  /**
+   * @brief     Add extra variables per weight if the optimizer needs any.
    * @param[in] params Weight list
-   * @param[in] num_weights size of the array
-   * @param[in] setTensor true if the layer need weight update.
-   *            Input Layer and Batch Normalization layer won't need it.
-   *            Therefore, it sets false.
-   * @retval #ML_ERROR_NONE Successful.
-   * @retval #ML_ERROR_INVALID_PARAMETER invalid parameter.
    */
-  virtual int initialize(std::vector<Weight> &params, bool setTensor);
+  virtual void addOptimizerVariable(std::vector<Weight> &params) {}
 
   /**
    * @brief     apply gradient to the given weight
    * @param[in] weight Weight and gradient set to be updated
-   * @param[in] tensor_idx Idx of this tensor in the tensors list
-   * @param[in] num_weights size of the array
+   * @param[in] updated_lr learning rate supplied by getLearningRate
    * @param[in] iteration nth epoch number
    * @note weight which is called upon can be assumed to be trainable
    */
-  virtual void apply_gradient(Weight &weight, int tensor_idx, double updated_lr,
+  virtual void apply_gradient(Weight &weight, double updated_lr,
                               int iteration) = 0;
 };
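
With the interface split this way, a new optimizer only overrides what it needs. Below is a hedged sketch of a hypothetical momentum optimizer written against this header; the `Momentum` class is made up, `Weight::getOptimizerVariableRef(idx)` is assumed to return the variable added in addOptimizerVariable, and other required overrides (e.g. getType) are omitted, so this is a sketch that would only build inside the nntrainer sources with the proper includes:

```cpp
/** Hypothetical optimizer against the new split interface (sketch only) */
class Momentum : public Optimizer {
public:
  template <typename... Args>
  Momentum(float lr = 0.0001f, Args... args) : Optimizer(lr, args...) {}

  /** request one velocity tensor per trainable weight */
  void addOptimizerVariable(std::vector<Weight> &params) override {
    for (auto &w : params) {
      w.clearOptimizerVariables();
      if (!w.getTrainable())
        continue;
      w.addOptimizerVariable(w.getDim()); // velocity, same shape as the weight
    }
  }

  /** v = momentum * v + grad;  w -= updated_lr * v */
  void apply_gradient(Weight &weight, double updated_lr, int iteration) override {
    Tensor &x = weight.getVariableRef();
    const Tensor &x_grad = weight.getGradientRef();
    Tensor &v = weight.getOptimizerVariableRef(0); // assumed accessor

    v.multiply_i(0.9f);      // momentum factor, hardcoded for the sketch
    v.add_i(x_grad);
    x.add_i(v, -updated_lr); // same update shape as SGD::apply_gradient
  }
};
```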
 
diff --git a/nntrainer/optimizers/sgd.cpp b/nntrainer/optimizers/sgd.cpp
index c4ed668..f299ad4 100644
@@ -17,7 +17,7 @@ namespace nntrainer {
 
 const std::string SGD::type = "sgd";
 
-void SGD::apply_gradient(Weight &weight, int tensor_idx, double updated_lr,
+void SGD::apply_gradient(Weight &weight, double updated_lr,
                          int iteration) {
   Tensor &x = weight.getVariableRef();
   const Tensor &x_grad = weight.getGradientRef();
diff --git a/nntrainer/optimizers/sgd.h b/nntrainer/optimizers/sgd.h
index 7ac43b8..da5c9bc 100644
@@ -31,10 +31,10 @@ public:
   SGD(float lr = 0.0001f, Args... args) : Optimizer(lr, args...) {}
 
   /**
-   * @copydoc apply_gradient(Weight &weight, int tensor_idx, double updated_lr,
+   * @copydoc apply_gradient(Weight &weight, double updated_lr,
    * int iteration)
    */
-  void apply_gradient(Weight &weight, int tensor_idx, double updated_lr,
+  void apply_gradient(Weight &weight, double updated_lr,
                       int iteration);
 
   /**