As there is just one optimizer, shared by all layers, it must be initialized just once, by the neural network.
Also, addOptimizerVariable() has been moved out of initialize(), as initialize() should work on the optimizer's own parameters and should not need the list of weights.
Also, the set_tensor argument has been removed, as it was redundant.
**Self evaluation:**
1. Build test: [x] Passed [ ] Failed [ ] Skipped
2. Run test: [x] Passed [ ] Failed [ ] Skipped
Signed-off-by: Parichay Kapoor <pk.kapoor@samsung.com>
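
To make the new split concrete, here is a minimal, self-contained sketch of the intended call order. The types below are simplified stand-ins for illustration only; the real signatures are in the diff that follows.

```cpp
#include <iostream>
#include <memory>
#include <vector>

// Simplified stand-ins for nntrainer's Weight/Optimizer, illustration only.
struct Weight {
  bool trainable = true;
  int num_opt_vars = 0;
};

struct Optimizer {
  virtual ~Optimizer() = default;
  // One-time setup of the optimizer's own state; needs no weights.
  virtual int initialize() { return 0; /* stands in for ML_ERROR_NONE */ }
  // Per-weight hook: allocate extra variables only if the optimizer needs any.
  virtual void addOptimizerVariable(std::vector<Weight> &) {}
};

struct Adam : Optimizer {
  void addOptimizerVariable(std::vector<Weight> &weights) override {
    for (auto &w : weights) {
      if (!w.trainable)
        continue;
      w.num_opt_vars = 2; // wm and wv
    }
  }
};

int main() {
  auto opt = std::make_shared<Adam>(); // one optimizer, shared by all layers
  opt->initialize();                   // called once, by the network

  // Each layer registers its own weight list (what Layer::setOptimizer does).
  std::vector<Weight> layer1(2), layer2(3);
  opt->addOptimizerVariable(layer1);
  opt->addOptimizerVariable(layer2);

  std::cout << layer1[0].num_opt_vars << '\n'; // prints 2 (wm, wv)
}
```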
int Layer::setOptimizer(std::shared_ptr<Optimizer> opt) {
  this->opt = opt;
-  return this->opt->initialize(weights, true);
+  this->opt->addOptimizerVariable(weights);
+  return ML_ERROR_NONE;
}
int Layer::checkValidation() {
ml_logd("initializing neural network, layer size: %d", n_layers);
model_graph.setNumNetBufferSize();
+ opt->initialize();
for (unsigned int idx = 0; idx < n_layers; ++idx) {
bool first = idx == 0;
enum AdamParams { wm, wv };
-int Adam::initialize(std::vector<Weight> &weight_list, bool set_tensor) {
-  int status = ML_ERROR_NONE;
-
-  if (set_tensor) {
-    for (auto &w : weight_list) {
-      w.clearOptimizerVariables();
+void Adam::addOptimizerVariable(std::vector<Weight> &weight_list) {
+  for (auto &w : weight_list) {
+    w.clearOptimizerVariables();
-      // TODO: only trainable weights must be sent to optimizer
-      if (!w.getTrainable())
-        continue;
+    // TODO: only trainable weights must be sent to optimizer
+    if (!w.getTrainable())
+      continue;
-      w.addOptimizerVariable(w.getDim()); /** Add wm */
-      w.addOptimizerVariable(w.getDim()); /** Add wv */
-    }
+    w.addOptimizerVariable(w.getDim()); /** Add wm */
+    w.addOptimizerVariable(w.getDim()); /** Add wv */
  }
-  return status;
}
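
For context on the two variables being registered (this is standard Adam, not something introduced by this patch): wm and wv hold the running first and second moment estimates, one tensor each per trainable weight, which is why two variables of dimension w.getDim() are added:

```latex
m_t = \beta_1 m_{t-1} + (1 - \beta_1)\, g_t           % wm: first moment
v_t = \beta_2 v_{t-1} + (1 - \beta_2)\, g_t^2         % wv: second moment
w_t = w_{t-1} - \alpha\, \hat{m}_t / (\sqrt{\hat{v}_t} + \epsilon)
```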
double Adam::getLearningRate(int iteration) {
  return ll;
}
-void Adam::apply_gradient(Weight &weight, int tensor_idx, double updated_lr,
-                          int iteration) {
+void Adam::apply_gradient(Weight &weight, double updated_lr, int iteration) {
  Tensor &x = weight.getVariableRef();
  const Tensor &x_grad = weight.getGradientRef();
-   * @copydoc apply_gradient(Weight &weight, int tensor_idx, double updated_lr,
-   *          int iteration)
+   * @copydoc apply_gradient(Weight &weight, double updated_lr, int iteration)
   */
-  void apply_gradient(Weight &weight, int tensor_idx, double updated_lr,
-                      int iteration);
+  void apply_gradient(Weight &weight, double updated_lr, int iteration);
/**
void setProperty(const PropertyType type, const std::string &value = "");
  /**
-   * @copydoc Optimizer::initialize(std::vector<Weight> params, bool setTensor)
+   * @copydoc Optimizer::addOptimizerVariable(std::vector<Weight> &params)
   */
-  int initialize(std::vector<Weight> &params, bool setTensor);
+  void addOptimizerVariable(std::vector<Weight> &params);
/**
* @brief get beta1
namespace nntrainer {
-int Optimizer::initialize(std::vector<Weight> &weight_list, bool set_tensor) {
+int Optimizer::initialize() {
  return ML_ERROR_NONE;
}
  double ll = getLearningRate(iteration);
-  int idx = 0;
  for (auto &weight : weight_list) {
    if (!weight.getTrainable())
      continue;
-    apply_gradient(weight, idx, ll, iteration);
-    idx += 1;
+    apply_gradient(weight, ll, iteration);
  }
}
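
Dropping tensor_idx is possible because optimizer variables now live on each Weight, so apply_gradient can fetch them from the weight itself. A sketch of the resulting Adam body, assuming getOptimizerVariableRef as the accessor name and eliding the moment math:

```cpp
// Sketch, not the verbatim patch: per-weight optimizer variables remove the
// need to index a shared tensor list with tensor_idx.
void Adam::apply_gradient(Weight &weight, double updated_lr, int iteration) {
  Tensor &x = weight.getVariableRef();
  const Tensor &x_grad = weight.getGradientRef();

  // Assumed accessor: this weight's own moment tensors, selected by enum.
  Tensor &wm = weight.getOptimizerVariableRef(AdamParams::wm);
  Tensor &wv = weight.getOptimizerVariableRef(AdamParams::wv);

  // ... update wm/wv from x_grad, then apply the bias-corrected step to x ...
}
```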
-  /** Allow layer to initialize optimizer with itself */
+  /** Allow layer and network to initialize optimizer with themselves */
  friend class Layer;
+  friend class NeuralNetwork;
public:
/**
private:
  /**
-   * @brief initialize optimizer. Initialize Weight if it is adam
+   * @brief initialize optimizer.
+   * @retval #ML_ERROR_NONE Successful.
+   * @retval #ML_ERROR_INVALID_PARAMETER invalid parameter.
+   */
+  virtual int initialize();
+
+  /**
+   * @brief Add extra variables per weight if the optimizer needs any.
   * @param[in] params Weight list
-   * @param[in] num_weights size of the array
-   * @param[in] setTensor true if the layer need weight update.
-   *            Input Layer and Batch Normalization layer won't need it.
-   *            Therefore, it sets false.
-   * @retval #ML_ERROR_NONE Successful.
-   * @retval #ML_ERROR_INVALID_PARAMETER invalid parameter.
   */
-  virtual int initialize(std::vector<Weight> &params, bool setTensor);
+  virtual void addOptimizerVariable(std::vector<Weight> &params) {}
  /**
   * @brief apply gradient to the given weight
   * @param[in] weight Weight and gradient set to be updated
-   * @param[in] tensor_idx Idx of this tensor in the tensors list
-   * @param[in] num_weights size of the array
+   * @param[in] updated_lr updated learning rate
   * @param[in] iteration nth epoch number
   * @note weight which is called upon can be assumed to be trainable
   */
-  virtual void apply_gradient(Weight &weight, int tensor_idx, double updated_lr,
-                              int iteration) = 0;
+  virtual void apply_gradient(Weight &weight, double updated_lr,
+                              int iteration) = 0;
};
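
With the interface split this way, a stateless optimizer only has to implement apply_gradient; it inherits the default initialize() and the no-op addOptimizerVariable(). A minimal sketch against this header (the class name is hypothetical; Tensor::add_i is the in-place scaled add used by the SGD hunk below):

```cpp
// Hypothetical optimizer with no per-weight state: no extra variables are
// registered, so the inherited no-op addOptimizerVariable() suffices.
class PlainSGD : public Optimizer {
public:
  using Optimizer::Optimizer; // reuse base constructors (assumed available)

  void apply_gradient(Weight &weight, double updated_lr,
                      int iteration) override {
    Tensor &x = weight.getVariableRef();
    const Tensor &x_grad = weight.getGradientRef();
    x.add_i(x_grad, -updated_lr); // x -= updated_lr * grad
  }
};
```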
const std::string SGD::type = "sgd";
-void SGD::apply_gradient(Weight &weight, int tensor_idx, double updated_lr,
-                         int iteration) {
+void SGD::apply_gradient(Weight &weight, double updated_lr, int iteration) {
  Tensor &x = weight.getVariableRef();
  const Tensor &x_grad = weight.getGradientRef();
SGD(float lr = 0.0001f, Args... args) : Optimizer(lr, args...) {}
  /**
-   * @copydoc apply_gradient(Weight &weight, int tensor_idx, double updated_lr,
+   * @copydoc apply_gradient(Weight &weight, double updated_lr,
   *          int iteration)
   */
-  void apply_gradient(Weight &weight, int tensor_idx, double updated_lr,
-                      int iteration);
+  void apply_gradient(Weight &weight, double updated_lr,
+                      int iteration);
/**