const std::string Adam::type = "adam";
+/**
+ * @brief Indices into a Weight's optimizer-variable list as used by Adam:
+ *        wm = exponential moving average of the gradient (first moment),
+ *        wv = presumably the moving average of grad**2 (second moment; see
+ *        the beta2 member comment) — confirm against applyGradient.
+ * @note  Order must match the two addOptimizerVariable() calls in
+ *        Adam::initialize().
+ */
+enum AdamParams { wm, wv };
+
/**
 * @brief Set up Adam's per-weight state: each trainable weight receives two
 *        zero-initialized tensors (wm, wv) shaped like the weight itself.
 *        This diff moves that state from the optimizer-owned weight_mv
 *        vector onto the Weight objects themselves (opt_vars).
 * @param weight_list weights to attach optimizer variables to
 * @param set_tensor  when false, no variables are (re)allocated
 * @return ML_ERROR_NONE (no failure path is visible in this hunk)
 */
int Adam::initialize(std::vector<Weight> &weight_list, bool set_tensor) {
int status = ML_ERROR_NONE;
- weight_mv.clear();
if (set_tensor) {
- for (auto const &w : weight_list) {
+ for (auto &w : weight_list) {
+ w.clearOptimizerVariables();
+ // drop state from any previous initialize() (replaces weight_mv.clear())
+
// TODO: only trainable weights must be sent to optimizer
if (!w.getTrainable())
continue;
- Tensor m = Tensor(w.getDim());
- m.setZero();
- Tensor v = Tensor(w.getDim());
- v.setZero();
- std::pair<Tensor, Tensor> p =
- std::pair<Tensor, Tensor>(std::move(m), std::move(v));
- weight_mv.push_back(std::move(p));
+ w.addOptimizerVariable(w.getDim()); /** Add wm */
+ w.addOptimizerVariable(w.getDim()); /** Add wv */
}
}
return status;
// This is not deleted intentionally.
// float biasCorrection1 = 1 - pow(beta1, iteration + 1);
// float biasCorrection2 = 1 - pow(beta2, iteration + 1);
- // Tensor &wm = weight_mv[idx].first;
- // Tensor &wv = weight_mv[idx].second;
+ // Tensor &wm = weight.getOptimizerVariableRef(AdamParams::wm);
+ // Tensor &wv = weight.getOptimizerVariableRef(AdamParams::wv);
// wm.multiply_i(beta1);
// wm.add_i(x_grad, 1.0f - beta1);
// NOTE(review): tail of a lambda whose head lies outside this hunk;
// epsilon keeps the denominator strictly positive.
return 1 / (sqrtDouble(f) + this->epsilon);
};
// Fetch the moment tensors now stored on the weight, indexed by AdamParams.
- Tensor &wm = weight_mv[tensor_idx].first;
- Tensor &wv = weight_mv[tensor_idx].second;
+ Tensor &wm = weight.getOptimizerVariableRef(AdamParams::wm);
+ Tensor &wv = weight.getOptimizerVariableRef(AdamParams::wv);
// wm <- beta1 * wm + (1 - beta1) * x_grad : running average of the gradient
wm.multiply_i(beta1);
wm.add_i(x_grad, 1.0f - beta1);
throw_status(status);
}
-void Adam::read(std::ifstream &file) {
- /// @todo need strong exception safety guarantee
- Optimizer::read(file);
-
- if (continue_train) {
- for (auto iter = weight_mv.begin(); iter != weight_mv.end(); iter++) {
- (*iter).first.read(file);
- (*iter).second.read(file);
- }
- } else {
- size_t total_size = 0;
- for (auto iter = weight_mv.begin(); iter != weight_mv.end(); iter++)
- total_size += (*iter).first.getSize() + (*iter).second.getSize();
-
- file.seekg(total_size, std::ifstream::cur);
- }
-}
-
-void Adam::save(std::ofstream &file) {
- Optimizer::save(file);
-
- for (auto iter = weight_mv.begin(); iter != weight_mv.end(); iter++) {
- (*iter).first.save(file);
- (*iter).second.save(file);
- }
-}
-
+// NOTE(review): Adam no longer overrides read()/save(); the wm/wv state now
+// lives in each Weight's opt_vars — confirm their (de)serialization is
+// handled by the new owner, otherwise continue_train loses the moments.
} // namespace nntrainer
/**
 * @brief Initialize the per-weight optimizer variables (see Adam::initialize).
 */
int initialize(std::vector<Weight> &params, bool setTensor);
/**
- * @copydoc read(std::ifstream &file)
- */
- void read(std::ifstream &file);
-
- /**
- * @copydoc save(std::ofstream &file)
- */
- void save(std::ofstream &file);
-
- /**
 * @brief get beta1
 */
double getBeta1() { return beta1; };
static const std::string type; /**< optimizer type string, "adam" */
private:
- /**
- * @brief Internal Tensors for adam Optimizer
- */
- std::vector<std::pair<Tensor, Tensor>> weight_mv;
double beta1; /** momentum for grad */
double beta2; /** momentum for grad**2 */
*
* @note New dimension must maintain the shape of the variable
*/
-
// Record the (possibly new) initializer, then delegate to the base class.
void reset(const TensorDim &dim, const WeightInitializer init, bool train) {
initializer = init;
Var_Grad::reset(dim, train);
}
+ /**
+ * @brief Clear optimizer variables
+ * @note Invalidates any references previously obtained through
+ * getOptimizerVariableRef().
+ */
+ void clearOptimizerVariables() { opt_vars.clear(); }
+
+ /**
+ * @brief Add optimizer variables
+ * @param dim Optimizer variable dimension
+ * @note The newly appended tensor is zero-initialized.
+ */
+ void addOptimizerVariable(const TensorDim &dim) {
+ opt_vars.emplace_back(dim);
+ opt_vars.back().setZero();
+ }
+
+ /**
+ * @brief Get optimizer variable reference
+ * @param idx Index of the optimizer variable to get
+ * @retval Reference of the optimizer variable
+ * @note idx is not bounds-checked (vector operator[]); callers index with
+ * the optimizer's own enum, e.g. AdamParams.
+ */
+ Tensor &getOptimizerVariableRef(unsigned int idx) {
+ return opt_vars[idx];
+ }
+
private:
WeightInitializer initializer; /**< initializer for this variable */
+
+ std::vector<Tensor> opt_vars; /**< optimizer variables */
};
} // namespace nntrainer