axes_to_reduce.push_back(i);
}
- setNumWeights(4);
- weightAt(BNParams::mu) =
- std::move(Weight(dim, initializers[BNParams::mu], false, "BN:moving_mean"));
- ///@todo shift var to std to save computation
- weightAt(BNParams::var) = std::move(
- Weight(dim, initializers[BNParams::var], false, "BN:moving_variance"));
- weightAt(BNParams::gamma) =
- std::move(Weight(dim, initializers[BNParams::gamma], true, "BN:gamma"));
- weightAt(BNParams::beta) =
- std::move(Weight(dim, initializers[BNParams::beta], true, "BN:beta"));
-
- manager.trackWeights({weightAt(BNParams::mu), weightAt(BNParams::var),
- weightAt(BNParams::gamma), weightAt(BNParams::beta)});
+ if (weights.empty()) {
+   weights.reserve(4);
+   weights.push_back(createWeight(manager, dim, initializers[BNParams::mu],
+                                  false, "BN::moving_mean"));
+   weights.push_back(createWeight(manager, dim, initializers[BNParams::var],
+                                  false, "BN::moving_variance"));
+   weights.push_back(createWeight(manager, dim, initializers[BNParams::gamma],
+                                  true, "BN::gamma"));
+   weights.push_back(createWeight(manager, dim, initializers[BNParams::beta],
+                                  true, "BN::beta"));
+ } else {
+   for (size_t idx = 0; idx < weights.size(); idx++)
+     weights[idx].reset(dim, initializers[idx], weights[idx].getTrainable());
+ }
return status;
}
TensorDim(filter_size, in_dim.channel(), kernel_size[0], kernel_size[1]);
TensorDim bias_dim = TensorDim(1, filter_size, 1, 1);
- setNumWeights(2);
- weightAt(ConvParams::weight) =
- Weight(dim, weight_initializer, true, kernelPrefix);
- weightAt(ConvParams::bias) =
- Weight(bias_dim, bias_initializer, true, biasPrefix);
- manager.trackWeights(
- {weightAt(ConvParams::weight), weightAt(ConvParams::bias)});
+ if (weights.empty()) {
+   weights.reserve(2);
+   weights.push_back(
+     createWeight(manager, dim, weight_initializer, true, kernelPrefix));
+   weights.push_back(
+     createWeight(manager, bias_dim, bias_initializer, true, biasPrefix));
+ } else {
+   weights[ConvParams::weight].reset(dim, weight_initializer, true);
+   weights[ConvParams::bias].reset(bias_dim, bias_initializer, true);
+ }
// this output_dim should be the same as the dimension of hidden
out_dim.batch(in_dim.batch());
dim.height(input_dim[0].width());
dim.batch(1);
- setNumWeights(2);
- weightAt(FCParams::weight) =
- Weight(dim, weight_initializer, true, "FC:weight");
- weightAt(FCParams::bias) =
- Weight(bias_dim, bias_initializer, true, "FC::bias");
- manager.trackWeights({weightAt(FCParams::weight), weightAt(FCParams::bias)});
+ if (weights.empty()) {
+   weights.reserve(2);
+   weights.push_back(
+     createWeight(manager, dim, weight_initializer, true, "FC:weight"));
+   weights.push_back(
+     createWeight(manager, bias_dim, bias_initializer, true, "FC:bias"));
+ } else {
+   weights[FCParams::weight].reset(dim, weight_initializer, true);
+   weights[FCParams::bias].reset(bias_dim, bias_initializer, true);
+ }
return status;
}
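All three layers above follow the same pattern: on the first initialize() the weights are created through the manager, and on a later re-initialize the existing Weight objects are reset in place so the handles the manager already tracks stay valid and nothing gets registered twice. A hypothetical call sequence, purely for illustration (the layer type, property setup and the initialize(Manager &) signature are assumptions, not part of this patch):

    // Hypothetical lifecycle sketch; names and signatures are assumptions.
    nntrainer::Manager manager;
    nntrainer::FullyConnectedLayer fc;
    /* ... properties and input dimension set up here ... */

    fc.initialize(manager); // first call: weights are created and tracked
    /* ... input dimension or batch size changes ... */
    fc.initialize(manager); // weights.empty() is now false, so reset() runs instead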
int Layer::setOptimizer(std::shared_ptr<Optimizer> opt) {
this->opt = createOptimizer(opt->getType(), *opt);
- return this->opt->initialize(weight_list, num_weights, true);
+ return this->opt->initialize(weights, true);
}
int Layer::checkValidation() {
}
void Layer::copy(std::shared_ptr<Layer> l) {
- setNumWeights(l->num_weights);
- for (unsigned int i = 0; i < num_weights; ++i) {
- weightAt(i) = l->weightAt(i);
- }
+ weights.clear();
+ weights.reserve(l->weights.size());
+ for (auto const &w : l->weights)
+   weights.push_back(w.clone());
// TODO: fix this #630
this->opt = l->opt;
}
void Layer::read(std::ifstream &file) {
- for (unsigned int i = 0; i < num_weights; ++i) {
- weightAt(i).getVariableRef().read(file);
+ for (auto &weight : weights) {
+ weight.getVariableRef().read(file);
}
if (opt)
opt->read(file);
}
void Layer::save(std::ofstream &file) {
- for (unsigned int i = 0; i < num_weights; ++i) {
- weightAt(i).getVariableRef().save(file);
+ for (auto &weight : weights) {
+ weight.getVariableRef().save(file);
}
if (opt)
opt->save(file);
}
void Layer::applyGradient(unsigned int iteration) {
- if (trainable && num_weights > 0) {
- opt->apply_gradients(weight_list, num_weights, iteration);
+ if (trainable && !weights.empty()) {
+ opt->apply_gradients(weights, iteration);
}
}
void Layer::printShapeInfo(std::ostream &out) {
for (unsigned int idx = 0; idx < num_inputs; ++idx) {
out << "input " << input_dim[idx];
- for (unsigned int i = 0; i < num_weights; i++)
+ for (unsigned int i = 0; i < weights.size(); i++)
out << "inner" << i << " " << weightAt(i).getVariable().getDim();
}
for (unsigned int idx = 0; idx < num_outputs; ++idx) {
if (flags & PRINT_WEIGHTS) {
out << "======weights: " << std::endl;
- for (unsigned int i = 0; i < num_weights; ++i) {
- out << '[' << weightAt(i).getName() << ']' << std::endl;
- out << weightAt(i).var;
+ for (auto const &weight : weights) {
+ out << '[' << weight.getName() << ']' << std::endl;
+ out << weight.getVariable();
}
}
bias_initializer(bias_initializer_),
flatten(flatten_),
trainable(trainable_),
- num_weights(0),
num_inputs(1),
num_outputs(1) {
input_dim.resize(1);
* @brief get all weights of the layer
* @retval vector of all params
*/
- std::shared_ptr<Weight> getWeights() { return weight_list; }
+ std::vector<Weight> getWeights() { return weights; }
/**
* @brief get if the output of this layer must be flatten
* @exception std::out_of_range for index out of range
*/
Weight &weightAt(const unsigned int position) {
- if (position >= num_weights) {
- throw std::out_of_range("index out of range");
- }
-
- return weight_list.get()[position];
+ return weights.at(position);
}
/**
*
* @return unsigned int number of weights
*/
- unsigned int getNumWeights() { return num_weights; }
+ unsigned int getNumWeights() { return weights.size(); }
/**
* @brief Set the batch for the layer
bool trainable;
/**
- * @brief reserve memory for @a weight_list and set @a num_weights
- * @exception std::invalid_argument when num_weights is already set and
- * shouldn't be changed again.
+ * @brief weights in this layer. This contains all the weights of the
+ * layer.
*/
- void setNumWeights(unsigned int psize) {
- if (psize == num_weights)
- return;
-
- if (num_weights > 0) {
- throw std::invalid_argument("param size can't be set once it is set");
- }
-
- num_weights = psize;
- weight_list = std::shared_ptr<Weight>(new Weight[num_weights],
- std::default_delete<Weight[]>());
- }
-
- /**
- * @brief weight_list in this layer. This contains trainable weights of
- * layers.
- */
- std::shared_ptr<Weight> weight_list;
-
- unsigned int num_weights; /**< length of weights.
- This shouldn't be changed
- after initiation
- use setNumWeights() to avoid
- setting parameters twice */
-
- std::vector<std::shared_ptr<Weight>> weights;
+ std::vector<Weight> weights;
/**
* @brief Number of inputs this layer will require/will operate on
*
* @param ws Weights to be tracked
*/
- void trackWeights(std::vector<Weight> ws) {
+ void trackWeights(const std::vector<Weight> &ws) {
weights.reserve(weights.size() + ws.size());
weights.insert(weights.end(), ws.begin(), ws.end());
}
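Note that trackWeights() copies the Weight handles into the manager's own vector while the layer keeps its entries in `weights`. Because Var_Grad stores its variable and gradient as shared_ptr<Tensor>, both sides keep referring to the same underlying storage; only clone() detaches them. A minimal sketch of that behaviour, illustrative only: the Manager type name, the header names and the WEIGHT_ZEROS initializer value are assumptions, while the Weight(dim, initializer, trainable, name) argument order matches the calls elsewhere in this patch.

    #include <vector>

    #include <manager.h> // assumed header for the manager used above
    #include <weight.h>  // assumed header for Weight / WeightInitializer

    void track_example(nntrainer::Manager &manager) {
      std::vector<nntrainer::Weight> ws;
      // Same (dim, initializer, trainable, name) order as the layer code above.
      ws.emplace_back(nntrainer::TensorDim(1, 1, 1, 4), nntrainer::WEIGHT_ZEROS,
                      true, "FC:weight");

      manager.trackWeights(ws);

      // The manager now holds copies of the Weight handles; since Var_Grad keeps
      // its tensors behind shared_ptr, those copies alias the same variable and
      // gradient memory as the entries the layer keeps in ws.
    }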
const std::string Adam::type = "adam";
-int Adam::initialize(std::shared_ptr<Weight> weight_list,
- unsigned int num_weights, bool set_tensor) {
+int Adam::initialize(std::vector<Weight> &weight_list, bool set_tensor) {
int status = ML_ERROR_NONE;
weight_mv.clear();
if (set_tensor) {
- for (unsigned int i = 0; i < num_weights; ++i) {
- Weight &w = weight_list.get()[i];
-
+ for (auto const &w : weight_list) {
// TODO: only trainable weights must be sent to optimizer
if (!w.getTrainable())
continue;
void setProperty(const PropertyType type, const std::string &value = "");
/**
- * @copydoc Optimizer::initialize(std::shared_ptr<Weight> params, unsigned int
- num_weights, bool setTensor)
+ * @copydoc Optimizer::initialize(std::vector<Weight> &params, bool setTensor)
*/
- int initialize(std::shared_ptr<Weight> params, unsigned int num_weights,
- bool setTensor);
+ int initialize(std::vector<Weight> &params, bool setTensor);
/**
* @copydoc read(std::ifstream &file)
namespace nntrainer {
-int Optimizer::initialize(std::shared_ptr<Weight> weight_list,
- unsigned int num_weights, bool set_tensor) {
+int Optimizer::initialize(std::vector<Weight> &weight_list, bool set_tensor) {
return ML_ERROR_NONE;
}
return ll;
}
-void Optimizer::apply_gradients(std::shared_ptr<Weight> weight_list,
- unsigned int num_weights, int iteration) {
+void Optimizer::apply_gradients(std::vector<Weight> &weight_list,
+                                int iteration) {
double ll = getLearningRate(iteration);
int idx = 0;
- for (unsigned int i = 0; i < num_weights; ++i) {
- Weight &weight = weight_list.get()[i];
-
+ for (auto &weight : weight_list) {
if (!weight.getTrainable())
continue;
/**
* @brief apply gradient to weight_list
* @param[in] params Weight list
- * @param[in] num_weights size of the array
* @param[in] iteration nth epoch number
*/
- void apply_gradients(std::shared_ptr<Weight> params, unsigned int num_weights,
- int iteration);
+ void apply_gradients(std::vector<Weight> &params, int iteration);
/**
* @brief Read training optimizer parameters from file
* @retval #ML_ERROR_NONE Successful.
* @retval #ML_ERROR_INVALID_PARAMETER invalid parameter.
*/
- virtual int initialize(std::shared_ptr<Weight> params,
- unsigned int num_weights, bool setTensor);
+ virtual int initialize(std::vector<Weight> &params, bool setTensor);
/**
* @brief apply gradient to the given weight
*
* @return Cloned copy
*/
- virtual Var_Grad clone() const {
+ Var_Grad clone() const {
Var_Grad vg(*this);
vg.var = std::make_shared<Tensor>(this->var->clone());
vg.grad = std::make_shared<Tensor>(this->grad->clone());
return vg;
};
+ /**
+ * @brief Reset the weight
+ *
+ * @param dim Variable and gradient tensor dimension
+ * @param train If the variable is trainable
+ *
+ * @note New dimension must maintain the total size (element count) of the variable
+ */
+ void reset(const TensorDim &dim, bool train) {
+   var->reshape(dim);
+   grad->reshape(dim);
+   trainable = train;
+   resetGradient();
+ }
+
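Since reset() reshapes the existing variable and gradient tensors in place rather than reallocating them, the new dimension has to describe the same number of elements, which is what the @note above is getting at. A small illustrative sketch, assuming a Var_Grad(dim, trainable, name) constructor and tensors that are already allocated:

    nntrainer::Var_Grad vg(nntrainer::TensorDim(1, 1, 2, 8), true, "vg");

    // 1x1x2x8 and 1x1x4x4 both hold 16 elements, so the in-place reshape succeeds.
    vg.reset(nntrainer::TensorDim(1, 1, 4, 4), true);

    // vg.reset(nntrainer::TensorDim(1, 1, 4, 8), true);
    // would not be valid: reshape cannot change the element count.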
protected:
/**
* @brief Get the variable tensor (by reference)
Weight &operator=(Weight &&rhs) = default;
/**
- * @bried Clone the currnet object
+ * @brief Clone the current object
*
* @return Cloned copy
*/
- Weight clone() {
+ Weight clone() const {
Weight w(*this);
if (!var->uninitialized())
w.var = std::make_shared<Tensor>(this->var->clone());
return w;
}
+ /**
+ * @brief Reset the weight
+ *
+ * @param dim Variable and gradient tensor dimension
+ * @param init Initializer for the tensor
+ * @param train If the variable is trainable
+ *
+ * @note New dimension must maintain the total size (element count) of the variable
+ */
+ void reset(const TensorDim &dim, const WeightInitializer init, bool train) {
+   initializer = init;
+   Var_Grad::reset(dim, train);
+   initializeWeight();
+ }
+
private:
WeightInitializer initializer; /**< initializer for this variable */
};
}
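The unit-test snippets below read gradients through the vector returned by getWeights(), which is a by-value copy of the layer's weights. That still observes live data because copying a Weight only copies the shared_ptr handles to its tensors, so the copies point at the same variable and gradient buffers the layer updates during backwarding; clone(), as used by Layer::copy() above, is what produces detached storage. A short sketch of that distinction, assuming the layer.h header name and an already-initialized layer with at least one weight:

    #include <layer.h> // assumed header providing nntrainer::Layer and nntrainer::Weight

    void inspect_gradients(nntrainer::Layer &layer) {
      // By-value copy of the weights; each copied Weight still refers to the
      // layer's own variable/gradient tensors through shared_ptr.
      auto params = layer.getWeights();
      const float *grad_data = params[0].getGradient().getData();
      (void)grad_data; // aliases the buffer the layer fills during backwarding

      // clone() deep-copies the tensors when an independent copy is needed,
      // which is exactly what Layer::copy() relies on.
      nntrainer::Weight detached = params[0].clone();
      (void)detached;
    }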
void matchUpdatedWeightsGradients() {
- std::shared_ptr<nntrainer::Weight> params = layer.getWeights();
+ std::vector<nntrainer::Weight> params = layer.getWeights();
/** Match gradients and updated weights */
for (int idx = 0; idx < 2; ++idx) {
- matchOutput(params.get()[idx].getGradient(), grad[idx]);
- matchOutput(params.get()[idx].getVariable(), new_w[idx]);
+ matchOutput(params[idx].getGradient(), grad[idx]);
+ matchOutput(params[idx].getVariable(), new_w[idx]);
}
}
matchOutput(result, "tc_fc_1_goldenFCGradientAdam.out");
- nntrainer::Weight *param_data = layer.getWeights().get();
+ auto param_data = layer.getWeights();
nntrainer::Weight &param = param_data[0];
nntrainer::Tensor weight = param.getVariable();
EXPECT_NO_THROW(result = *layer.backwarding_with_val(
1, {MAKE_SHARED_TENSOR(derivatives)})[0]);
- nntrainer::Weight *param_data = layer.getWeights().get();
+ auto param_data = layer.getWeights();
const float *weight_grad = param_data[0].getGradient().getData();
const float *bias_grad = param_data[1].getGradient().getData();
EXPECT_NO_THROW(result = *layer.backwarding_with_val(
1, {MAKE_SHARED_TENSOR(derivatives)})[0]);
- nntrainer::Weight *param_data = layer.getWeights().get();
+ auto param_data = layer.getWeights();
const float *weight_grad = param_data[0].getGradient().getData();
const float *bias_grad = param_data[1].getGradient().getData();
result = *layer1.backwarding_with_val(1, {MAKE_SHARED_TENSOR(result2)})[0]);
/** Compare second conv */
- nntrainer::Weight *param_data = layer2.getWeights().get();
+ auto param_data = layer2.getWeights();
const float *weight_grad = param_data[0].getGradient().getData();
const float *bias_grad = param_data[1].getGradient().getData();
matchOutput(bias_grad, "tc_conv2d_int_goldenBias2Grad.out");
/** Compare first conv */
- param_data = layer1.getWeights().get();
+ param_data = layer1.getWeights();
weight_grad = param_data[0].getGradient().getData();
bias_grad = param_data[1].getGradient().getData();
EXPECT_NO_THROW(result = *layer.backwarding_with_val(
1, {MAKE_SHARED_TENSOR(derivatives)})[0]);
- nntrainer::Weight *param_data = layer.getWeights().get();
+ auto param_data = layer.getWeights();
const float *weight_grad = param_data[0].getGradient().getData();
const float *bias_grad = param_data[1].getGradient().getData();