$(NNTRAINER_ROOT)/nntrainer/src/flatten_layer.cpp \
$(NNTRAINER_ROOT)/nntrainer/src/model_loader.cpp \
$(NNTRAINER_ROOT)/nntrainer/src/addition_layer.cpp \
- $(NNTRAINER_ROOT)/nntrainer/src/blas_interface.cpp
+ $(NNTRAINER_ROOT)/nntrainer/src/blas_interface.cpp \
+ $(NNTRAINER_ROOT)/nntrainer/src/weight.cpp
NNTRAINER_INCLUDES := $(NNTRAINER_ROOT)/nntrainer/include \
$(NNTRAINER_ROOT)/api \
#include <optimizer.h>
#include <tensor.h>
#include <tensor_dim.h>
+#include <weight.h>
namespace nntrainer {
};
/**
- * @brief Enumeration of Weight Initialization Type
- */
-enum class WeightInitializer {
- WEIGHT_ZEROS, /** Zero initialization */
- WEIGHT_ONES, /** One initialization */
- WEIGHT_LECUN_NORMAL, /** LeCun normal initialization */
- WEIGHT_LECUN_UNIFORM, /** uniform initialization */
- WEIGHT_XAVIER_NORMAL, /** Xavier normal initialization */
- WEIGHT_XAVIER_UNIFORM, /** Xavier uniform initialization */
- WEIGHT_HE_NORMAL, /** He normal initialization */
- WEIGHT_HE_UNIFORM, /** He uniform initialization */
- WEIGHT_UNKNOWN /** Unknown */
-};
-
-/**
* @brief Print Options when printing layer info
*/
typedef enum {
type(LayerType::LAYER_UNKNOWN),
loss(0.0f),
activation_type(ActivationType::ACT_NONE),
- weight_regularizer(),
+ weight_regularizer(WeightRegularizerType::unknown),
+ weight_regularizer_constant(0.0f),
weight_initializer(WeightInitializer::WEIGHT_XAVIER_UNIFORM),
bias_initializer(WeightInitializer::WEIGHT_ZEROS),
flatten(false),
trainable(true),
- param_size(0),
+ num_weights(0),
num_inputs(1),
num_outputs(1) {}
/**
* @brief read layer Weight & Bias data from file
- * @note derived class can call this to get/save updatableParams
+ * @note derived class can call this to get/save weights
* @param[in] file input file stream
*/
virtual void read(std::ifstream &file);
/**
- * @brief save layer Weight & Bias data from file
+ * @brief save layer Weight & Bias data to file
- * @note derived class can call this to get/save updatableParams
+ * @note derived class can call this to get/save weights
* @param[in] file output file stream
*/
virtual void save(std::ofstream &file);
- * @brief set weight decay parameters
- * @param[in] w struct for weight decay
+ * @brief set weight regularizer type
+ * @param[in] type weight regularizer type
*/
- // void setWeightRegularizer(WeightRegularizerParam w) {
- // weight_regularizer = w;
- // }
+ void setWeightRegularizer(WeightRegularizerType type) {
+ weight_regularizer = type;
+ }
/**
* @brief set Weight Initialization Type
void setTrainable(bool train) { trainable = train; }
/**
- * @brief get updatable params of all
+ * @brief get all weights of the layer
- * @retval vector of all params
+ * @retval shared pointer to the weight list
*/
- std::shared_ptr<UpdatableParam> getParams() { return params; }
+ std::shared_ptr<Weight> getWeights() { return weight_list; }
/**
* @brief get if the output of this layer must be flatten
- * @brief get data alias at param position.
+ * @brief get weight reference at the given position.
* @exception std::out_of_range for index out of range
*/
- UpdatableParam &paramsAt(const unsigned int position) {
- if (position >= param_size) {
+ Weight &weightAt(const unsigned int position) {
+ if (position >= num_weights) {
throw std::out_of_range("index out of range");
}
- return params.get()[position];
+ return weight_list.get()[position];
}
protected:
- * @return bool is weightdecay type is L2 Norm
+ * @return true if the weight regularizer type is L2 norm
*/
bool isWeightRegularizerL2Norm() {
- return weight_regularizer.type == WeightRegularizerType::l2norm;
+ return weight_regularizer == WeightRegularizerType::l2norm;
}
/**
* @brief Input Tensor
ActivationType activation_type;
- WeightRegularizerParam weight_regularizer;
+ WeightRegularizerType weight_regularizer;
+
+ float weight_regularizer_constant;
WeightInitializer weight_initializer; /** initializer for weights */
bool trainable;
/**
- * @brief reserve memory for @a params and set @a param_size
- * @exception std::invalid_argument when param_size is already set and
+ * @brief reserve memory for @a weight_list and set @a num_weights
+ * @exception std::invalid_argument when num_weights is already set and
* shouldn't be changed again.
*/
- void setParamSize(unsigned int psize) {
- if (psize == param_size)
+ void setNumWeights(unsigned int psize) {
+ if (psize == num_weights)
return;
- if (param_size > 0) {
+ if (num_weights > 0) {
throw std::invalid_argument("param size can't be set once it is set");
}
- param_size = psize;
- params = std::shared_ptr<UpdatableParam>(
- new UpdatableParam[psize], std::default_delete<UpdatableParam[]>());
+ num_weights = psize;
+ weight_list = std::shared_ptr<Weight>(new Weight[num_weights],
+ std::default_delete<Weight[]>());
}
/**
- * @brief updatable params in this layer. This contains params of layers.
- * @note UpdatableParam has weights and gradients paired.
+ * @brief weight_list in this layer. This contains all weights of the
+ * layer.
*/
- std::shared_ptr<UpdatableParam> params;
+ std::shared_ptr<Weight> weight_list;
- unsigned int param_size; /**< length of UpdatableParam * params.
+ unsigned int num_weights; /**< length of weights.
This shouldn't be changed
after initiation
- use setParamSize() to avoid
+ use setNumWeights() to avoid
setting parameters twice */
/**
return out;
}
-/**
- * @brief initialize Weight
- * @param[in] w_dim TensorDim
- * @param[in] initializer Weight Initializer
- * @param[out] status Status
- * @retval Tensor Initialized Tensor
- */
-// TODO: move out
-Tensor getInitializedTensor(const TensorDim &w_dim,
- WeightInitializer initializer);
-
} // namespace nntrainer
#endif /* __cplusplus */
*/
void ensureName(std::shared_ptr<Layer> layer, std::string prefix = "");
+ /**
+ * @brief Swap function for the class
+ */
friend void swap(NeuralNetwork &lhs, NeuralNetwork &rhs) {
using std::swap;
#include <memory>
#include <tensor.h>
+#include <weight.h>
namespace nntrainer {
/**
- * @brief UpdatableParam that could be updated thorugh optimizer
- */
-// TODO: move this out from here
-struct UpdatableParam {
- Tensor weight; /**< weight to be updated and used */
- Tensor grad; /**< gradient for the weight */
- std::string name; /**< name of the parameter */
- bool updatable = true; /**< if this param is updatable */
-};
-
-/**
* @brief Enumeration of Optimizer
* 0. SGD
* 1. ADAM
enum class OptType { sgd = 0, adam = 1, unknown = 2 };
/**
- * @brief Enumeration of Weight Decay type
- * 0. L2Norm
- * 1. Regression
- * 2. Unknown (equivalent to none)
- */
-// TODO: move this out of here
-enum class WeightRegularizerType { l2norm = 0, regression = 1, unknown = 2 };
-
-/**
- * @brief type for the Weight Decay hyper-parameter
- */
-typedef struct WeightRegularizerParam_ {
- WeightRegularizerType type;
- float constant;
-
- WeightRegularizerParam_() :
- type(WeightRegularizerType::unknown),
- constant(0.0f) {}
-} WeightRegularizerParam;
-
-/**
- * @brief type for the Optimizor to save hyper-parameter
+ * @brief type for the Optimizer to save hyper-parameter
*/
typedef struct _OptParam {
/**
* @brief initialize optimizer. Initialize Weight if it is adam
- * @param[in] params UpdatableParam list
- * @param[in] param_size size of the array
+ * @param[in] params Weight list
+ * @param[in] num_weights size of the array
- * @param[in] setTensor true if the layer need weight update.
- * Input Layer and Batch Normalization layer won't need it.
- * Therefore, it sets false.
+ * @param[in] setTensor true if the layer needs weight update.
+ * Input layer and batch normalization layer do not need it,
+ * so they pass false.
* @retval #ML_ERROR_NONE Successful.
* @retval #ML_ERROR_INVALID_PARAMETER invalid parameter.
*/
- int initialize(std::shared_ptr<UpdatableParam> params,
- unsigned int param_size, bool setTensor);
+ int initialize(std::shared_ptr<Weight> params, unsigned int num_weights,
+ bool setTensor);
/**
- * @brief apply gradient to weights
- * @param[in] params array of updatable params.
- * @param[in] param_size size of the array
+ * @brief apply gradient to weight_list
+ * @param[in] params Weight list
+ * @param[in] num_weights size of the array
* @param[in] iteration nth epoch number
*/
- void apply_gradients(std::shared_ptr<UpdatableParam> params,
- unsigned int param_size, int iteration);
+ void apply_gradients(std::shared_ptr<Weight> params, unsigned int num_weights,
+ int iteration);
/**
* @brief Property Enumeration
*/
Tensor &operator=(Tensor &&rhs) noexcept = default;
- void swap(Tensor &lhs, Tensor &rhs) noexcept;
+ friend void swap(Tensor &lhs, Tensor &rhs) noexcept {
+ std::swap(lhs.dim, rhs.dim);
+ std::swap(lhs.data, rhs.data);
+ std::swap(lhs.strides, rhs.strides);
+ std::swap(lhs.is_contiguous, rhs.is_contiguous);
+ }
/**
* @brief Comparison operator overload
- * @parma[out] lhs Optimizer
- * @parma[in] rhs Optimizer
+ * @param[out] lhs TensorDim to be swapped
+ * @param[in] rhs TensorDim to be swapped
*/
- void swap(TensorDim &lhs, TensorDim &rhs) noexcept;
+ friend void swap(TensorDim &lhs, TensorDim &rhs) noexcept {
+ std::swap_ranges(std::begin(lhs.dim), std::begin(lhs.dim) + MAXDIM,
+ std::begin(rhs.dim));
+ std::swap(lhs.len, rhs.len);
+ std::swap(lhs.feature_len, rhs.feature_len);
+ }
unsigned int batch() const { return dim[0]; };
unsigned int channel() const { return dim[1]; };
--- /dev/null
+// SPDX-License-Identifier: Apache-2.0-only
+/**
+ * Copyright (C) 2020 Parichay Kapoor <pk.kapoor@samsung.com>
+ *
+ * @file weight.h
+ * @date 22 September 2020
+ * @see https://github.com/nnstreamer/nntrainer
+ * @author Parichay Kapoor <pk.kapoor@samsung.com>
+ * @bug No known bugs except for NYI items
+ * @brief This is Weight Class for Neural Network
+ *
+ */
+
+#ifndef __WEIGHT_H__
+#define __WEIGHT_H__
+
+#include <tensor.h>
+
+namespace nntrainer {
+
+/**
+ * @brief Enumeration of Weight Decay type
+ */
+enum class WeightRegularizerType {
+ l2norm, /** L2 norm regularizer */
+ unknown /** Unknown */
+};
+
+/**
+ * @brief Enumeration of Weight Initialization Type
+ */
+enum class WeightInitializer {
+ WEIGHT_ZEROS, /** Zero initialization */
+ WEIGHT_ONES, /** One initialization */
+ WEIGHT_LECUN_NORMAL, /** LeCun normal initialization */
+ WEIGHT_LECUN_UNIFORM, /** LeCun uniform initialization */
+ WEIGHT_XAVIER_NORMAL, /** Xavier normal initialization */
+ WEIGHT_XAVIER_UNIFORM, /** Xavier uniform initialization */
+ WEIGHT_HE_NORMAL, /** He normal initialization */
+ WEIGHT_HE_UNIFORM, /** He uniform initialization */
+ WEIGHT_UNKNOWN /** Unknown */
+};
+
+/**
+ * @class Weight
+ * @brief Weight with gradient, and its corresponding trainable property
+ */
+class Weight {
+
+ /** Declare layers as friend to get variable/gradient reference */
+ friend class Layer;
+ friend class Conv2DLayer;
+ friend class FullyConnectedLayer;
+ friend class BatchNormalizationLayer;
+
+ /** Declare optimizer as friend to get variable/gradient reference */
+ friend class Optimizer;
+
+public:
+ /**
+ * @brief Weight default constructor
+ */
+ Weight() : initializer(WeightInitializer::WEIGHT_UNKNOWN), trainable(false) {}
+
+ /**
+ * @brief Construct a new Weight object
+ *
+ * @param dim Variable and gradient tensor dimension
+ * @param init Initializer for the tensor
+ * @param train If the variable is trainable
+ * @param name Name for this weight
+ */
+ Weight(
+ const TensorDim &dim,
+ const WeightInitializer init = WeightInitializer::WEIGHT_XAVIER_UNIFORM,
+ bool train = true, std::string name = "");
+
+ /**
+ * @brief Allocate and initialize the variable
+ *
+ * @param dim Dimension for the variable
+ */
+ void initializeVar(const TensorDim &dim);
+
+ /**
+ * @brief Swap for weight
+ *
+ * @param lhs Swap to
+ * @param rhs Swap from
+ * @note The gradient is swapped along with the variable; for non-trainable
+ * weights the gradient is an empty tensor.
+ */
+ friend void swap(Weight &lhs, Weight &rhs) noexcept {
+ using std::swap;
+
+ swap(lhs.var, rhs.var);
+ swap(lhs.initializer, rhs.initializer);
+ swap(lhs.trainable, rhs.trainable);
+ swap(lhs.grad, rhs.grad);
+ swap(lhs.name, rhs.name);
+ }
+
+ /**
+ * @brief copy assignment
+ *
+ * @param rhs copy from
+ * @return Weight& Updated weight
+ */
+ Weight &operator=(const Weight &rhs) = default;
+
+ /**
+ * @brief move assignment
+ *
+ * @param rhs move from
+ * @return Weight& Updated weight
+ */
+ Weight &operator=(Weight &&rhs) = default;
+
+ /**
+ * @brief Get the TensorDim
+ *
+ * @return TensorDim Dimension
+ */
+ TensorDim getDim() { return var.getDim(); }
+
+ /**
+ * @brief Get if the weight is trainable
+ *
+ * @return true if trainable
+ * @return false if not trainable
+ */
+ bool getTrainable() { return trainable; }
+
+ /**
+ * @brief Get the name of the weight
+ *
+ * @return std::string name
+ */
+ std::string getName() { return name; }
+
+ /**
+ * @brief Get the variable tensor (by value)
+ *
+ * @return Tensor Variable tensor
+ */
+ Tensor getVariable() { return var; }
+
+ /**
+ * @brief Get the Gradient tensor (by value)
+ *
+ * @return Tensor Gradient tensor
+ */
+ Tensor getGradient() { return grad; }
+
+private:
+ /**
+ * @brief Get the variable tensor (by reference)
+ *
+ * @return Tensor Variable tensor
+ */
+ Tensor &getVariableRef() { return var; }
+
+ /**
+ * @brief Get the Gradient tensor (by reference)
+ *
+ * @return Tensor Gradient tensor
+ */
+ Tensor &getGradientRef() { return grad; }
+
+ Tensor var; /**< variable to be updated and used */
+ Tensor grad; /**< gradient for the variable */
+ WeightInitializer initializer; /**< initializer for this variable */
+ bool trainable; /**< if this variable is trainable */
+ std::string name; /**< name of the parameter */
+};
+
+} // namespace nntrainer
+
+#endif /** __WEIGHT_H__ */
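For reviewers, a minimal usage sketch of the new Weight API (illustrative only, not part of the patch; the dimensions and the "example:weight" name are made up, while the calls follow the declarations in weight.h above). Layers and the optimizer, declared as friends, additionally use getVariableRef()/getGradientRef() to update the tensors in place.

    #include <tensor_dim.h>
    #include <weight.h>

    void weight_usage_sketch() {
      // batch, channel, height, width; height/width act as fan-in/fan-out for init
      nntrainer::TensorDim dim(1, 1, 4, 8);

      // Trainable, Xavier-uniform initialized weight; the gradient tensor is
      // allocated and zeroed because train == true.
      nntrainer::Weight w(dim, nntrainer::WeightInitializer::WEIGHT_XAVIER_UNIFORM,
                          true, "example:weight");

      nntrainer::Tensor var = w.getVariable();  // copy of the variable tensor
      nntrainer::Tensor grad = w.getGradient(); // copy of the zeroed gradient
      bool train = w.getTrainable();            // true
      (void)var; (void)grad; (void)train;
    }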
nntrainer_sources = [
'src/activation_layer.cpp',
'src/addition_layer.cpp',
+ 'src/blas_interface.cpp',
'src/bn_layer.cpp',
'src/conv2d_layer.cpp',
'src/databuffer.cpp',
'src/tensor.cpp',
'src/tensor_dim.cpp',
'src/util_func.cpp',
- 'src/blas_interface.cpp'
+ 'src/weight.cpp'
]
nntrainer_headers = [
'include/activation_layer.h',
'include/addition_layer.h',
+ 'include/blas_interface.h',
'include/bn_layer.h',
'include/conv2d_layer.h',
'include/databuffer.h',
'include/tensor.h',
'include/tensor_dim.h',
'include/util_func.h',
- 'include/blas_interface.h',
+ 'include/weight.h',
'../api/nntrainer-api-common.h'
]
axes_to_reduce.push_back(i);
}
- Tensor mu =
- getInitializedTensor(dim, initializers[static_cast<int>(BNParams::mu)]);
- Tensor var =
- getInitializedTensor(dim, initializers[static_cast<int>(BNParams::var)]);
-
- Tensor gamma =
- getInitializedTensor(dim, initializers[static_cast<int>(BNParams::gamma)]);
- Tensor beta =
- getInitializedTensor(dim, initializers[static_cast<int>(BNParams::beta)]);
-
- setParamSize(4);
- paramsAt(0) = {std::move(mu), Tensor(), "BN:moving_mean", false};
+ setNumWeights(4);
+ weightAt(0) =
+ std::move(Weight(dim, initializers[static_cast<int>(BNParams::mu)], false,
+ "BN:moving_mean"));
///@todo shift var to std to save computation
- paramsAt(1) = {std::move(var), Tensor(), "BN:moving_variance", false};
- paramsAt(2) = {std::move(gamma), Tensor(gamma.getDim()), "BN:gamma"};
- paramsAt(3) = {std::move(beta), Tensor(beta.getDim()), "BN:beta"};
+ weightAt(1) =
+ std::move(Weight(dim, initializers[static_cast<int>(BNParams::var)], false,
+ "BN:moving_variance"));
+ weightAt(2) = std::move(Weight(
+ dim, initializers[static_cast<int>(BNParams::gamma)], true, "BN:gamma"));
+ weightAt(3) = std::move(Weight(
+ dim, initializers[static_cast<int>(BNParams::beta)], true, "BN:beta"));
return status;
}
}
sharedConstTensor BatchNormalizationLayer::forwarding(sharedConstTensor in) {
- Tensor &mu = paramsAt(static_cast<int>(BNParams::mu)).weight;
- Tensor &var = paramsAt(static_cast<int>(BNParams::var)).weight;
- Tensor &gamma = paramsAt(static_cast<int>(BNParams::gamma)).weight;
- Tensor &beta = paramsAt(static_cast<int>(BNParams::beta)).weight;
+ Tensor &mu = weightAt(static_cast<int>(BNParams::mu)).getVariableRef();
+ Tensor &var = weightAt(static_cast<int>(BNParams::var)).getVariableRef();
+ Tensor &gamma = weightAt(static_cast<int>(BNParams::gamma)).getVariableRef();
+ Tensor &beta = weightAt(static_cast<int>(BNParams::beta)).getVariableRef();
input = *in;
/// @todo change trainable #524
sharedConstTensor
BatchNormalizationLayer::backwarding(sharedConstTensor derivative,
int iteration) {
- Tensor &gamma = paramsAt(static_cast<int>(BNParams::gamma)).weight;
- Tensor &dgamma = paramsAt(static_cast<int>(BNParams::gamma)).grad;
- Tensor &dbeta = paramsAt(static_cast<int>(BNParams::beta)).grad;
+ Tensor &gamma = weightAt(static_cast<int>(BNParams::gamma)).getVariableRef();
+ Tensor &dgamma = weightAt(static_cast<int>(BNParams::gamma)).getGradientRef();
+ Tensor &dbeta = weightAt(static_cast<int>(BNParams::beta)).getGradientRef();
Tensor dx_normalized;
Tensor deriv = *derivative;
Tensor dx = dx_2.multiply(dx_1);
dx.divide_i(N);
- opt.apply_gradients(params, param_size, iteration);
+ opt.apply_gradients(weight_list, num_weights, iteration);
return MAKE_SHARED_TENSOR(std::move(dx));
}
std::string kernelPrefix = "Conv2d:filter";
std::string biasPrefix = "Conv2d:bias";
- setParamSize(filter_size * 2);
+ setNumWeights(filter_size * 2);
for (unsigned int i = 0; i < filter_size; ++i) {
- Tensor Knl = getInitializedTensor(dim, weight_initializer);
- NN_RETURN_STATUS();
-
- Tensor bias = getInitializedTensor(bias_dim, bias_initializer);
- NN_RETURN_STATUS();
-
- Tensor delK(dim);
- delK.setZero();
-
- Tensor delBias(bias_dim);
- delBias.setZero();
-
/*< @note: order of weight and bias are:
w0 w1 w2 ... w3
*/
- paramsAt(i) = {std::move(Knl), std::move(delK),
- kernelPrefix + std::to_string(i)};
- paramsAt(i + filter_size) = {std::move(bias), std::move(delBias),
- biasPrefix + std::to_string(i)};
+ weightAt(i) = std::move(
+ Weight(dim, weight_initializer, true, kernelPrefix + std::to_string(i)));
+ weightAt(i + filter_size) = std::move(
+ Weight(bias_dim, bias_initializer, true, biasPrefix + std::to_string(i)));
}
// this output_dim should be the same with dimension of hidden
std::vector<float> imkernel(kdim.getFeatureLen() * filter_size);
for (unsigned int i = 0; i < filter_size; ++i) {
- Tensor &filters = paramsAt(i).weight;
+ Tensor &filters = weightAt(i).getVariableRef();
float *d = imkernel.data();
memcpy(&d[i * kdim.getFeatureLen()], filters.getData(),
kdim.getFeatureLen() * sizeof(float));
out.size() * sizeof(float));
for (unsigned int i = 0; i < filter_size; i++) {
- Tensor &bias = paramsAt(i + filter_size).weight;
+ Tensor &bias = weightAt(i + filter_size).getVariableRef();
Tensor tmp(1, 1, hidden.height(), hidden.width());
tmp.setValue(bias.getValue(0, 0, 0, 0));
saxpy(hidden.height() * hidden.width(), 1, tmp.getData(), 1,
}
loss = 0.0f;
- if (weight_regularizer.type == WeightRegularizerType::l2norm) {
+ if (weight_regularizer == WeightRegularizerType::l2norm) {
for (unsigned int i = 0; i < filter_size; ++i) {
- Tensor &weight = paramsAt(i).weight;
- loss += weight_regularizer.constant * 0.5f * (weight.l2norm());
+ Tensor &weight = weightAt(i).getVariableRef();
+ loss += weight_regularizer_constant * 0.5f * (weight.l2norm());
}
loss /= filter_size;
}
same_pad[1] = kernel_size[1] - 1;
for (unsigned int i = 0; i < filter_size; ++i) {
- Tensor &delK = paramsAt(i).grad;
- Tensor &delBias = paramsAt(i + filter_size).grad;
+ Tensor &delK = weightAt(i).getGradientRef();
+ Tensor &delBias = weightAt(i + filter_size).getGradientRef();
delK.setZero();
delBias.setZero();
}
throw std::runtime_error("Backwarding Convolution failed.");
for (unsigned int i = 0; i < filter_size; ++i) {
- Tensor &delK = paramsAt(i).grad;
- Tensor &delBias = paramsAt(i + filter_size).grad;
+ Tensor &delK = weightAt(i).getGradientRef();
+ Tensor &delBias = weightAt(i + filter_size).getGradientRef();
float *del = delK.getData();
unsigned int s = kernel_size[0] * kernel_size[1] * input_dim.channel();
for (unsigned int j = 0; j < ret.channel(); ++j) {
for (unsigned int i = 0; i < filter_size; ++i) {
- Tensor &filters = paramsAt(i).weight;
+ Tensor &filters = weightAt(i).getVariableRef();
for (unsigned int k = 0; k < kernel_size[0] * kernel_size[1]; ++k) {
d[count++] = filters.getData()[j * kernel_size[0] * kernel_size[1] + k];
}
if (trainable) {
// Update K / bias
for (unsigned int i = 0; i < filter_size; ++i) {
- Tensor &delK = paramsAt(i).grad;
- Tensor &filters = paramsAt(i).weight;
+ Tensor &delK = weightAt(i).getGradientRef();
+ Tensor &filters = weightAt(i).getVariableRef();
if (isWeightRegularizerL2Norm()) {
- status = delK.add_i(filters, weight_regularizer.constant);
+ status = delK.add_i(filters, weight_regularizer_constant);
if (status != ML_ERROR_NONE)
throw std::runtime_error("Weight regularization failed");
}
}
- opt.apply_gradients(params, param_size, iteration);
+ opt.apply_gradients(weight_list, num_weights, iteration);
}
return MAKE_SHARED_TENSOR(std::move(strip_pad(ret, padding)));
dim.height(input_dim.width());
dim.batch(1);
- Tensor weight = getInitializedTensor(dim, weight_initializer);
- Tensor bias = getInitializedTensor(bias_dim, bias_initializer);
-
- setParamSize(2);
- paramsAt(0) = {std::move(weight), Tensor(weight.getDim()), "FC:weight"};
- paramsAt(1) = {std::move(bias), Tensor(bias.getDim()), "FC:bias"};
+ setNumWeights(2);
+ weightAt(0) = std::move(Weight(dim, weight_initializer, true, "FC:weight"));
+ weightAt(1) = std::move(Weight(bias_dim, bias_initializer, true, "FC:bias"));
return status;
}
}
sharedConstTensor FullyConnectedLayer::forwarding(sharedConstTensor in) {
- Tensor &weight = paramsAt(static_cast<int>(FCParams::weight)).weight;
- Tensor &bias = paramsAt(static_cast<int>(FCParams::bias)).weight;
+ Tensor &weight =
+ weightAt(static_cast<int>(FCParams::weight)).getVariableRef();
+ Tensor &bias = weightAt(static_cast<int>(FCParams::bias)).getVariableRef();
input = *in;
hidden = input.dot(weight);
hidden.add_i(bias);
- if (weight_regularizer.type == WeightRegularizerType::l2norm) {
- loss = weight_regularizer.constant * 0.5f * (weight.l2norm());
+ if (weight_regularizer == WeightRegularizerType::l2norm) {
+ loss = weight_regularizer_constant * 0.5f * (weight.l2norm());
}
return MAKE_SHARED_TENSOR(hidden);
int iteration) {
unsigned int weight_idx = static_cast<int>(FCParams::weight);
unsigned int bias_idx = static_cast<int>(FCParams::bias);
- Tensor &weight = paramsAt(weight_idx).weight;
- Tensor &djdw = paramsAt(weight_idx).grad;
- Tensor &djdb = paramsAt(bias_idx).grad;
+ Tensor &weight = weightAt(weight_idx).getVariableRef();
+ Tensor &djdw = weightAt(weight_idx).getGradientRef();
+ Tensor &djdb = weightAt(bias_idx).getGradientRef();
Tensor ret = derivative->dot(weight, false, true);
djdb = derivative->sum(0);
djdw = input.dot(*derivative, true, false);
if (isWeightRegularizerL2Norm())
- djdw.add_i(weight, weight_regularizer.constant);
+ djdw.add_i(weight, weight_regularizer_constant);
djdw = djdw.sum(0);
if (trainable) {
- opt.apply_gradients(params, param_size, iteration);
+ opt.apply_gradients(weight_list, num_weights, iteration);
}
return MAKE_SHARED_TENSOR(std::move(ret));
this->opt.setType(opt.getType());
this->opt.setOptParam(opt.getOptParam());
- return this->opt.initialize(params, param_size, true);
+ return this->opt.initialize(weight_list, num_weights, true);
}
int Layer::checkValidation() {
}
void Layer::copy(std::shared_ptr<Layer> l) {
- setParamSize(l->param_size);
- for (unsigned int i = 0; i < l->param_size; ++i) {
- paramsAt(i) = l->paramsAt(i);
+ setNumWeights(l->num_weights);
+ for (unsigned int i = 0; i < num_weights; ++i) {
+ weightAt(i) = l->weightAt(i);
}
}
void Layer::read(std::ifstream &file) {
- for (unsigned int i = 0; i < param_size; ++i) {
- paramsAt(i).weight.read(file);
+ for (unsigned int i = 0; i < num_weights; ++i) {
+ weightAt(i).getVariableRef().read(file);
}
}
void Layer::save(std::ofstream &file) {
- for (unsigned int i = 0; i < param_size; ++i) {
- paramsAt(i).weight.save(file);
+ for (unsigned int i = 0; i < num_weights; ++i) {
+ weightAt(i).getVariableRef().save(file);
}
}
-Tensor getInitializedTensor(const TensorDim &w_dim,
- WeightInitializer initializer) {
- Tensor w = Tensor(w_dim);
-
- if (initializer == WeightInitializer::WEIGHT_UNKNOWN) {
- ml_logw("Warning: Weight Initalization Type is not set. "
- "WEIGHT_XAVIER_NORMAL is used by default");
- initializer = WeightInitializer::WEIGHT_XAVIER_NORMAL;
- }
-
- switch (initializer) {
- case WeightInitializer::WEIGHT_ZEROS:
- w.setZero();
- break;
- case WeightInitializer::WEIGHT_ONES:
- w.setValue(1.0f);
- break;
- case WeightInitializer::WEIGHT_LECUN_NORMAL:
- w.setRandNormal(0.0f, sqrtFloat(1.0f / w_dim.height()));
- break;
- case WeightInitializer::WEIGHT_XAVIER_NORMAL:
- w.setRandNormal(0.0f, sqrtFloat(2.0f / (w_dim.width() + w_dim.height())));
- break;
- case WeightInitializer::WEIGHT_HE_NORMAL:
- w.setRandNormal(0.0f, sqrtFloat(2.0f / (w_dim.height())));
- break;
- case WeightInitializer::WEIGHT_LECUN_UNIFORM:
- w.setRandUniform(-1.0f * sqrtFloat(1.0f / w_dim.height()),
- sqrtFloat(1.0f / w_dim.height()));
- break;
- case WeightInitializer::WEIGHT_XAVIER_UNIFORM:
- w.setRandUniform(-1.0f * sqrtFloat(6.0f / (w_dim.height() + w_dim.width())),
- sqrtFloat(6.0 / (w_dim.height() + w_dim.width())));
- break;
- case WeightInitializer::WEIGHT_HE_UNIFORM:
- w.setRandUniform(-1.0f * sqrtFloat(6.0f / (w_dim.height())),
- sqrtFloat(6.0 / (w_dim.height())));
- break;
- default:
- break;
- }
-
- return w;
-}
-
int Layer::setProperty(std::vector<std::string> values) {
int status = ML_ERROR_NONE;
break;
case PropertyType::weight_regularizer:
if (!value.empty()) {
- weight_regularizer.type =
+ weight_regularizer =
(WeightRegularizerType)parseType(value, TOKEN_WEIGHT_REGULARIZER);
- if (weight_regularizer.type == WeightRegularizerType::unknown) {
+ if (weight_regularizer == WeightRegularizerType::unknown) {
throw std::invalid_argument("[Layer] Unknown Weight decay");
}
}
break;
case PropertyType::weight_regularizer_constant:
if (!value.empty()) {
- status = setFloat(weight_regularizer.constant, value);
+ status = setFloat(weight_regularizer_constant, value);
throw_status(status);
}
break;
void Layer::printShapeInfo(std::ostream &out) {
out << "input " << input_dim;
- for (unsigned int i = 0; i < param_size; i++)
- out << "inner" << i << " " << paramsAt(i).weight.getDim();
+ for (unsigned int i = 0; i < num_weights; i++)
+ out << "inner" << i << " " << weightAt(i).var.getDim();
out << "output " << output_dim;
}
void Layer::printProperties(std::ostream &out) {
out << "Trainable: " << trainable << std::endl;
printIfValid(out, PropertyType::weight_regularizer,
- static_cast<int>(weight_regularizer.type));
+ static_cast<int>(weight_regularizer));
printIfValid(out, PropertyType::weight_regularizer_constant,
- weight_regularizer.constant);
+ weight_regularizer_constant);
}
void Layer::printMetric(std::ostream &out) {
if (flags & PRINT_WEIGHTS) {
out << "======weights: " << std::endl;
- for (unsigned int i = 0; i < param_size; ++i) {
- out << '[' << paramsAt(i).name << ']' << std::endl;
- out << paramsAt(i).weight;
+ for (unsigned int i = 0; i < num_weights; ++i) {
+ out << '[' << weightAt(i).getName() << ']' << std::endl;
+ out << weightAt(i).var;
}
}
return status;
}
-int Optimizer::initialize(std::shared_ptr<UpdatableParam> params,
- unsigned int param_size, bool set_tensor) {
+int Optimizer::initialize(std::shared_ptr<Weight> weight_list,
+ unsigned int num_weights, bool set_tensor) {
int status = ML_ERROR_NONE;
if (type == OptType::adam && set_tensor) {
- UpdatableParam *param_data = params.get();
+ for (unsigned int i = 0; i < num_weights; ++i) {
+ Weight &w = weight_list.get()[i];
- for (unsigned int i = 0; i < param_size; ++i) {
- UpdatableParam ¶m = param_data[i];
-
- if (!param.updatable)
+ // TODO: only trainable weights must be sent to optimizer
+ if (!w.getTrainable())
continue;
- Tensor &weight = param.weight;
- Tensor &grad = param.grad;
- Tensor w = Tensor(weight.getDim());
- w.setZero();
- Tensor g = Tensor(grad.getDim());
- g.setZero();
+ Tensor m = Tensor(w.getDim());
+ m.setZero();
+ Tensor v = Tensor(w.getDim());
+ v.setZero();
std::pair<Tensor, Tensor> p =
- std::pair<Tensor, Tensor>(std::move(w), std::move(g));
+ std::pair<Tensor, Tensor>(std::move(m), std::move(v));
weight_mv.push_back(std::move(p));
}
}
return status;
}
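Context for the rename above (not part of the patch): each (m, v) pair pushed into weight_mv holds the per-weight first and second moment estimates of the standard Adam rule, which apply_gradients uses when the optimizer type is adam. Only the sgd branch is visible in this hunk, so the exact epsilon placement in this code base is an assumption:

$$m_t = \beta_1 m_{t-1} + (1-\beta_1)\,g_t, \qquad v_t = \beta_2 v_{t-1} + (1-\beta_2)\,g_t^{2}$$

$$\hat{m}_t = \frac{m_t}{1-\beta_1^{\,t}}, \qquad \hat{v}_t = \frac{v_t}{1-\beta_2^{\,t}}, \qquad w \leftarrow w - \alpha\,\frac{\hat{m}_t}{\sqrt{\hat{v}_t} + \epsilon}$$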
-void Optimizer::apply_gradients(std::shared_ptr<UpdatableParam> params,
- unsigned int param_size, int iteration) {
-
- UpdatableParam *param_data = params.get();
+void Optimizer::apply_gradients(std::shared_ptr<Weight> weight_list,
+ unsigned int num_weights, int iteration) {
double ll = popt.learning_rate;
}
int idx = 0;
- for (unsigned int i = 0; i < param_size; ++i) {
- UpdatableParam &param = param_data[i];
+ for (unsigned int i = 0; i < num_weights; ++i) {
+ Weight &weight = weight_list.get()[i];
- if (!param.updatable)
+ if (!weight.getTrainable())
continue;
- Tensor &x = param.weight;
- const Tensor &x_grad = param.grad;
+ Tensor &x = weight.getVariableRef();
+ const Tensor &x_grad = weight.getGradientRef();
switch (type) {
case OptType::sgd:
x.add_i(x_grad, -ll);
}
}
-void Tensor::swap(Tensor &lhs, Tensor &rhs) noexcept {
- std::swap(lhs.dim, rhs.dim);
- std::swap(lhs.data, rhs.data);
- std::swap(lhs.strides, rhs.strides);
- std::swap(lhs.is_contiguous, rhs.is_contiguous);
-}
-
bool Tensor::operator==(const Tensor &rhs) const {
if (this->dim != rhs.dim)
return false;
namespace nntrainer {
TensorDim &TensorDim::operator=(const TensorDim &rhs) {
+ using std::swap;
+
TensorDim tmp(rhs.batch(), rhs.channel(), rhs.height(), rhs.width());
- this->swap(*this, tmp);
+ swap(*this, tmp);
return *this;
}
TensorDim &TensorDim::operator=(TensorDim &&rhs) noexcept {
- this->swap(*this, rhs);
- return *this;
-}
+ using std::swap;
-void TensorDim::swap(TensorDim &lhs, TensorDim &rhs) noexcept {
- std::swap_ranges(std::begin(lhs.dim), std::begin(lhs.dim) + MAXDIM,
- std::begin(rhs.dim));
- std::swap(lhs.len, rhs.len);
- std::swap(lhs.feature_len, rhs.feature_len);
+ swap(*this, rhs);
+ return *this;
}
void TensorDim::resetLen() {
--- /dev/null
+// SPDX-License-Identifier: Apache-2.0-only
+/**
+ * Copyright (C) 2020 Parichay Kapoor <pk.kapoor@samsung.com>
+ *
+ * @file weight.cpp
+ * @date 22 September 2020
+ * @see https://github.com/nnstreamer/nntrainer
+ * @author Parichay Kapoor <pk.kapoor@samsung.com>
+ * @bug No known bugs except for NYI items
+ * @brief This is Weight Class for Neural Network
+ *
+ */
+
+#include <util_func.h>
+#include <weight.h>
+
+namespace nntrainer {
+
+Weight::Weight(const TensorDim &dim, const WeightInitializer init, bool train,
+ std::string name) :
+ initializer(init),
+ trainable(train),
+ name(name) {
+ if (initializer == WeightInitializer::WEIGHT_UNKNOWN)
+ throw std::invalid_argument("Weight initializer unknown");
+
+ initializeVar(dim);
+ if (trainable) {
+ grad = Tensor(dim);
+ grad.setZero();
+ } else
+ grad = Tensor();
+}
+
+void Weight::initializeVar(const TensorDim &dim) {
+ var = Tensor(dim);
+ switch (initializer) {
+ case WeightInitializer::WEIGHT_ZEROS:
+ var.setZero();
+ break;
+ case WeightInitializer::WEIGHT_ONES:
+ var.setValue(1.0f);
+ break;
+ case WeightInitializer::WEIGHT_LECUN_NORMAL:
+ var.setRandNormal(0.0f, sqrtFloat(1.0f / dim.height()));
+ break;
+ case WeightInitializer::WEIGHT_XAVIER_NORMAL:
+ var.setRandNormal(0.0f, sqrtFloat(2.0f / (dim.width() + dim.height())));
+ break;
+ case WeightInitializer::WEIGHT_HE_NORMAL:
+ var.setRandNormal(0.0f, sqrtFloat(2.0f / (dim.height())));
+ break;
+ case WeightInitializer::WEIGHT_LECUN_UNIFORM:
+ var.setRandUniform(-1.0f * sqrtFloat(1.0f / dim.height()),
+ sqrtFloat(1.0f / dim.height()));
+ break;
+ case WeightInitializer::WEIGHT_XAVIER_UNIFORM:
+ var.setRandUniform(-1.0f * sqrtFloat(6.0f / (dim.height() + dim.width())),
+ sqrtFloat(6.0 / (dim.height() + dim.width())));
+ break;
+ case WeightInitializer::WEIGHT_HE_UNIFORM:
+ var.setRandUniform(-1.0f * sqrtFloat(6.0f / (dim.height())),
+ sqrtFloat(6.0 / (dim.height())));
+ break;
+ default:
+ break;
+ }
+}
+
+} // namespace nntrainer
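For reference (not part of the patch), the branches in initializeVar() above match the usual initializer definitions, taking dim.height() as fan-in f_in and dim.width() as fan-out f_out, exactly as the removed getInitializedTensor() helper did (assuming setRandNormal takes mean and standard deviation, and setRandUniform takes lower and upper bounds):

$$\begin{aligned}
\text{LeCun:}&\quad \sigma = \sqrt{1/f_{in}} &&\text{or}\quad \mathcal{U}\big(-\sqrt{1/f_{in}},\ \sqrt{1/f_{in}}\big)\\
\text{Xavier:}&\quad \sigma = \sqrt{2/(f_{in}+f_{out})} &&\text{or}\quad \mathcal{U}\big(-\sqrt{6/(f_{in}+f_{out})},\ \sqrt{6/(f_{in}+f_{out})}\big)\\
\text{He:}&\quad \sigma = \sqrt{2/f_{in}} &&\text{or}\quad \mathcal{U}\big(-\sqrt{6/f_{in}},\ \sqrt{6/f_{in}}\big)
\end{aligned}$$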
%{_includedir}/nntrainer/addition_layer.h
%{_includedir}/nntrainer/nntrainer-api-common.h
%{_includedir}/nntrainer/blas_interface.h
+%{_includedir}/nntrainer/weight.h
%{_libdir}/pkgconfig/nntrainer.pc
%files devel-static
void loadUpdatedWeightsGradients(const char *file_uw, const char *file_g) {
for (int idx = 0; idx < 2; ++idx) {
- new_w.push_back(nntrainer::Tensor(layer.paramsAt(idx).weight.getDim()));
- grad.push_back(nntrainer::Tensor(layer.paramsAt(idx).grad.getDim()));
+ new_w.push_back(
+ nntrainer::Tensor(layer.weightAt(idx).getVariable().getDim()));
+ grad.push_back(
+ nntrainer::Tensor(layer.weightAt(idx).getGradient().getDim()));
}
loadFile(file_uw, new_w);
}
void matchUpdatedWeightsGradients() {
- std::shared_ptr<nntrainer::UpdatableParam> params = layer.getParams();
+ std::shared_ptr<nntrainer::Weight> params = layer.getWeights();
/** Match gradients and updated weights */
for (int idx = 0; idx < 2; ++idx) {
- matchOutput(params.get()[idx].grad, grad[idx]);
- matchOutput(params.get()[idx].weight, new_w[idx]);
+ matchOutput(params.get()[idx].getGradient(), grad[idx]);
+ matchOutput(params.get()[idx].getVariable(), new_w[idx]);
}
}
matchOutput(result, "tc_fc_1_goldenFCGradientAdam.out");
- nntrainer::UpdatableParam *param_data = layer.getParams().get();
+ nntrainer::Weight *param_data = layer.getWeights().get();
- nntrainer::UpdatableParam &param = param_data[0];
- nntrainer::Tensor &weight = param.weight;
+ nntrainer::Weight &param = param_data[0];
+ nntrainer::Tensor weight = param.getVariable();
matchOutput(weight, "tc_fc_1_goldenFCUpdatedWeightAdam.out");
- nntrainer::UpdatableParam &bias_param = param_data[1];
- nntrainer::Tensor &bias = bias_param.weight;
+ nntrainer::Weight &bias_param = param_data[1];
+ nntrainer::Tensor bias = bias_param.getVariable();
matchOutput(bias, "tc_fc_1_goldenFCUpdatedBiasAdam.out");
}
EXPECT_NO_THROW(
result = layer.backwarding(MAKE_SHARED_TENSOR(derivatives), 1).get()[0]);
- nntrainer::UpdatableParam *param_data = layer.getParams().get();
+ nntrainer::Weight *param_data = layer.getWeights().get();
for (unsigned int i = 0; i < filter_size * 2; ++i) {
- nntrainer::UpdatableParam &param = param_data[i];
- nntrainer::Tensor &grad = param.grad;
+ nntrainer::Weight &param = param_data[i];
+ nntrainer::Tensor grad = param.getGradient();
const float *gdata = grad.getData();
if (i < filter_size) {
for (unsigned int j = 0; j < grad.length(); ++j) {
EXPECT_NO_THROW(
result = layer.backwarding(MAKE_SHARED_TENSOR(derivatives), 1).get()[0]);
- nntrainer::UpdatableParam *param_data = layer.getParams().get();
+ nntrainer::Weight *param_data = layer.getWeights().get();
for (unsigned int i = 0; i < filter_size * 2; ++i) {
- nntrainer::UpdatableParam &param = param_data[i];
- nntrainer::Tensor &grad = param.grad;
+ nntrainer::Weight &param = param_data[i];
+ nntrainer::Tensor grad = param.getGradient();
const float *gdata = grad.getData();
if (i < filter_size) {
for (unsigned int j = 0; j < grad.length(); ++j) {
std::vector<float> weight_data;
std::vector<float> bias_grad;
std::vector<float> bias_weight;
- nntrainer::UpdatableParam *param_data;
+ nntrainer::Weight *param_data;
nntrainer::Tensor derivatives(2, 3, 5, 5);
}
EXPECT_NO_THROW(
result = layer.backwarding(MAKE_SHARED_TENSOR(derivatives), 1).get()[0]);
- param_data = layer.getParams().get();
+ param_data = layer.getWeights().get();
for (unsigned int i = 0; i < filter_size * 2; ++i) {
- nntrainer::UpdatableParam &param = param_data[i];
- nntrainer::Tensor &grad = param.grad;
+ nntrainer::Weight &param = param_data[i];
+ nntrainer::Tensor grad = param.getGradient();
const float *gdata = grad.getData();
if (i < filter_size) {
result = layer.backwarding(MAKE_SHARED_TENSOR(derivatives), 1).get()[0]);
}
- param_data = layer.getParams().get();
+ param_data = layer.getWeights().get();
for (unsigned int i = 0; i < filter_size * 2; ++i) {
- nntrainer::UpdatableParam &param = param_data[i];
- nntrainer::Tensor &grad = param.grad;
+ nntrainer::Weight &param = param_data[i];
+ nntrainer::Tensor grad = param.getGradient();
const float *gdata = grad.getData();
if (i < filter_size) {
std::vector<float> weight_data;
std::vector<float> bias_grad;
std::vector<float> bias_weight;
- nntrainer::UpdatableParam *param_data;
+ nntrainer::Weight *param_data;
nntrainer::Tensor derivatives(1, 12, 24, 24);
result = layer1.backwarding(MAKE_SHARED_TENSOR(result2), 1).get()[0]);
/** Compare second conv */
- param_data = layer2.getParams().get();
+ param_data = layer2.getWeights().get();
filter_size = 12;
grad_data.clear();
bias_grad.clear();
for (unsigned int i = 0; i < filter_size * 2; ++i) {
- nntrainer::UpdatableParam &param = param_data[i];
- nntrainer::Tensor &grad = param.grad;
+ nntrainer::Weight &param = param_data[i];
+ nntrainer::Tensor grad = param.getGradient();
const float *gdata = grad.getData();
if (i < filter_size) {
matchOutput(bias_grad, "tc_conv2d_int_goldenBias2Grad.out");
/** Compare first conv */
- param_data = layer1.getParams().get();
+ param_data = layer1.getWeights().get();
filter_size = 6;
grad_data.clear();
bias_grad.clear();
for (unsigned int i = 0; i < filter_size * 2; ++i) {
- nntrainer::UpdatableParam &param = param_data[i];
- nntrainer::Tensor &grad = param.grad;
+ nntrainer::Weight &param = param_data[i];
+ nntrainer::Tensor grad = param.getGradient();
const float *gdata = grad.getData();
if (i < filter_size) {