This PR splits the Variable and Gradient dimensions in Var_Grad and Weight.
This way, the Variable and the Gradient of a Weight can be given different tensor types (a short usage sketch follows the list below).
. Add dim_g for the gradient in WeightSpec.
. Update the manager to support the new WeightSpec.
. Create tensors according to dim_v and dim_g.
. Change Weight creation in Weight.h.
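
A minimal sketch of the intended usage, not part of this diff: the variable and the gradient of a Weight can now be described independently, e.g. an FP16 variable paired with an FP32 gradient. The two-dimension Weight constructor is the one added in weight.h below; the FP16/FP32 DataType enum spelling and the header names are assumptions.

```cpp
#include <tensor_dim.h>
#include <weight.h>

void example() {
  using nntrainer::TensorDim;

  // Variable dimension, e.g. kept in FP16 for mixed precision.
  TensorDim dim_v(1, 1, 4, 4);
  dim_v.setDataType(TensorDim::DataType::FP16); // assumed enum spelling

  // Gradient copies the shape but stays in FP32 (the activation data type
  // in requestWeight() below).
  TensorDim dim_g(dim_v);
  dim_g.setDataType(TensorDim::DataType::FP32);

  // New constructor taking both dimensions; initializer, regularizer, etc.
  // keep their defaults.
  nntrainer::Weight w(dim_v, dim_g);
}
```

The manager follows the same split: the variable tensor is requested with dim_v and the gradient tensor with dim_g (see manager.cpp below).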
Resolves:
**Self evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test: [X]Passed [ ]Failed [ ]Skipped
Signed-off-by: jijoong.moon <jijoong.moon@samsung.com>
const auto &w_specs = init_context.getWeightsSpec();
for (auto i = 0u; i < w_specs.size(); ++i) {
- shared_weight_names.emplace_back(std::get<7>(w_specs.at(i)));
+ shared_weight_names.emplace_back(std::get<8>(w_specs.at(i)));
}
}
const auto &w_specs = init_context.getWeightsSpec();
for (auto i = 0u; i < w_specs.size(); ++i) {
- shared_weight_names.emplace_back(std::get<7>(w_specs.at(i)));
+ shared_weight_names.emplace_back(std::get<8>(w_specs.at(i)));
}
}
/**
* @brief Request a new weight for the layer
*
- * @param dim dimension of the weight
+ * @param dim_v dimension of the Variable of the weight
+ * @param dim_g dimension of Gradient of the weight
* @param init initializer for the weight
* @param reg regularizer for the weight
* @param reg_const regularization constant for the weight
const WeightRegularizer reg, const float reg_const,
const float decay, const std::string &name,
bool trainable = true, unsigned int out_axis = 3) {
- weights_spec.emplace_back(dim, init, reg, reg_const, decay,
+
+ /** @note We assume the gradient data type is the same as the
+  * activation data type. */
+ TensorDim dim_g(dim);
+
+ dim_g.setDataType(getActivationDataType());
+
+ weights_spec.emplace_back(dim, dim_g, init, reg, reg_const, decay,
clip_by_global_norm, trainable,
prefix + ":" + name, out_axis, loss_scale);
return weights_spec.size() - 1;
namespace nntrainer {
MMapedMemory::MMapedMemory(size_t size, bool allocate_fd_) :
- fd(-1),
- buf(nullptr),
- buf_size(0),
- allocate_fd(allocate_fd_) {
+ fd(-1), buf(nullptr), buf_size(0), allocate_fd(allocate_fd_) {
#ifndef __ANDROID__
if (allocate_fd) {
size_t current_size = weights_v2.size();
for (unsigned int i = 0; i < weights_spec.size(); ++i) {
- auto &[dim, t_initializer, w_reg, w_reg_const, decay, clip_by_global_norm,
- need_gradient, name, axis] = weights_spec.at(i);
+ auto &[dim_v, dim_g, t_initializer, w_reg, w_reg_const, decay,
+ clip_by_global_norm, need_gradient, name, axis, loss_scale] =
+ weights_spec.at(i);
std::vector<unsigned int> var_exec_order;
for (auto order : default_var_exec_order) {
/// shared_name is used and the orignal name is discarded
const auto &shared_name = shared_names.at(i);
/** case when shared names are given */
- var = weight_pool.requestOrExtend(shared_name, dim, var_exec_order,
+ var = weight_pool.requestOrExtend(shared_name, dim_v, var_exec_order,
var_ls, t_initializer);
if (trainable && need_gradient) {
* for each layer anymore and it is hard to overwritten.
*/
grad = tensor_pool.requestOrExtend(shared_name + Var_Grad::grad_suffix,
- dim, grad_exec_order, grad_ls,
+ dim_g, grad_exec_order, grad_ls,
Tensor::Initializer::ZEROS);
}
} else {
/** case requesting fresh weights */
var =
- weight_pool.request(name, dim, var_exec_order, var_ls, t_initializer);
+ weight_pool.request(name, dim_v, var_exec_order, var_ls, t_initializer);
if (trainable && need_gradient) {
/** is_wgrad is the index which is true when it is the gradient tensor
bool is_wgrad = true;
if (Weight::isGradientClipByGlobalNorm(clip_by_global_norm))
is_wgrad = false;
- grad = tensor_pool.request(name + Var_Grad::grad_suffix, dim,
+ grad = tensor_pool.request(name + Var_Grad::grad_suffix, dim_g,
grad_exec_order, grad_ls,
Tensor::Initializer::ZEROS, is_wgrad);
}
* regularizer_constant, decay, clip gradient constant, need_gradient property,
* name, output axis of the tensor object and loss Scale Factor.
*/
-typedef std::tuple<TensorDim, Tensor::Initializer, WeightRegularizer, float,
- float, float, bool, const std::string, unsigned int, float>
+typedef std::tuple<TensorDim, TensorDim, Tensor::Initializer, WeightRegularizer,
+ float, float, float, bool, const std::string, unsigned int,
+ float>
WeightSpec;
/**
grad = std::make_shared<Tensor>(grad_name);
}
+Var_Grad::Var_Grad(const TensorDim &dim_v, const TensorDim &dim_g,
+ const Tensor::Initializer init, bool need_gradient,
+ bool alloc_now, const std::string &name) :
+ is_dependent(false),
+ is_first_access_gradient(false),
+ is_last_access_gradient(false) {
+ var = std::make_shared<Tensor>(dim_v, alloc_now, init, name);
+
+ std::string grad_name = name + grad_suffix;
+ if (need_gradient)
+ /**
+ * @todo gradient initializer should be none, and then they should be set
+ * zero right before using by the user itself.
+ */
+
+ grad = std::make_shared<Tensor>(dim_g, alloc_now,
+ Tensor::Initializer::ZEROS, grad_name);
+ else
+ grad = std::make_shared<Tensor>(grad_name);
+}
+
void Var_Grad::initializeVariable(const Tensor &preallocated) {
/**
* Making a new tensor is intentional here as this tensor is not shared
bool ng = true, bool alloc_now = false,
const std::string &name = "");
+ /**
+ * @brief Construct a new Var_Grad object
+ *
+ * @param dim_v Variable tensor dimension
+ * @param dim_g Gradient tensor dimension
+ * @param ng If the variable is need_gradient
+ * @param alloc_now The memory for the var_grad tensors be allocated upon init
+ * @param name Name for this Var_Grad
+ */
+ explicit Var_Grad(const TensorDim &dim_v, const TensorDim &dim_g,
+ const Tensor::Initializer init = Tensor::Initializer::NONE,
+ bool ng = true, bool alloc_now = false,
+ const std::string &name = "");
+
/**
* @brief Construct a new Var_Grad object
*
decay(decay_const),
clip_by_global_norm(max_norm),
output_axis(axis),
- loss_scale(loss_scale_);
-{
+ loss_scale(loss_scale_) {
+ if (init == Tensor::Initializer::NONE)
+ throw std::invalid_argument("Weight initializer cannot be none");
+ if (regularizer == WeightRegularizer::UNKNOWN)
+ throw std::invalid_argument("Weight regularizer unknown");
+}
+
+Weight::Weight(const TensorDim &dim_v, const TensorDim &dim_g,
+ const Tensor::Initializer init, const WeightRegularizer reg,
+ const float reg_const, const float decay_const,
+ const float max_norm, bool train, bool alloc_now_,
+ std::string name, unsigned int axis, float loss_scale_) :
+ Var_Grad(dim_v, dim_g, init, train, alloc_now_, name),
+ regularizer(reg),
+ regularizer_constant(reg_const),
+ decay(decay_const),
+ clip_by_global_norm(max_norm),
+ output_axis(axis),
+ loss_scale(loss_scale_) {
if (init == Tensor::Initializer::NONE)
throw std::invalid_argument("Weight initializer cannot be none");
if (regularizer == WeightRegularizer::UNKNOWN)
bool alloc_now = false, std::string name = "", unsigned int axis = 3,
float loss_scale_ = 0.0);
+ /**
+ * @brief Construct a new Weight object
+ *
+ * @param dim_v Variable tensor dimension
+ * @param dim_g Gradient tensor dimension
+ * @param init Initializer for the weight
+ * @param reg Regularizer for the weight
+ * @param reg_const Constant multiplier for regularizer
+ * @param ng If the variable needs gradient
+ * @param alloc_now The memory for the weight tensors be allocated upon init
+ * @param name Name for this weight
+ */
+ explicit Weight(
+ const TensorDim &dim_v, const TensorDim &dim_g,
+ const Tensor::Initializer init = Tensor::Initializer::XAVIER_UNIFORM,
+ const WeightRegularizer reg = WeightRegularizer::NONE,
+ const float reg_const = 1.0f, const float decay = 0.0f,
+ const float clip_by_global_norm = 0.0f, bool ng = true,
+ bool alloc_now = false, std::string name = "", unsigned int axis = 3,
+ float loss_scale_ = 0.0);
+
/**
* @brief Construct a new Weight object
*
* @param spec Weight specification
*/
explicit Weight(const Spec &spec, bool alloc_now = false) :
- Weight(std::get<0>(spec), // TensorDim
- std::get<1>(spec), // Tensor::Initializer
- std::get<2>(spec), // WeightRegularizer
- std::get<3>(spec), // WeightRegularizerConstant
- std::get<4>(spec), // weight decay constant
- std::get<5>(spec), // MaxNorm for clipping
- std::get<6>(spec), // need_gradient
+ Weight(std::get<0>(spec), // TensorDim for Variable
+ std::get<1>(spec), // TensorDim for Gradient
+ std::get<2>(spec), // Tensor::Initializer
+ std::get<3>(spec), // WeightRegularizer
+ std::get<4>(spec), // WeightRegularizerConstant
+ std::get<5>(spec), // weight decay constant
+ std::get<6>(spec), // MaxNorm for clipping
+ std::get<7>(spec), // need_gradient
alloc_now,
- std::get<7>(spec), // Name
- std::get<8>(spec), // out axis
- std::get<9>(spec) // loss scale
+ std::get<8>(spec), // Name
+ std::get<9>(spec), // out axis
+ std::get<10>(spec) // loss scale
) {}
/**
explicit Weight(Tensor *v, Tensor *g, const WeightRegularizer reg,
const float reg_const, const float decay,
bool is_dependent = false, const float max_norm = 0.0f,
- unsigned int output_axis_ = 3) :
+ unsigned int output_axis_ = 3, float loss_scale_ = 0.0f) :
Var_Grad(v, g, is_dependent),
regularizer(reg),
regularizer_constant(reg_const),
decay(decay),
clip_by_global_norm(max_norm),
- output_axis(output_axis_) {}
+ output_axis(output_axis_),
+ loss_scale(loss_scale_) {}
/**
* @brief Swap for weight
unsigned int output_axis;
float loss_scale;
std::vector<Tensor *> opt_vars; /**< optimizer variables */
+ std::shared_ptr<Tensor> var32; /**< FP32 copy of the variable (for mixed precision) */
/**
* @brief Apply the weight decay to the weight
const std::tuple<props::Name, props::Distribute, props::Trainable,
std::vector<props::InputConnection>,
std::vector<props::InputShape>, props::SharedFrom,
- props::ClipGradByGlobalNorm, props::Packed> &props,
+ props::ClipGradByGlobalNorm, props::Packed,
+ props::LossScaleForMixed> &props,
const LayerNode *self) {
createIfNull(tf_node);
tf_node->setLayerNode(*self);
class Activation;
class BatchNormalization;
class Packed;
+class LossScaleForMixed;
} // namespace props
class LayerNode;
*/
template <>
void Exporter::saveTflResult(
-
const std::tuple<props::Name, props::Distribute, props::Trainable,
std::vector<props::InputConnection>,
std::vector<props::InputShape>, props::SharedFrom,
- props::ClipGradByGlobalNorm, props::Packed> &props,
+ props::ClipGradByGlobalNorm, props::Packed,
+ props::LossScaleForMixed> &props,
const LayerNode *self);
class BatchNormalizationLayer;