This patch cleans up the weight handling of the initializer.
Weights no longer take care of the initializer themselves, but rather let
var_grad handle it, which in turn lets tensor handle it.
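
Below is a minimal, standalone sketch of the resulting delegation chain
(simplified stand-ins for the real Tensor / Var_Grad / Weight classes, not
the patched code itself): the initializer is passed down to the tensor,
which runs it when it allocates fresh memory, while the weight only
validates it and keeps its regularizer state.

// Minimal sketch of the delegation chain (simplified stand-ins, not the
// patched classes): the initializer travels Weight -> Var_Grad -> Tensor
// and is only run by Tensor when it allocates fresh memory.
#include <algorithm>
#include <cstddef>
#include <memory>
#include <stdexcept>

enum class Initializer { NONE, ZEROS, ONES };

class Tensor {
public:
  Tensor(std::size_t len, bool alloc_now, Initializer init = Initializer::NONE) :
    len(len), initializer(init) {
    if (alloc_now)
      allocate();
  }

  void allocate() {
    /// allocate new memory for the tensor data and run the stored
    /// initializer; a shared tensor would skip initialize() because the
    /// memory is not owned (as in the patch)
    data = std::shared_ptr<float>(new float[len],
                                  std::default_delete<float[]>());
    initialize();
  }

  void initialize() {
    if (initializer == Initializer::NONE)
      return;
    float v = (initializer == Initializer::ONES) ? 1.0f : 0.0f;
    std::fill(data.get(), data.get() + len, v);
  }

private:
  std::size_t len;
  Initializer initializer;
  std::shared_ptr<float> data;
};

/// Var_Grad only forwards the initializer to its tensors
class Var_Grad {
public:
  Var_Grad(std::size_t len, Initializer init, bool need_gradient, bool alloc_now) :
    var(std::make_shared<Tensor>(len, alloc_now, init)),
    grad(need_gradient
           ? std::make_shared<Tensor>(len, alloc_now, Initializer::ZEROS)
           : std::make_shared<Tensor>(0, false)) {}

protected:
  std::shared_ptr<Tensor> var;
  std::shared_ptr<Tensor> grad;
};

/// Weight no longer stores or runs the initializer; it only validates it
class Weight : public Var_Grad {
public:
  Weight(std::size_t len, Initializer init, bool alloc_now) :
    Var_Grad(len, init, /*need_gradient=*/true, alloc_now) {
    if (init == Initializer::NONE)
      throw std::invalid_argument("Weight initializer cannot be none");
  }
};

int main() {
  Weight w(16, Initializer::ONES, /*alloc_now=*/true); // var filled with 1s
  return 0;
}

With this, Weight drops its own initializer member and the
runVariableInitializer() path entirely, which is what the diff below removes.
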
Signed-off-by: Parichay Kapoor <pk.kapoor@samsung.com>
data = std::shared_ptr<float>(src_tensor->tensor()->data,
src_tensor->tensor()->data.get() +
src_tensor->offset());
+ /** as this memory is shared, do NOT initialize */
} else {
/// allocate new memory for the tensor data
data = std::shared_ptr<float>(new float[dim.getDataLen()],
std::default_delete<float[]>());
+ initialize();
}
}
* @note src.data and src.src_tensor CAN co-exist. src.src_tensor is stored
* if the batch size of src is updated and needs reallocation.
*/
- if (src.data)
- dest.data = std::shared_ptr<float>(src.data, src.data.get() + offset);
- else if (!src.src_tensor)
+ dest.data = nullptr;
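+ /// instead of aliasing src.data directly, record src as the source tensor
+ /// and let allocate() set up the shared view (shared memory skips
+ /// initialization)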
+ if (src.data) {
+ dest.src_tensor = std::make_shared<SrcSharedTensor>(&src, offset);
+ dest.allocate();
+ } else if (!src.src_tensor)
dest.src_tensor = std::make_shared<SrcSharedTensor>(&src, offset);
else
dest.src_tensor = std::make_shared<SrcSharedTensor>(
name(name) {
var = std::make_shared<Tensor>(dim, alloc_now, init);
if (need_gradient)
+ /**
+ * @todo the gradient initializer should be NONE, and the user should then
+ * set the gradient to zero explicitly right before using it.
+ */
grad = std::make_shared<Tensor>(dim, alloc_now, Tensor::Initializer::ZEROS);
else
grad = std::make_shared<Tensor>();
void Var_Grad::initializeVariable(const Tensor &preallocated) {
if (!preallocated.empty()) {
var->makeSharedDataTensor(preallocated);
+ /** tensor memory is intentionally left uninitialized for shared tensors */
}
}
* with other layers but the internal memory is.
*/
grad->makeSharedDataTensor(preallocated);
+ /** tensor memory is intentionally left uninitialized for shared tensors */
}
/**
* No need to reset gradient here. With shared memory, each gradient setting
need_gradient = ng;
if (need_gradient && grad->empty()) {
bool alloc_now_ = var->isAllocated();
- grad = std::make_shared<Tensor>(var->getDim(), alloc_now_);
+ grad =
+ std::make_shared<Tensor>(dim, alloc_now_, Tensor::Initializer::ZEROS);
}
}
*
* @note New dimension must maintain the shape of the variable
*/
- void reset(const TensorDim &tdim, bool ng) {
+ void reset(const TensorDim &tdim, Tensor::Initializer init, bool ng) {
dim = tdim;
if (!var->empty())
var->reshape(dim);
+ var->initialize(init);
+
if (!grad->empty())
grad->reshape(dim);
need_gradient = ng;
const WeightRegularizer reg, const float reg_const, bool train,
bool alloc_now_, std::string name) :
Var_Grad(dim, init, train, alloc_now_, name),
- initializer(init),
regularizer(reg),
regularizer_constant(reg_const) {
- if (initializer == Tensor::Initializer::NONE)
- throw std::invalid_argument("Weight initializer unknown");
+ if (init == Tensor::Initializer::NONE)
+ throw std::invalid_argument("Weight initializer cannot be none");
if (regularizer == WeightRegularizer::UNKNOWN)
throw std::invalid_argument("Weight regularizer unknown");
}
-void Weight::initializeVariable(const Tensor &preallocated) {
- Var_Grad::initializeVariable(preallocated);
-
- if (alloc_now)
- runVariableInitializer();
-}
-
-void Weight::runVariableInitializer() {
- Tensor &var_ref = getVariableRef();
- const TensorDim dim = var_ref.getDim();
-
- unsigned int fan_in, fan_out;
-
- /// @fixme: when unit is equal to one, this does not work, we need to rely on
- /// effective dimension then actual numbers here. For now, some heuristics
- /// added to infer what would be fan_in/fan_out
- if (dim.batch() * dim.channel() * dim.height() == 1) {
- fan_out = fan_in = dim.width();
- } else if (dim.batch() * dim.channel() == 1) { /// fully connected layers
- fan_in = dim.height();
- fan_out = dim.width();
- } else { /// convolution filters, @todo extend this to > 4
- auto field_size = dim.height() * dim.width();
-
- // this also handles below cases.
- // 1. fan_in = fan_out = 1 as well.
- // 2. batch == 1, channel == 1 and height == 1, theoretical rank of 1
- fan_in = dim.channel() * field_size;
- fan_out = dim.batch() * field_size;
- }
-
- switch (initializer) {
- case Tensor::Initializer::ZEROS:
- var_ref.setZero();
- break;
- case Tensor::Initializer::ONES:
- var_ref.setValue(1.0f);
- break;
- case Tensor::Initializer::LECUN_NORMAL:
- var_ref.setRandNormal(0.0f, sqrtFloat(1.0f / fan_in));
- break;
- case Tensor::Initializer::XAVIER_NORMAL:
- var_ref.setRandNormal(0.0f, sqrtFloat(2.0f / (fan_in + fan_out)));
- break;
- case Tensor::Initializer::HE_NORMAL:
- var_ref.setRandNormal(0.0f, sqrtFloat(2.0f / (fan_in)));
- break;
- case Tensor::Initializer::LECUN_UNIFORM:
- var_ref.setRandUniform(-1.0f * sqrtFloat(1.0f / fan_in),
- sqrtFloat(1.0f / fan_in));
- break;
- case Tensor::Initializer::XAVIER_UNIFORM:
- var_ref.setRandUniform(-1.0f * sqrtFloat(6.0f / (fan_in + fan_out)),
- sqrtFloat(6.0 / (fan_in + fan_out)));
- break;
- case Tensor::Initializer::HE_UNIFORM:
- var_ref.setRandUniform(-1.0f * sqrtFloat(6.0f / (fan_in)),
- sqrtFloat(6.0 / (fan_in)));
- break;
- default:
- break;
- }
-}
-
void Weight::initializeGradient(const Tensor &preallocated) {
// Use self variable to initialize itself
Var_Grad::initializeGradient(preallocated);
*/
Weight() :
Var_Grad(),
- initializer(Tensor::Initializer::NONE),
regularizer(WeightRegularizer::UNKNOWN),
regularizer_constant(1.0f) {}
*/
explicit Weight(const Tensor &v, const Tensor &g, const std::string &n = "") :
Var_Grad(v, g, n),
- initializer(Tensor::Initializer::XAVIER_UNIFORM),
regularizer(WeightRegularizer::NONE),
regularizer_constant(1.0f) {}
- /**
- * @copydoc var_grad::initializeVariable(const Tensor &)
- */
- void initializeVariable(const Tensor &preallocated = Tensor());
-
/**
* @copydoc var_grad::initializeGradient(const Tensor &)
*/
friend void swap(Weight &lhs, Weight &rhs) noexcept {
using std::swap;
swap(static_cast<Var_Grad &>(lhs), static_cast<Var_Grad &>(rhs));
- swap(lhs.initializer, rhs.initializer);
swap(lhs.regularizer, rhs.regularizer);
}
*/
void reset(const TensorDim &dim, const Tensor::Initializer init,
const WeightRegularizer reg, const float reg_const, bool ng) {
- initializer = init;
regularizer = reg;
regularizer_constant = reg_const;
- Var_Grad::reset(dim, ng);
+ Var_Grad::reset(dim, init, ng);
}
/**
/**
* @brief Allocate and initialize the weight variable, if needed
*/
- void allocateVariable() {
- Var_Grad::allocateVariable();
- runVariableInitializer();
- }
+ void allocateVariable() { Var_Grad::allocateVariable(); }
/**
* @brief Allocate and initialize the weight gradient, if needed
*/
void allocateGradient() {
Var_Grad::allocateGradient();
- resetGradient();
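+ /// no explicit reset needed: the gradient tensor is created with the
+ /// ZEROS initializer and zeroes itself on allocation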
allocateOptimizerVariables();
}
}
private:
- Tensor::Initializer initializer; /**< initializer for this variable */
- WeightRegularizer regularizer; /**< regularizer for this variable */
- float regularizer_constant; /**< constant factor for regularization */
+ WeightRegularizer regularizer; /**< regularizer for this variable */
+ float regularizer_constant; /**< constant factor for regularization */
std::vector<Tensor> opt_vars; /**< optimizer variables */
std::vector<TensorDim> opt_vars_dim; /**< optimizer variables dimensions */
- /**
- * @brief Initialize the weight with the initializer
- */
- void runVariableInitializer();
-
/**
* @brief Allocate optimizer related variables for the given weights
*/