}
}
setBatchSize(batch_size);
+ // Allocate and initialize weights
+ manager->initializeWeights();
if (in_place_optimization) {
model_graph.inPlaceOptimize(*manager);
}
- manager->initialize();
-
initialized = true;
return status;
}
int NeuralNetwork::assignMem(bool trainable) {
  // TODO: directly replace this
-  manager->initializeInOuts(trainable);
+  // initializeTensors covers inputs/outputs and, when trainable, the
+  // gradients/derivatives as well (replaces the old initializeInOuts)
+  manager->initializeTensors(trainable);
  return ML_ERROR_NONE;
}
max_grad_size = std::max(max_grad_size, grad_size);
}
-/**
- * @brief Allocate and initialize the weight variable
- */
-void Manager::initialize() {
- if (total_weight_size == 0) {
- ml_logw("Nothing done on initialize because there is no weight registered");
- return;
- }
- using AllocFunc = std::function<Tensor(const TensorDim &, size_t)>;
-
+Manager::AllocFunc Manager::getAllocFunc(bool is_weight) {
AllocFunc allocate_none = [](const TensorDim &dim, size_t) {
return Tensor();
};
- AllocFunc allocate_weight = allocate_none;
- AllocFunc allocate_grad = allocate_none;
+ AllocFunc allocate_func = allocate_none;
if (use_shared_memory) {
};
};
- allocate_weight = get_allocfunc(total_weight_size, weight_mmaped_memory);
-
- size_t grad_size =
- enable_gradient_memory_opt ? max_grad_size : total_grad_size;
- allocate_grad = get_allocfunc(grad_size, grad_mmaped_memory);
-
- } else {
+ if (is_weight) {
+ /** For weights */
+ allocate_func = get_allocfunc(total_weight_size, weight_mmaped_memory);
+ } else {
+ /** for gradients */
+ size_t grad_size =
+ enable_gradient_memory_opt ? max_grad_size : total_grad_size;
+ allocate_func = get_allocfunc(grad_size, grad_mmaped_memory);
+ }
+ } else if (!is_weight) {
+ /** only for gradients */
if (max_grad_size > 0 && enable_gradient_memory_opt) {
std::shared_ptr<float> window(new float[max_grad_size],
std::default_delete<float[]>());
- allocate_grad = [window](const TensorDim &dim, size_t offset) {
+ allocate_func = [window](const TensorDim &dim, size_t offset) {
return Tensor::Map(window, dim, offset);
};
}
}
+ return allocate_func;
+}
+
+/**
+ * @brief Allocate and initialize the weight variables
+ * @note This only allocates the weight tensors themselves; gradient and
+ * optimizer memory is handled separately (weight.initialize is called with
+ * gtrain = false below).
+ */
+void Manager::initializeWeights() {
+  if (total_weight_size == 0) {
+    ml_logw("Nothing done on initialize because there is no weight registered");
+    return;
+  }
+
+  AllocFunc allocate_weight = getAllocFunc(true);
+
+  // weights are packed consecutively into the weight memory pool
+  size_t weight_offset = 0;
+
+  for (auto &l_w : weights) {
+    for (auto &w : l_w) {
+      Weight &weight = w.get();
+      auto dim = weight.getDim();
+      Tensor weight_prealloc = allocate_weight(dim, weight_offset);
+
+      weight_offset += dim.getDataLen();
+      // gtrain = false: no gradient/optimizer allocation in this pass
+      weight.initialize(weight_prealloc, Tensor(), false);
+    }
+  }
+}
+
+/**
+ * @brief Allocate and initialize the gradients of the weights
+void Manager::initializeGradients() {
+  // no registered weights implies there are no gradients to allocate either
+  if (total_weight_size == 0) {
+    ml_logw("Nothing done on initialize because there is no weight registered");
+    return;
+  }
+
+  AllocFunc allocate_grad = getAllocFunc(false);
+
  size_t grad_offset = 0;
  for (auto &l_w : weights) {
    for (auto &w : l_w) {
      Weight &weight = w.get();
      auto dim = weight.getDim();
-      Tensor weight_prealloc = allocate_weight(dim, weight_offset);
-      Tensor grad_prealloc =
-        weight.getTrainable() ? allocate_grad(dim, grad_offset) : Tensor();
+      // only trainable weights receive pre-allocated gradient memory
+      Tensor grad_prealloc = Tensor();
+      if (weight.getTrainable())
+        grad_prealloc = allocate_grad(dim, grad_offset);
-      weight_offset += dim.getDataLen();
+      // NOTE(review): grad_offset advances for non-trainable weights too —
+      // presumably to keep the layout aligned with the weight pool; confirm
+      // this is intended when gradient memory optimization is enabled
      grad_offset += dim.getDataLen();
-      weight.initialize(weight_prealloc, grad_prealloc);
+      weight.initializeGrad(grad_prealloc, true);
    }
  }
}
}
/**
- * @brief Initialize the inputs/outputs for the layer
+ * @brief Initialize the inputs/outputs/gradients/derivatives for the layer
*/
-void Manager::initializeInOuts(bool trainable) {
+void Manager::initializeTensors(bool trainable) {
+ // Allocate gradients
+ if (trainable)
+ initializeGradients();
+
// Allocate shared derivative memory
Tensor shared_deriv;
if (max_derivative_size > 0 && enable_activation_memory_opt && trainable)
/**
* @brief Allocate and initialize the weight variable
+ * @note This only allocates weights and does not handle training related
+ * memory for weights
*/
- void initialize();
+ void initializeWeights();
/**
* @brief Reset the manager state
void untrackLayerInOuts(const std::string &layer_name);
/**
- * @brief Initialize the inputs/outputs for the layers
- * @todo Make initialize() and initializeInOuts() coherent but still separated
- * @param[in] trainable If true, initialize derivates, else, do not.
+ * @brief Initialize the inputs/outputs/derivatives/gradients for the layers
+ * @param[in] trainable If true, initialize derivates/gradients, else, do not.
+ * @note The memory allocation strategy varies based on the trainable. The
+ * memory allocated for inference mode is not compatible with training, and
+ * will require full allocation than reusing memory allocated with inference
+ * mode.
*/
- void initializeInOuts(bool trainable);
+ void initializeTensors(bool trainable);
/**
* @brief Set the batch size for the inputs/outputs of the layers
std::unique_ptr<MMapedMemory> weight_mmaped_memory;
std::unique_ptr<MMapedMemory> grad_mmaped_memory;
+ using AllocFunc = std::function<Tensor(const TensorDim &, size_t)>;
+
/**
* @brief Track the inputs/ouputs of the layer
* @param[in] layer_type Type of the layer
* @param[in] var_name Name of the variable
*/
void untrackVariable(const std::string &var_name);
+
+ /**
+ * @brief Allocate and initialize the weight gradients
+ * @note This only allocates weight's gradients and assumes that weights are
+ * pre-allocated.
+ */
+ void initializeGradients();
+
+ /**
+ * @brief Get helper allocator function to use for weight or gradient
+   * @param[in] is_weight true to get the allocator for weights, false for
+   * the allocator for gradients
+ */
+ AllocFunc getAllocFunc(bool is_weight);
};
} // namespace nntrainer
}
void Weight::initialize(const Tensor &weights_preallocated,
- const Tensor &grad_preallocated) {
- Var_Grad::initialize(weights_preallocated, grad_preallocated);
+ const Tensor &grad_preallocated, bool gtrain) {
+ Var_Grad::initialize(weights_preallocated, grad_preallocated, gtrain);
+
+ if (gtrain) {
+ // If trainable, allocate optimizer parameters
+ for (auto const &dim : opt_vars_dim) {
+ opt_vars.emplace_back(dim);
+ opt_vars.back().setZero();
+ }
+ }
Tensor &var_ref = getVariableRef();
const TensorDim dim = var_ref.getDim();
}
}
+/**
+ * @brief Initialize the gradient tensor (and optimizer variables) for an
+ * already-initialized weight variable
+ */
+void Weight::initializeGrad(const Tensor &grad_preallocated, bool gtrain) {
+  // Use self variable to initialize itself
+  Var_Grad::initialize(this->getVariableRef(), grad_preallocated, gtrain);
+
+  if (gtrain) {
+    // If trainable, allocate optimizer parameters
+    // NOTE(review): opt_vars is appended to without being cleared first —
+    // calling this twice for the same weight (or after initialize() with
+    // gtrain = true) would duplicate optimizer variables; confirm callers
+    // invoke it only once per weight
+    for (auto const &dim : opt_vars_dim) {
+      opt_vars.emplace_back(dim);
+      opt_vars.back().setZero();
+    }
+  }
+}
+
} // namespace nntrainer
* @copydoc var_grad::initialize(const Tensor &, const Tensor &)
*/
void initialize(const Tensor &weight_preallocated = Tensor(),
- const Tensor &grad_preallocated = Tensor());
+ const Tensor &grad_preallocated = Tensor(),
+ bool gtrain = true);
+
+ /**
+ * @brief Initialize the gradient for the weight
+   * @param grad_preallocated if initialized, use this tensor for the gradient
+   * @param gtrain if true, also allocate the optimizer variables for training
+   */
+ void initializeGrad(const Tensor &grad_preallocated = Tensor(),
+ bool gtrain = true);
/**
* @brief Swap for weight
  /**
   * @brief Clear optimizer variables
   */
-  void clearOptimizerVariables() { opt_vars.clear(); }
+  void clearOptimizerVariables() {
+    opt_vars.clear();
+    // also drop the recorded dimensions so re-registration starts fresh
+    opt_vars_dim.clear();
+  }
  /**
   * @brief Add optimizer variables
   * @param dim Optimizer variable dimension
   */
  void addOptimizerVariable(const TensorDim &dim) {
-    opt_vars.emplace_back(dim);
+    // only the dimension is recorded here; the actual tensor is allocated
+    // (and zero-initialized) later in initialize()/initializeGrad()
+    opt_vars_dim.emplace_back(dim);
    // TODO: Move this out when an optimizer does not initialize with 0.
-    opt_vars.back().setZero();
  }
/**
private:
WeightInitializer initializer; /**< initializer for this variable */
- std::vector<Tensor> opt_vars; /**< optimizer variables */
+ std::vector<Tensor> opt_vars; /**< optimizer variables */
+ std::vector<TensorDim> opt_vars_dim; /**< optimizer variables dimensions */
};
} // namespace nntrainer
layer.setOutputBuffers(manager.trackLayerOutputs(
layer.getType(), layer.getName(), layer.getOutputDimension()));
- manager.initializeInOuts(true);
- manager.initialize();
+ manager.initializeTensors(true);
return status;
}
EXPECT_EQ(status, ML_ERROR_NONE);
EXPECT_NO_THROW(opt->addOptimizerVariable(layer.getWeightsRef()));
+ manager.initializeTensors(true);
return status;
}
label =
MAKE_SHARED_TENSOR(nntrainer::Tensor(layer.getOutputDimension()[0]));
- std::vector<nntrainer::Tensor> v;
-
- for (unsigned int i = 0; i < layer.getNumWeights(); ++i) {
- v.push_back(layer.weightAt(i).getVariable());
- }
-
- loadFile("tc_fc_1_FCLayer.in", in);
- loadFile("tc_fc_1_FCKernel.in", v);
- loadFile("tc_fc_1_FCLabel.in", *label);
layers.clear();
return status;
manager.trackLayerOutputs(act_layer->getType(), act_layer->getName(),
act_layer->getOutputDimension()));
- manager.initializeInOuts(true);
+ manager.initializeTensors(true);
layers.push_back(act_layer);
}
manager.trackLayerOutputs(loss_layer->getType(), loss_layer->getName(),
loss_layer->getOutputDimension()));
- manager.initializeInOuts(true);
+ manager.initializeTensors(true);
layers.push_back(loss_layer);
if (type == nntrainer::LossType::LOSS_ENTROPY_SOFTMAX) {
}
void matchForwarding(const char *file) {
+ std::vector<nntrainer::Tensor> v;
+ for (unsigned int i = 0; i < layer.getNumWeights(); ++i) {
+ v.push_back(layer.weightAt(i).getVariable());
+ }
+
+ loadFile("tc_fc_1_FCLayer.in", in);
+ loadFile("tc_fc_1_FCKernel.in", v);
+ loadFile("tc_fc_1_FCLabel.in", *label);
+
sharedConstTensor out;
EXPECT_NO_THROW(out =
layer.forwarding_with_val({MAKE_SHARED_TENSOR(in)})[0]);
layer.setOutputBuffers(manager.trackLayerOutputs(
layer.getType(), layer.getName(), layer.getOutputDimension()));
- manager.initializeInOuts(true);
+ manager.initializeTensors(true);
EXPECT_THROW(
layer.forwarding_with_val({MAKE_SHARED_TENSOR(a)}, {MAKE_SHARED_TENSOR(b)}),
std::runtime_error);
layer.setOutputBuffers(manager.trackLayerOutputs(
layer.getType(), layer.getName(), layer.getOutputDimension()));
- manager.initializeInOuts(true);
+ manager.initializeTensors(true);
EXPECT_THROW(layer.backwarding_with_val({MAKE_SHARED_TENSOR(a)}),
std::runtime_error);
}
layer.setOutputBuffers(manager.trackLayerOutputs(
layer.getType(), layer.getName(), layer.getOutputDimension()));
- manager.initializeInOuts(true);
+ manager.initializeTensors(true);
EXPECT_THROW(
layer.forwarding_with_val({MAKE_SHARED_TENSOR(a)}, {MAKE_SHARED_TENSOR(b)}),
std::runtime_error);
layer.setOutputBuffers(manager.trackLayerOutputs(
layer.getType(), layer.getName(), layer.getOutputDimension()));
- manager.initializeInOuts(true);
+ manager.initializeTensors(true);
EXPECT_THROW(layer.backwarding_with_val({MAKE_SHARED_TENSOR(a)}),
std::runtime_error);
}
layer.getType(), layer.getName(), layer.getInputDimension()));
layer.setOutputBuffers(manager.trackLayerOutputs(
layer.getType(), layer.getName(), layer.getOutputDimension()));
- manager.initializeInOuts(true);
+ manager.initializeTensors(true);
nntrainer::Tensor result;
EXPECT_NO_THROW(result =
layer.setOutputBuffers(manager.trackLayerOutputs(
layer.getType(), layer.getName(), layer.getOutputDimension()));
- manager.initializeInOuts(true);
+ manager.initializeTensors(true);
EXPECT_THROW(layer.forwarding_with_val({input}), std::invalid_argument);
}
layer.setOutputBuffers(manager.trackLayerOutputs(
layer.getType(), layer.getName(), layer.getOutputDimension()));
- manager.initializeInOuts(true);
+ manager.initializeTensors(true);
EXPECT_THROW(layer.forwarding_with_val({input}), std::runtime_error);
}