This patch updates manager to use TensorPool for all of its requests.
Other changes are as listed below:
- NetworkGraph now caches the input and label Var_Grads, which can be
filled directly by the model before execution.
- setLabels and setInputs in the model have been updated. Further,
manual setting of inputs and labels has been removed.
- Introduce clear in memory pool to clear any allocations and requests
- TensorPool clears any requests made before making more requests in
finalize
- models unittest updated to not pass a label when loss is not given in
the model. Passing a label without loss in the model now results in an error.
- Manager updated to use tensor pool for all the memory requests. The
allocation, initialization and deallocation have been correspondingly simplified.
Much-needed cleanup will be done in the next commit.
Signed-off-by: Parichay Kapoor <pk.kapoor@samsung.com>
[squash commit]
Signed-off-by: Parichay Kapoor <pk.kapoor@samsung.com>
auto allocated = tensor_manager->isAllocated();
if (allocated)
- tensor_manager->deallocateTensors();
+ deallocateTensors();
for (auto iter = cbegin(); iter != cend(); iter++) {
(*iter)->setBatch(batch_size);
}
}
}
+ /// resize input and output spec
tensor_manager->setBatchSize(batch_size);
if (allocated)
- tensor_manager->allocateTensors();
+ allocateTensors();
}
sharedConstTensors NetworkGraph::forwarding(bool training) const {
const std::vector<Var_Grad *> &outputs =
tensor_manager->requestOutputs(gnode, init_context.getOutputDimensions());
+ /**
+ * @note cache the labels and input var_grads to be able to fill them when
+ * running the graph.
+ */
+ if (gnode.getInputConnections().empty())
+ input_list.insert(input_list.end(), inputs.begin(), inputs.end());
+ /** @todo check compatibility of requireLabel() and
+ * getOutputConnections().empty() */
+ if (lnode->requireLabel())
+ label_list.insert(label_list.end(), outputs.begin(), outputs.end());
+
+ /**
+ * @note must use existing properties like name/trainable of run_context to
+ * create the new run_context
+ */
lnode->configureRunContext(
// TODO: update weights spec for trainable based on layer trainable prop
tensor_manager->requestWeights(gnode, init_context.getWeightsSpec()),
/**
* @brief Allocate and initialize the weight variable
*/
- void initializeWeights() { tensor_manager->initializeWeights(); }
+ void initializeWeights() { // NOTE(review): cend() - 1 assumes the graph holds at least one node — confirm callers guarantee this
+ tensor_manager->initializeWeights(
+ std::get<0>((*(cend() - 1))->getExecutionOrder())); // element 0 of the execution order of the last node in iteration order is passed as the manager's max execution order
+ }
/**
* @brief Initialize the inputs/outputs/derivatives/gradients for the layers
* @param[in] training If true, initialize derivates/gradients, else, do not.
*/
void initializeTensors(bool training) {
- tensor_manager->initializeTensors(training);
+ if (!training) // inference: the bound is element 0 of the last node's execution order (presumably its forwarding order — TODO confirm)
+ tensor_manager->initializeTensors(
+ training, std::get<0>((*(cend() - 1))->getExecutionOrder()));
+ else // training: the bound is element 1 of the first node's execution order (presumably its gradient-calculation order — TODO confirm)
+ /** @todo update this to skip non-trainable layers */
+ tensor_manager->initializeTensors(
+ training, std::get<1>((*(cbegin()))->getExecutionOrder()));
}
/**
for (auto const &w : tensor_manager->getWeights()) {
const TensorDim &dim = w->getDim();
std::vector<TensorDim> dims = cb(dim);
- for (auto &dim : dims) {
- w->addOptimizerVariable(dim);
- }
+ w->setOptimizerVariables(tensor_manager->requestWeightOptimizerVariables(
+ dims, w->getName(), TensorLifespan::MAX_LIFESPAN,
+ Tensor::Initializer::ZEROS));
}
}
+  /**
+   * @brief Get the Input List for the graph
+   *
+   * @return const std::vector<Var_Grad *>& lists of inputs, i.e. the input
+   * var_grads cached for the nodes that have no input connections
+   * @note const-qualified: the accessor only reads the cached list, so it can
+   * also be used from const members of the graph
+   */
+  const std::vector<Var_Grad *> &getInputList() const { return input_list; }
+
+  /**
+   * @brief Get the Label List for the graph
+   *
+   * @return const std::vector<Var_Grad *>& lists of labels, i.e. the output
+   * var_grads cached for the nodes whose layer requires a label
+   * @note const-qualified: the accessor only reads the cached list, so it can
+   * also be used from const members of the graph
+   */
+  const std::vector<Var_Grad *> &getLabelList() const { return label_list; }
+
private:
std::map<std::string, std::string> sub_in_out; /** This is map to identify
input and output layer name of subgraph */
at the start of the graph */
bool compiled; /**< if the model graph is compiled */
unsigned int batch_size; /**< current batch_size */
+ std::vector<Var_Grad *> label_list; /**< var_grads for the labels */
+ std::vector<Var_Grad *> input_list; /**< var_grads for the inputs */
/**
* @brief topological sort
// initialize optimizer and related variables
if (opt) {
+ /** TODO: update request of optimizer to be of same format as
+ * Layer::requestTensor */
opt->finalize();
std::function<std::vector<TensorDim>(const TensorDim &)> cb =
[this](const TensorDim &dim) {
*/
NeuralNetwork::~NeuralNetwork() { model_graph.reset(); }
-void NeuralNetwork::setLabels(sharedConstTensors label) {
- auto fill_label = [&label](auto const &layer_node) {
- NNTR_THROW_IF(label.size() != layer_node->getNumOutputs(),
- std::invalid_argument)
- << "label size does not match with the layer requirements"
- << " layer: " << layer_node->getName() << " label size: " << label.size()
- << " requirements size: " << layer_node->getNumOutputs();
+/**
+ * @brief Feed the given labels to the label var_grads, or clear them
+ *
+ * @param data labels to feed; when empty, every label in label_list is cleared
+ * @param label_list label var_grads, filled through initializeGradient()
+ * @throws std::invalid_argument if data is not empty and its size differs
+ * from the size of label_list
+ */
+static void setLabels(const std::vector<Tensor> &data,
+                      const std::vector<Var_Grad *> &label_list) {
- for (unsigned int i = 0; i < layer_node->getNumOutputs(); i++) {
- layer_node->getOutputGradUnsafe(i) = *label[i];
- }
- };
+  NNTR_THROW_IF(!data.empty() && data.size() != label_list.size(),
+                std::invalid_argument)
+    << "label size does not match with the network requirements"
+    << " label size: " << data.size()
+    << " requirements size: " << label_list.size();
- auto clear_label = [](auto const &layer_node) {
- for (unsigned int i = 0; i < layer_node->getNumOutputs(); i++) {
- layer_node->getOutputGradUnsafe(i) = Tensor();
- }
- };
+  /// feed or clear label
+  /// @note the loop is bounded by label_list (not data) so that an empty data
+  /// actually clears every label instead of skipping the loop entirely
+  for (unsigned int idx = 0; idx < label_list.size(); idx++) {
+    if (data.empty())
+      label_list[idx]->initializeGradient();
+    else
+      /// sizes are equal here (checked above), so data[idx] is always valid;
+      /// this also covers the single-label case, where idx can only be 0
+      label_list[idx]->initializeGradient(data[idx]);
+  }
+}
+
+static void setInputs(const std::vector<Tensor> &data,
+ const std::vector<Var_Grad *> &input_list) { // feeds one tensor per graph input; a count mismatch raises std::invalid_argument
+
+ NNTR_THROW_IF(data.size() != input_list.size(), std::invalid_argument)
+ << "input size does not match with the network requirements"
+ << " input size: " << data.size()
+ << " requirements size: " << input_list.size();
/// feed or clear label
- for (auto iter = model_graph.cbegin(); iter != model_graph.cend(); iter++) {
- auto const &lnode = *iter;
- if (lnode->requireLabel()) {
- label.empty() ? clear_label(lnode) : fill_label(lnode);
- }
+ for (unsigned int idx = 0; idx < input_list.size(); idx++) { // inputs are only fed here, never cleared; the "feed or clear label" remark above is a leftover context line of the old setLabels
+ input_list[idx]->initializeVariable(data[idx]); // sizes were checked equal above, so data[idx] is in range
+ }
+}
+
+/**
+ * @brief Feed shared label tensors to the label var_grads, or clear them
+ *
+ * @param data labels; empty clears every label, a single tensor is fed to
+ * every label, otherwise one tensor is fed per label
+ * @param label_list label var_grads, filled through initializeGradient()
+ * @throws std::invalid_argument if more than one label is given and the
+ * count differs from the size of label_list
+ */
+static void setLabels(sharedConstTensors &data,
+                      const std::vector<Var_Grad *> &label_list) {
+  const bool count_mismatch =
+    data.size() > 1 && data.size() != label_list.size();
+
+  NNTR_THROW_IF(count_mismatch, std::invalid_argument)
+    << "label size does not match with the network requirements"
+    << " label size: " << data.size()
+    << " requirements size: " << label_list.size();
+
+  unsigned int position = 0;
+  for (Var_Grad *label_vg : label_list) {
+    if (data.empty()) {
+      /// nothing given: reset the label
+      label_vg->initializeGradient();
+    } else {
+      /// a lone label is reused for every entry, otherwise match by position
+      const unsigned int source = (data.size() == 1) ? 0 : position;
+      label_vg->initializeGradient(*data[source]);
+    }
+    ++position;
+  }
+}
+
+/**
+ * @brief Feed shared input tensors to the input var_grads
+ *
+ * @param data inputs, exactly one per entry of input_list
+ * @param input_list input var_grads, filled through initializeVariable()
+ * @throws std::invalid_argument if the number of inputs differs from the
+ * size of input_list
+ */
+static void setInputs(sharedConstTensors &data,
+                      const std::vector<Var_Grad *> &input_list) {
+  const bool count_mismatch = data.size() != input_list.size();
+
+  NNTR_THROW_IF(count_mismatch, std::invalid_argument)
+    << "input size does not match with the network requirements"
+    << " input size: " << data.size()
+    << " requirements size: " << input_list.size();
+
+  /// bind every provided tensor to the input at the same position
+  const unsigned int provided = data.size();
+  for (unsigned int pos = 0; pos != provided; ++pos) {
+    Var_Grad *target = input_list[pos];
+    target->initializeVariable(*data[pos]);
  }
}
<< " label_batch: " << label[0]->batch()
<< " target_batch: " << current_batch;
- setLabels(label);
- model_graph.getSortedLayerNode(0)->getInput(0) = *input[0].get();
+ setLabels(label, model_graph.getLabelList());
+ setInputs(input, model_graph.getInputList());
return forwarding(training);
}
* No need to call at first Input Layer (No data to be updated)
*/
void NeuralNetwork::backwarding(sharedConstTensors label, int iteration) {
- setLabels(label);
+ setLabels(label, model_graph.getLabelList()); // labels are written into the label var_grads cached by the graph before the backward pass runs
backwarding(iteration);
}
}
int NeuralNetwork::allocate(bool trainable) {
+ model_graph.deallocateTensors(false);
+
// TODO: directly replace this
model_graph.initializeTensors(trainable);
auto batch_size = std::get<props::TrainingBatchSize>(model_flex_props);
auto &output = last_layer_node->getOutput(0);
- auto &label = last_layer_node->getOutputGrad(0);
- auto &in = first_layer_node->getInput(0);
+ /** @todo use model_graph.getInputDimensions() and getOutputDimensions() */
auto in_dims = first_layer_node->getInputDimensions();
auto label_dims = last_layer_node->getOutputDimensions();
* @param on_epoch_end function that will recieve reference to stat,
* buffer which will be called on the epoch end
*/
- auto run_epoch = [this, &in, &label, &in_dims, &label_dims, batch_size](
+ auto run_epoch = [this, &in_dims, &label_dims, &output, batch_size](
DataBuffer *buffer, bool shuffle,
- auto &&on_iteration_fetch, auto &&on_epoch_end) {
+ auto &&on_iteration_fetch, auto &&on_iteration_update_stat,
+ auto &&on_epoch_end) {
/// @todo managing metrics must be handled here as well!! for now it is
/// handled in individual callbacks
RunStats stat;
/// @todo support partial batch
continue;
}
- /// @todo multiple input support
- in = iteration.getInputsRef().front();
- label = iteration.getLabelsRef().front();
+
+ auto const &labels = iteration.getLabelsRef();
+ setLabels(labels, model_graph.getLabelList());
+ auto const &inputs = iteration.getInputsRef();
+ setInputs(inputs, model_graph.getInputList());
on_iteration_fetch(stat, *buffer);
+ on_iteration_update_stat(stat, {output}, labels);
}
future_iq.get();
on_epoch_end(stat, *buffer);
std::cout << "#" << epoch_idx << "/" << getEpochs();
auto loss = getLoss();
- stat.loss += loss;
- buffer.displayProgress(stat.num_iterations++, loss);
+ buffer.displayProgress(stat.num_iterations, loss);
+ };
+
+ auto update_train_stat = [this](RunStats &stat,
+ const std::vector<Tensor> &outputs,
+ const std::vector<Tensor> &labels) {
+ stat.loss += getLoss();
+ stat.num_iterations++;
};
auto train_epoch_end = [this](RunStats &stat, DataBuffer &buffer) {
<< " - Training Loss: " << stat.loss;
};
- auto eval_for_iteration = [this, &output, &label,
- batch_size](RunStats &stat, DataBuffer &buffer) {
+ auto eval_for_iteration = [this, batch_size](RunStats &stat,
+ DataBuffer &buffer) {
forwarding(false);
- auto model_out = output.argmax();
- auto label_out = label.argmax();
+ };
+
+ auto update_eval_stat = [this, batch_size, &update_train_stat](
+ RunStats &stat, const std::vector<Tensor> &outputs,
+ const std::vector<Tensor> &labels) {
+ auto model_out = outputs[0].argmax();
+ auto label_out = labels[0].argmax();
+
for (unsigned int b = 0; b < batch_size; b++) {
if (model_out[b] == label_out[b])
stat.num_correct_predictions++;
}
- stat.num_iterations++;
- stat.loss += getLoss();
+
+ update_train_stat(stat, outputs, labels);
};
auto eval_epoch_end = [this, batch_size, max_acc = 0.0f,
auto epochs = getEpochs();
for (epoch_idx = epoch_idx + 1; epoch_idx <= epochs; ++epoch_idx) {
- training =
- run_epoch(train_buffer.get(), true, train_for_iteration, train_epoch_end);
+ training = run_epoch(train_buffer.get(), true, train_for_iteration,
+ update_train_stat, train_epoch_end);
if (valid_buffer) {
validation = run_epoch(valid_buffer.get(), false, eval_for_iteration,
- eval_epoch_end);
+ update_eval_stat, eval_epoch_end);
}
std::cout << '\n';
}
if (test_buffer) {
std::cout << "Evaluation with test data...\n";
- testing =
- run_epoch(test_buffer.get(), false, eval_for_iteration, eval_epoch_end);
+ testing = run_epoch(test_buffer.get(), false, eval_for_iteration,
+ update_eval_stat, eval_epoch_end);
}
return status;
~NeuralNetwork();
/**
- * @brief Set labels to the layers which require the label
- * @todo Set label with multiple labels
- * @param label label
- */
- void setLabels(sharedConstTensors label);
-
- /**
* @brief Get Loss from the previous ran batch of data
* @retval loss value
*/
/**
* @brief Allocate and initialize the weight variable
*/
-void Manager::initializeWeights() {
+void Manager::initializeWeights(unsigned int max_exec_order_) {
if (weights_initialized)
return;
if (LAYER_V2) {
- weight_pool.finalize(BasicPlanner(), 0, max_exec_order);
+ weight_pool.finalize(BasicPlanner(), 0, max_exec_order_);
} else {
if (total_weight_size == 0) {
ml_logw(
}
void Manager::allocateGradients() {
- if (LAYER_V2) {
- for (auto &w : weights_v2) {
- w->allocateOptimizerVariables();
- }
- } else {
+ if (!LAYER_V2) {
/** Allocate the source tensors for shared memories */
if (!shared_grad.empty())
shared_grad.allocate();
}
void Manager::deallocateGradients() {
- if (LAYER_V2) {
- for (auto &w : weights_v2) {
- w->deallocateOptimizerVariables();
- }
- } else {
+ if (!LAYER_V2) {
shared_grad.deallocate();
for (auto &l_w : weights) {
for (auto &w : l_w) {
if (!shared_inout.empty())
shared_inout.allocate();
- if (LAYER_V2) {
- for (auto &in : inputs_v2) {
- in->allocateVariable();
- }
- for (auto &out : outputs_v2) {
- out->allocateVariable();
- }
- } else {
+ if (!LAYER_V2) {
for (auto &l_io : in_outs) {
for (auto &io : l_io) {
io->allocateVariable();
void Manager::deallocateInOuts() {
shared_inout.deallocate();
- if (LAYER_V2) {
- for (auto &in : inputs_v2) {
- in->deallocateVariable();
- }
- for (auto &out : outputs_v2) {
- out->deallocateVariable();
- }
- // for (auto &t : tensors_v2) {
- // t->deallocateVariable();
- // }
- } else {
+ if (!LAYER_V2) {
for (auto &l_io : in_outs) {
for (auto &io : l_io) {
io->deallocateVariable();
if (!shared_deriv.empty())
shared_deriv.allocate();
- if (LAYER_V2) {
- for (auto &in : inputs_v2) {
- in->allocateGradient();
- }
- for (auto &out : outputs_v2) {
- out->allocateGradient();
- }
- // for (auto &t : tensors_v2) {
- // t->allocateGradient();
- // }
- } else {
+ if (!LAYER_V2) {
for (auto &l_io : in_outs) {
for (auto &io : l_io) {
io->allocateGradient();
void Manager::deallocateDerivatives() {
shared_deriv.deallocate();
- if (LAYER_V2) {
- for (auto &in : inputs_v2) {
- in->deallocateGradient();
- }
- for (auto &out : outputs_v2) {
- out->deallocateGradient();
- }
- // for (auto &t : tensors_v2) {
- // t->deallocateGradient();
- // }
- } else {
+ if (!LAYER_V2) {
for (auto &l_io : in_outs) {
for (auto &io : l_io) {
io->deallocateGradient();
}
}
-void Manager::initializeTensorsInference() {
+void Manager::initializeTensorsInference(unsigned int max_exec_order_) {
// @todo Do not count memory of the input tensor of the input layer and
// output tensor of the last layer in the estimate of max_shared_inout as it
// is not used
use_first_last = 1 - use_first_last;
}
} else {
- // Inference Mode without optimizations
- for (auto &outs : outputs_v2) {
- outs->initialize(Tensor(), Tensor(), false);
- }
-
- // Inference Mode without optimizations
- // for (auto &ts : tensors_v2) {
- // ts->initialize(Tensor(), Tensor(), false);
- // }
-
- // In inference mode, do not allocate the memory for the input of the first
- // layer. These is the first entry in the in_outs. Inference() will override
- // input tensors of the first layer
- for ([[maybe_unused]] auto &ins : inputs_v2) {
- // as inputs_v2 are only set for input layers, this can be skipped all the
- // way
- continue;
- }
+ tensor_pool.finalize(BasicPlanner(), 0, max_exec_order_);
}
}
-void Manager::initializeTensorsTrain() {
+void Manager::initializeTensorsTrain(unsigned int max_exec_order_) {
// Initialize gradients
initializeGradients();
}
}
} else {
- tensor_pool.finalize(BasicPlanner(), 0, max_exec_order);
-
- // Training Mode without optimizations
- for (auto &outs : outputs_v2) {
- outs->initialize(Tensor(), Tensor(), true);
- }
-
- // Training Mode without optimizations
- // for (auto &ts : tensors_v2) {
- // ts->initialize(Tensor(), Tensor(), true);
- // }
-
- // Training Mode without optimizations
- for (auto &ins : inputs_v2) {
- ins->initialize(Tensor(), Tensor(), true);
- }
+ tensor_pool.finalize(BasicPlanner(), 0, max_exec_order_);
}
}
/**
* @brief Initialize the inputs/outputs/gradients/derivatives for the layer
*/
-void Manager::initializeTensors(bool training) {
+void Manager::initializeTensors(bool training, unsigned int max_exec_order_) { // max_exec_order_ is forwarded to the weight and tensor initialization below
// If weights not initialized, initialize weights as well
if (!weights_initialized)
- initializeWeights();
+ initializeWeights(max_exec_order_);
+
+ if (tensors_allocated) // NOTE(review): this check precedes the already-initialized early return below, so it throws even when the mode is unchanged
+ throw std::invalid_argument("Cannot initialize allocated tensors");
if (tensors_initialized && model_training == training)
return;
model_training = training;
if (model_training)
- initializeTensorsTrain();
+ initializeTensorsTrain(max_exec_order_);
else
- initializeTensorsInference();
+ initializeTensorsInference(max_exec_order_);
tensors_initialized = true;
}
*/
void Manager::deinitializeTensors() {
+ deallocateTensors(false);
+
shared_deriv = Tensor();
shared_inout = Tensor();
shared_grad = Tensor();
*std::max_element(var_exec_order.begin(), var_exec_order.end()));
Tensor *grad = nullptr;
- // TODO: change to enum_class_and
if (std::get<2>(ts) /** need gradient */ &&
- enum_class_or(tspan, TensorLifespan::FORWARD_FUNC_LIFESPAN) !=
- TensorLifespan::FORWARD_FUNC_LIFESPAN)
+ tspan > TensorLifespan::FORWARD_FUNC_LIFESPAN)
grad = tensor_pool.requestTensor(
std::get<0>(ts), /// tensor dim
grad_exec_order, tspan,
Manager::requestInputs(const GraphNode &node,
const std::vector<TensorDim> &inputs_dim,
const std::vector<std::string> &outputs_name) {
+ const auto &exec_order = node.getExecutionOrder();
+ std::vector<unsigned int> var_exec_order(
+ {std::get<0>(exec_order), std::get<2>(exec_order)});
+ std::vector<unsigned int> grad_exec_order(
+ {std::get<1>(exec_order), std::get<2>(exec_order)});
+ max_exec_order =
+ std::max(max_exec_order,
+ *std::max_element(var_exec_order.begin(), var_exec_order.end()));
- auto const &tspan = TensorLifespan::ITERATION_LIFESPAN;
- std::vector<Var_Grad *> ret;
+ TensorLifespan var_ls = TensorLifespan::ITERATION_LIFESPAN;
+ TensorLifespan grad_ls = TensorLifespan::ITERATION_LIFESPAN;
- if (outputs_name.empty()) {
- unsigned int count = 0;
- std::vector<Var_Grad::Spec> inputs_spec;
+ std::vector<Var_Grad *> ret;
+ size_t current_size = inputs_v2.size();
+
+ for (unsigned int idx = 0; idx < inputs_dim.size(); idx++) {
+ auto const &dim = inputs_dim[idx];
+ Tensor *var = nullptr, *grad = nullptr;
+ if (!outputs_name.empty()) {
+ var = tensor_pool.requestPrerequestedTensor(
+ dim, /// tensor dim
+ var_exec_order, var_ls,
+ outputs_name[idx], /// name
+ Tensor::Initializer::NONE /// tensor initializer
+ );
- std::transform(
- inputs_dim.begin(), inputs_dim.end(), std::back_inserter(inputs_spec),
- [&count, &node, &tspan](auto const &elem) {
- return std::make_tuple(elem, Tensor::Initializer::NONE, true,
- node.getName() + std::string(":input") +
- std::to_string(count++),
- tspan);
- });
+ grad = tensor_pool.requestPrerequestedTensor(
+ dim, /// tensor dim
+ grad_exec_order, grad_ls,
+ outputs_name[idx] + Var_Grad::grad_suffix, /// name
+ Tensor::Initializer::ZEROS /// tensor initializer
+ );
+ } else if (!node.getInputConnections().empty()) {
+ /** skip requesting tensor for input */
+ const std::string &var_name =
+ node.getName() + std::string(":input") + std::to_string(idx);
+ var = tensor_pool.requestTensor(
+ dim, /// tensor dim
+ var_exec_order, var_ls,
+ var_name, /// name
+ Tensor::Initializer::NONE /// tensor initializer
+ );
- ret = requestTensors<Var_Grad>(node, inputs_spec, inputs_v2);
- } else {
- ret.reserve(inputs_dim.size());
-
- /**
- * Find already allocated output which must match the name and dimensions
- */
- for (unsigned int idx = 0; idx < inputs_dim.size(); idx++) {
- auto output_loc = name_map.at(outputs_name.at(idx));
- Var_Grad *vg = outputs_v2.at(output_loc).get();
- if (vg->getDim() != inputs_dim[idx])
- throw std::invalid_argument(
- "Dimension mismatch for the requested input");
- ret.push_back(vg);
+ grad = tensor_pool.requestTensor(
+ dim, /// tensor dim
+ grad_exec_order, grad_ls,
+ var_name + Var_Grad::grad_suffix, /// name
+ Tensor::Initializer::ZEROS /// tensor initializer
+ );
}
- }
-
- const auto &exec_order = node.getExecutionOrder();
- for (auto const &in : ret) {
- auto const &vname = in->getName();
- auto const &gname = in->getGradientName();
-
- /** usage for inputs */
- tensor_exec_order[vname].push_back(std::get<0>(exec_order));
- tensor_exec_order[vname].push_back(std::get<1>(exec_order));
- /** usage for inputs gradients (outgoing derivatives) */
- tensor_exec_order[gname].push_back(std::get<2>(exec_order));
-
- /** set tensor lifespan */
- expandLifespan(vname, tspan);
- expandLifespan(gname, tspan);
+ inputs_v2.emplace_back(std::make_unique<Var_Grad>(var, grad));
}
+ std::transform(inputs_v2.begin() + current_size, inputs_v2.end(),
+ std::back_inserter(ret),
+ [](auto const &elem) { return elem.get(); });
+
return ret;
}
std::vector<Var_Grad *>
Manager::requestOutputs(const GraphNode &node,
const std::vector<TensorDim> &outputs_dim) {
- unsigned int count = 0;
- auto const &tspan = TensorLifespan::ITERATION_LIFESPAN;
- std::vector<Var_Grad::Spec> outputs_spec;
-
- std::transform(
- outputs_dim.begin(), outputs_dim.end(), std::back_inserter(outputs_spec),
- [&count, &node, &tspan](auto const &elem) {
- return std::make_tuple(elem, Tensor::Initializer::NONE, true,
- node.getName() + std::string(":output") +
- std::to_string(count++),
- tspan);
- });
-
- auto ret = requestTensors<Var_Grad>(node, outputs_spec, outputs_v2);
const auto &exec_order = node.getExecutionOrder();
- for (auto const &out : ret) {
- auto const &vname = out->getName();
- auto const &gname = out->getGradientName();
+ std::vector<unsigned int> var_exec_order( // the output variable is registered for execution-order elements 0 and 2 of this node
+ {std::get<0>(exec_order), std::get<2>(exec_order)});
+ std::vector<unsigned int> grad_exec_order( // the output gradient is registered for execution-order elements 1 and 2 of this node
+ {std::get<1>(exec_order), std::get<2>(exec_order)});
+ max_exec_order = // tracks the largest variable execution order requested so far
+ std::max(max_exec_order,
+ *std::max_element(var_exec_order.begin(), var_exec_order.end()));
- /** usage for outputs */
- tensor_exec_order[vname].push_back(std::get<0>(exec_order));
+ TensorLifespan var_ls = TensorLifespan::ITERATION_LIFESPAN;
+ TensorLifespan grad_ls = TensorLifespan::ITERATION_LIFESPAN;
- /** usage for outputs gradients (incoming derivatives) */
- tensor_exec_order[gname].push_back(std::get<1>(exec_order));
- tensor_exec_order[gname].push_back(std::get<2>(exec_order));
+ std::vector<Var_Grad *> ret;
+ size_t current_size = outputs_v2.size(); // remembers where this node's outputs start inside outputs_v2
+
+ unsigned int count = 0;
+ for (auto const &dim : std::as_const(outputs_dim)) {
+ const std::string &var_name =
+ node.getName() + std::string(":output") + std::to_string(count++);
+ Tensor *var =
+ tensor_pool.requestTensor(dim, /// tensor dim
+ var_exec_order, var_ls,
+ var_name, /// name
+ Tensor::Initializer::NONE /// tensor initializer
+ );
- /**
- * TODO: below is needed only for activation layer as of now -
- * check if this can be worked around
- */
- tensor_exec_order[vname].push_back(std::get<2>(exec_order));
+ Tensor *grad = nullptr;
+ /** skip requesting tensor for label */
+ if (!node.getOutputConnections().empty()) // a node without output connections keeps grad == nullptr and gets no pool-backed gradient
+ grad = tensor_pool.requestTensor(
+ dim, /// tensor dim
+ grad_exec_order, grad_ls,
+ var_name + Var_Grad::grad_suffix, /// name
+ Tensor::Initializer::ZEROS /// tensor initializer
+ );
- /** set tensor lifespan */
- expandLifespan(vname, tspan);
- expandLifespan(gname, tspan);
+ outputs_v2.emplace_back(std::make_unique<Var_Grad>(var, grad)); // outputs_v2 owns the Var_Grad; callers receive raw pointers to it
}
+ std::transform(outputs_v2.begin() + current_size, outputs_v2.end(), // returns only the entries appended by this call
+ std::back_inserter(ret),
+ [](auto const &elem) { return elem.get(); });
+
return ret;
}
* @brief Create tensors with the given spec
*
* @param node Graph node to extract node identifiers/info
+ * @param tensors_spec Specficiation for the tensors
+ *
+ * @return created tensors list
+ */
+  std::vector<Tensor *> requestWeightOptimizerVariables(
+    const std::vector<TensorDim> &dims, const std::string &name,
+    const TensorLifespan &lifespan,
+    Tensor::Initializer initializer = Tensor::Initializer::NONE) {
+    /// the optimizer variables are requested with the execution order that
+    /// the weight pool holds for the tensor called `name`
+    auto const &weight_exec_order = weight_pool.getExecutionOrder(name);
+
+    std::vector<Tensor *> opt_tensors;
+    opt_tensors.reserve(dims.size());
+
+    unsigned int opt_idx = 0;
+    for (auto const &dim : dims) {
+      /// one tensor per dimension, named "<name>:opt<index>"
+      Tensor *requested = tensor_pool.requestTensor(
+        dim, weight_exec_order, lifespan,
+        name + ":opt" + std::to_string(opt_idx), initializer);
+      opt_tensors.push_back(requested);
+      ++opt_idx;
+    }
+
+    return opt_tensors;
+  }
+
+ /**
+ * @brief Create tensors with the given spec
+ *
+ * @param node Graph node to extract node identifiers/info
* @param inputs_dim Specficiation for the tensors
* @param outputs_name Name of the already requested output tensors
*
* @note This only allocates weights and does not handle training related
* memory for weights
*/
- void initializeWeights();
+ void initializeWeights(unsigned int max_exec_order);
/**
* @brief Reset the manager state
* will require full allocation than reusing memory allocated with inference
* mode.
*/
- void initializeTensors(bool training);
+ void initializeTensors(bool training, unsigned int max_exec_order);
/**
* @brief Check if the manager has allocated tensors
/**
* @brief Initialize the tensors for inference mode
*/
- void initializeTensorsInference();
+ void initializeTensorsInference(unsigned int);
/**
* @brief Initialize the tensors for training mode
*/
- void initializeTensorsTrain();
+ void initializeTensorsTrain(unsigned int);
/**
* @brief Create tensors with the given spec
return *std::max_element(interval_req.begin(), interval_req.end());
}
+/**
+ * @brief Clear the memory pool: drop the recorded request bookkeeping and
+ * reset the pool sizes
+ *
+ * @throws std::invalid_argument if mem_pool is still set, i.e. the pool is
+ * allocated
+ */
+void MemoryPool::clear() {
+  const bool still_allocated = (mem_pool != nullptr);
+  if (still_allocated) {
+    throw std::invalid_argument("Cannot clear allocated memory pool");
+  }
+
+  /// reset the size bookkeeping
+  min_pool_size = 0;
+  pool_size = 0;
+
+  /// forget everything recorded per request
+  memory_offset.clear();
+  memory_validity.clear();
+  memory_size.clear();
+}
+
} // namespace nntrainer
*/
size_t minMemoryRequirement();
+ /**
+ * @brief Clear the memory pool
+ *
+ */
+ void clear();
+
private:
/**
* @brief Validate the provided layout
*/
void TensorPool::finalize(const MemoryPlanner &planner,
unsigned int start_order, unsigned int end_order) {
+ mem_pool.clear();
unsigned int bytes_requested = 0;
for (auto &spec : pool) {
spec.token = 0;
* @note returns empty tensor which will be filled when allocate is called.
* @note we assume that the caller checks if the exec_order and lifespan are
* compatible.
+ *
+ * @note This interface is separated from requestTensor to reduce bugs related
+ * to unintentional tensor sharing.
*/
Tensor *requestPrerequestedTensor(
const TensorDim &dim, const std::vector<unsigned int> &exec_order,
void Var_Grad::initializeVariable(const Tensor &preallocated) {
if (!preallocated.empty()) {
- var->makeSharedDataTensor(preallocated);
+ var = std::make_shared<Tensor>(preallocated); // rebinds var to a new Tensor copy-constructed from preallocated instead of sharing data into the existing one
/** intentionally not initialized tensor memory for shared tensors */
}
}
* Making a new tensor is intentional here as this tensor is not shared
* with other layers but the internal memory is.
*/
- grad->makeSharedDataTensor(preallocated);
+ grad = std::make_shared<Tensor>(preallocated);
/** intentionally not initialized tensor memory for shared tensors */
}
/**
*/
explicit Var_Grad(Tensor *v, Tensor *g) :
var(std::shared_ptr<Tensor>(v, [](void *) {})),
- grad(std::shared_ptr<Tensor>(g, [](void *) {})) {}
+ grad(std::shared_ptr<Tensor>(g, [](void *) {})) { // no-op deleters: v and g are never deleted through this Var_Grad
+ if (!v) // null variable: fall back to an owned, default-constructed Tensor so var is never null
+ var = std::make_shared<Tensor>();
+ if (!g) // same fallback for a missing gradient
+ grad = std::make_shared<Tensor>();
+ }
/**
* @brief Copy constructor for Var_Grad
throw std::invalid_argument("Weight regularizer unknown");
}
-void Weight::initializeGradient(const Tensor &preallocated) {
- // Use self variable to initialize itself
- Var_Grad::initializeGradient(preallocated);
- if (var->isAllocated())
- allocateOptimizerVariables();
-}
-
-void Weight::allocateOptimizerVariables() {
- // Allocate optimizer parameters
- for (auto const &dim : opt_vars_dim) {
- opt_vars.emplace_back(dim);
- opt_vars.back().setZero();
- }
-}
-
} // namespace nntrainer
regularizer_constant(reg_const) {}
/**
- * @copydoc var_grad::initializeGradient(const Tensor &)
- */
- void initializeGradient(const Tensor &preallocated = Tensor());
-
- /**
* @brief Swap for weight
*
* @param lhs Swap to
/**
* @brief Clear optimizer variables
*/
- void clearOptimizerVariables() {
- opt_vars.clear();
- opt_vars_dim.clear();
- }
+ void clearOptimizerVariables() { opt_vars.clear(); } // drops the stored pointers only; the pointed-to tensors are not released here
/**
* @brief Add optimizer variables
* @param dim Optimizer variable dimension
*/
- void addOptimizerVariable(const TensorDim &dim) {
- opt_vars_dim.emplace_back(dim);
- // TODO: Move this out when an optimizer does not initialize with 0.
+ void setOptimizerVariables(std::vector<Tensor *> tensors) { // replaces the whole optimizer-variable list; nothing is appended
+ opt_vars = tensors; // stores raw pointers; presumably the tensors stay owned by the pool they were requested from — TODO confirm
}
/**
* @param idx Index of the optimizer variable to get
* @retval Reference of the optimizer variable
*/
- Tensor &getOptimizerVariableRef(unsigned int idx) { return opt_vars[idx]; }
+ Tensor &getOptimizerVariableRef(unsigned int idx) { return *opt_vars[idx]; } // dereferences the stored pointer; idx is not bounds-checked
/**
* @brief Allocate and initialize the weight variable, if needed
/**
* @brief Allocate and initialize the weight gradient, if needed
*/
- void allocateGradient() {
- Var_Grad::allocateGradient();
- allocateOptimizerVariables();
- }
+ void allocateGradient() { Var_Grad::allocateGradient(); } // only the gradient is allocated; optimizer variables are no longer allocated here
/**
* @brief check if weight regularizer type is l2norm
/**
* @brief Deallocate memory for the gradient of the weight
*/
- void deallocateGradient() {
- Var_Grad::deallocateGradient();
- opt_vars.clear();
- }
+ void deallocateGradient() { Var_Grad::deallocateGradient(); } // only the gradient is deallocated; the optimizer-variable list is left untouched
/**
* @brief Deallocate the weight gardient and variable
deallocateVariable();
}
- /**
- * @brief Allocate optimizer related variables for the given weights
- */
- void allocateOptimizerVariables();
-
- /**
- * @brief Allocate optimizer related variables for the given weights
- */
- void deallocateOptimizerVariables() { opt_vars.clear(); }
-
private:
WeightRegularizer regularizer; /**< regularizer for this variable */
float regularizer_constant; /**< constant factor for regularization */
- std::vector<Tensor> opt_vars; /**< optimizer variables */
- std::vector<TensorDim> opt_vars_dim; /**< optimizer variables dimensions */
+ std::vector<Tensor *> opt_vars; /**< optimizer variables */
};
} // namespace nntrainer
label_tensor->setRandNormal();
nntrainer::sharedConstTensors label = {label_tensor};
- EXPECT_NO_THROW(nn.forwarding(input, label));
+ if (loss_nodes.size()) {
+ EXPECT_NO_THROW(nn.forwarding(input, label));
+ } else {
+ EXPECT_NO_THROW(nn.forwarding(input, {}));
+ }
if (loss_nodes.size()) {
EXPECT_NO_THROW(nn.backwarding(label, 0));