From 04c3da9aa1c2e3171419a02c720c42ac2f815c96 Mon Sep 17 00:00:00 2001 From: Parichay Kapoor Date: Thu, 17 Jun 2021 14:52:27 +0900 Subject: [PATCH] [manager] Memory allocation for non-weight tensors Memory allocation for non-weight tensors has been added, including inputs, outputs and tensors. For now, this does not include any optimization except the gradient-based optimization. Signed-off-by: Parichay Kapoor --- nntrainer/graph/network_graph.cpp | 2 - nntrainer/tensor/manager.cpp | 144 ++++++++++++++++++++---------- 2 files changed, 98 insertions(+), 48 deletions(-) diff --git a/nntrainer/graph/network_graph.cpp b/nntrainer/graph/network_graph.cpp index 9ae6d707..f369419c 100644 --- a/nntrainer/graph/network_graph.cpp +++ b/nntrainer/graph/network_graph.cpp @@ -752,8 +752,6 @@ int NetworkGraph::initialize(std::shared_ptr manager) { * For input layer, as input dimension is known, set input tensor. */ if (!is_input_node(cur_type, idx)) { - std::string l_pre_type = getSortedLayerNode(idx - 1)->getType(); - auto &input_layers = lnode->getInputLayers(); for (unsigned int i = 0; i < input_layers.size(); ++i) { auto in_layer_node = getLayerNode(input_layers[i]); diff --git a/nntrainer/tensor/manager.cpp b/nntrainer/tensor/manager.cpp index c2c089d0..a34dba70 100644 --- a/nntrainer/tensor/manager.cpp +++ b/nntrainer/tensor/manager.cpp @@ -308,6 +308,7 @@ void Manager::allocateWeights() { } } } + weights_allocated = true; } @@ -326,6 +327,7 @@ void Manager::deallocateWeights() { } } } + weights_allocated = false; } @@ -563,41 +565,68 @@ void Manager::initializeTensorsInference() { * @note Label for the last layer is not initialized in inference. * @note Input for the first layer is not initialized in inference. 
*/ - bool use_first_last = 0; - for (unsigned int idx = 0; idx < in_outs.size(); idx++) { - auto &l_io = in_outs[idx]; - unsigned int offset = 0; - bool is_first_layer = idx == 0; - - // For flatten layer, do not assign new memory - if (idx > 0 && is_flat_type[idx]) + if (!LAYER_V2) { + bool use_first_last = 0; + for (unsigned int idx = 0; idx < in_outs.size(); idx++) { + auto &l_io = in_outs[idx]; + unsigned int offset = 0; + bool is_first_layer = idx == 0; + + // For flatten layer, do not assign new memory + if (idx > 0 && is_flat_type[idx]) + use_first_last = 1 - use_first_last; + + // In inference mode, do not allocate the memory for the input of the + // first layer. These is the first entry in the in_outs. Inference() will + // override input tensors of the first layer + if (is_first_layer) + continue; + + for (auto &io : l_io) { + Tensor shared_inout_cur = Tensor(); + if (enable_inference_inout_memory_opt) { + // if optimized + if (use_first_last) { + // Create tensor with from the front of shared tensor + shared_inout_cur = + shared_inout.getSharedDataTensor(io->getDim(), offset); + } else { + // Create tensor with from the back of shared tensor + shared_inout_cur = shared_inout.getSharedDataTensor( + io->getDim(), + max_shared_inout - io->getDim().getDataLen() - offset); + } + offset += io->getDim().getDataLen(); + } + io->initialize(shared_inout_cur, Tensor(), false); + } use_first_last = 1 - use_first_last; + } + } else { + // Inference Mode without optimizations + for (auto &layer_outs : outputs_v2) { + // TODO:For flatten layer, do not assign new memory + + for (auto &outs : layer_outs) { + outs->initialize(Tensor(), Tensor(), false); + } + } + + // Inference Mode without optimizations + for (auto &layer_ts : tensors_v2) { + for (auto &ts : layer_ts) { + ts->initialize(Tensor(), Tensor(), false); + } + } // In inference mode, do not allocate the memory for the input of the first // layer. These is the first entry in the in_outs. 
Inference() will override // input tensors of the first layer - if (is_first_layer) + for ([[maybe_unused]] auto &layer_ins : inputs_v2) { + // as inputs_v2 are only set for input layers, this can be skipped all the + // way continue; - - for (auto &io : l_io) { - Tensor shared_inout_cur = Tensor(); - if (enable_inference_inout_memory_opt) { - // if optimized - if (use_first_last) { - // Create tensor with from the front of shared tensor - shared_inout_cur = - shared_inout.getSharedDataTensor(io->getDim(), offset); - } else { - // Create tensor with from the back of shared tensor - shared_inout_cur = shared_inout.getSharedDataTensor( - io->getDim(), - max_shared_inout - io->getDim().getDataLen() - offset); - } - offset += io->getDim().getDataLen(); - } - io->initialize(shared_inout_cur, Tensor(), false); } - use_first_last = 1 - use_first_last; } } @@ -609,28 +638,51 @@ void Manager::initializeTensorsTrain() { if (max_derivative_size > 0 && enable_activation_memory_opt) shared_deriv = Tensor(TensorDim({max_derivative_size}), false); - for (unsigned int idx = 0; idx < in_outs.size(); idx++) { - auto &l_io = in_outs[idx]; - unsigned int offset = 0; - bool is_last_layer = idx == in_outs.size() - 1; + if (!LAYER_V2) { + for (unsigned int idx = 0; idx < in_outs.size(); idx++) { + auto &l_io = in_outs[idx]; + unsigned int offset = 0; + bool is_last_layer = idx == in_outs.size() - 1; + + for (auto &io : l_io) { + // Last layer requires separate memory allocations for output and label + // (deriv) + if (enable_derivative_memory_opt && !is_last_layer) { + // Training Mode with optimizations + if (enable_activation_memory_opt && + (is_rnn_type[idx] || is_act_type[idx])) { + io->initialize( + Tensor(), shared_deriv.getSharedDataTensor(io->getDim(), offset)); + offset += io->getDim().getDataLen(); + } else { + io->initializeShared(); + } - for (auto &io : l_io) { - // Last layer requires separate memory allocations for output and label - // (deriv) - if 
(enable_derivative_memory_opt && !is_last_layer) { - // Training Mode with optimizations - if (enable_activation_memory_opt && - (is_rnn_type[idx] || is_act_type[idx])) { - io->initialize( - Tensor(), shared_deriv.getSharedDataTensor(io->getDim(), offset)); - offset += io->getDim().getDataLen(); } else { - io->initializeShared(); + // Training Mode without optimizations + io->initialize(Tensor(), Tensor(), true); } + } + } + } else { + // Training Mode without optimizations + for (auto &layer_outs : outputs_v2) { + for (auto &outs : layer_outs) { + outs->initialize(Tensor(), Tensor(), true); + } + } + + // Training Mode without optimizations + for (auto &layer_ts : tensors_v2) { + for (auto &ts : layer_ts) { + ts->initialize(Tensor(), Tensor(), true); + } + } - } else { - // Training Mode without optimizations - io->initialize(Tensor(), Tensor(), true); + // Training Mode without optimizations + for (auto &layer_ins : inputs_v2) { + for (auto &ins : layer_ins) { + ins->initialize(Tensor(), Tensor(), true); } } } -- 2.34.1