[Manager] Manager to use TensorPool for all requests

author Parichay Kapoor <pk.kapoor@samsung.com>

Wed, 25 Aug 2021 08:26:21 +0000 (17:26 +0900)

committer Jijoong Moon <jijoong.moon@samsung.com>

Fri, 1 Oct 2021 02:59:03 +0000 (11:59 +0900)
author Parichay Kapoor <pk.kapoor@samsung.com>
Wed, 25 Aug 2021 08:26:21 +0000 (17:26 +0900)
committer Jijoong Moon <jijoong.moon@samsung.com>
Fri, 1 Oct 2021 02:59:03 +0000 (11:59 +0900)
diff --git a/nntrainer/graph/network_graph.cpp b/nntrainer/graph/network_graph.cpp

index e42bebf..0d3ffc2 100644 (file)
--- a/nntrainer/graph/network_graph.cpp
+++ b/nntrainer/graph/network_graph.cpp
@@ -429,7 +429,7 @@ void NetworkGraph::setBatchSize(unsigned int batch_size) {
    auto allocated = tensor_manager->isAllocated();
  
    if (allocated)
-    tensor_manager->deallocateTensors();
+    deallocateTensors();
  
    for (auto iter = cbegin(); iter != cend(); iter++) {
      (*iter)->setBatch(batch_size);
@@ -446,10 +446,11 @@ void NetworkGraph::setBatchSize(unsigned int batch_size) {
        }
      }
    }
+  /// resize input and output spec
    tensor_manager->setBatchSize(batch_size);
  
    if (allocated)
-    tensor_manager->allocateTensors();
+    allocateTensors();
  }
  
  sharedConstTensors NetworkGraph::forwarding(bool training) const {
@@ -712,6 +713,21 @@ NetworkGraph::finalizeContext(const std::shared_ptr<LayerNode> &lnode,
    const std::vector<Var_Grad *> &outputs =
      tensor_manager->requestOutputs(gnode, init_context.getOutputDimensions());
  
+  /**
+   * @note cache the labels and input var_grads to be able to fill them when
+   * running the graph.
+   */
+  if (gnode.getInputConnections().empty())
+    input_list.insert(input_list.end(), inputs.begin(), inputs.end());
+  /** @todo check compatibility of requireLabel() and
+   * getOutputConnections().empty() */
+  if (lnode->requireLabel())
+    label_list.insert(label_list.end(), outputs.begin(), outputs.end());
+
+  /**
+   * @note must use existing properties like name/trainable of run_context to
+   * create the new run_context
+   */
    lnode->configureRunContext(
      // TODO: update weights spec for trainable based on layer trainable prop
      tensor_manager->requestWeights(gnode, init_context.getWeightsSpec()),
diff --git a/nntrainer/graph/network_graph.h b/nntrainer/graph/network_graph.h

index c3dc96c..dbb372b 100644 (file)
--- a/nntrainer/graph/network_graph.h
+++ b/nntrainer/graph/network_graph.h
@@ -292,14 +292,23 @@ public:
    /**
     * @brief Allocate and initialize the weight variable
     */
-  void initializeWeights() { tensor_manager->initializeWeights(); }
+  void initializeWeights() {
+    const auto last_order = (*(cend() - 1))->getExecutionOrder();
+    tensor_manager->initializeWeights(std::get<0>(last_order));
+  }
  
    /**
     * @brief Initialize the inputs/outputs/derivatives/gradients for the layers
     * @param[in] training If true, initialize derivates/gradients, else, do not.
     */
    void initializeTensors(bool training) {
-    tensor_manager->initializeTensors(training);
+    if (!training)
+      tensor_manager->initializeTensors(
+        training, std::get<0>((*(cend() - 1))->getExecutionOrder()));
+    else
+      /** @todo update this to skip non-trainable layers */
+      tensor_manager->initializeTensors(
+        training, std::get<1>((*(cbegin()))->getExecutionOrder()));
    }
  
    /**
@@ -336,12 +345,26 @@ public:
      for (auto const &w : tensor_manager->getWeights()) {
        const TensorDim &dim = w->getDim();
        std::vector<TensorDim> dims = cb(dim);
-      for (auto &dim : dims) {
-        w->addOptimizerVariable(dim);
-      }
+      w->setOptimizerVariables(tensor_manager->requestWeightOptimizerVariables(
+        dims, w->getName(), TensorLifespan::MAX_LIFESPAN,
+        Tensor::Initializer::ZEROS));
      }
    }
  
+  /**
+   * @brief Get the var_grads cached for the inputs of the graph
+   * @note const-qualified as the list is only read, never modified
+   * @return const std::vector<Var_Grad *>& list of the input var_grads
+   */
+  const std::vector<Var_Grad *> &getInputList() const { return input_list; }
+
+  /**
+   * @brief Get the var_grads cached for the labels of the graph
+   * @note const-qualified as the list is only read, never modified
+   * @return const std::vector<Var_Grad *>& list of the label var_grads
+   */
+  const std::vector<Var_Grad *> &getLabelList() const { return label_list; }
+
  private:
    std::map<std::string, std::string> sub_in_out; /** This is map to identify
                     input and output layer name of subgraph */
@@ -353,6 +376,8 @@ private:
                                    at the start of the graph */
    bool compiled;               /**< if the model graph is compiled */
    unsigned int batch_size;     /**< current batch_size */
+  std::vector<Var_Grad *> label_list; /**< var_grads for the labels */
+  std::vector<Var_Grad *> input_list; /**< var_grads for the inputs */
  
    /**
     * @brief     topological sort
diff --git a/nntrainer/models/neuralnet.cpp b/nntrainer/models/neuralnet.cpp

index 820d6a2..9d14eab 100644 (file)
--- a/nntrainer/models/neuralnet.cpp
+++ b/nntrainer/models/neuralnet.cpp
@@ -132,6 +132,8 @@ int NeuralNetwork::initialize() {
  
    // initialize optimizer and related variables
    if (opt) {
+    /** TODO: update request of optimizer to be of same format as
+     * Layer::requestTensor */
      opt->finalize();
      std::function<std::vector<TensorDim>(const TensorDim &)> cb =
        [this](const TensorDim &dim) {
@@ -162,31 +164,71 @@ int NeuralNetwork::initialize() {
   */
  NeuralNetwork::~NeuralNetwork() { model_graph.reset(); }
  
-void NeuralNetwork::setLabels(sharedConstTensors label) {
-  auto fill_label = [&label](auto const &layer_node) {
-    NNTR_THROW_IF(label.size() != layer_node->getNumOutputs(),
-                  std::invalid_argument)
-      << "label size does not match with the layer requirements"
-      << " layer: " << layer_node->getName() << " label size: " << label.size()
-      << " requirements size: " << layer_node->getNumOutputs();
+static void setLabels(const std::vector<Tensor> &data,
+                      const std::vector<Var_Grad *> &label_list) {
  
-    for (unsigned int i = 0; i < layer_node->getNumOutputs(); i++) {
-      layer_node->getOutputGradUnsafe(i) = *label[i];
-    }
-  };
+  NNTR_THROW_IF(data.size() > 1 && data.size() != label_list.size(),
+                std::invalid_argument)
+    << "label size does not match with the network requirements"
+    << " label size: " << data.size()
+    << " requirements size: " << label_list.size();
  
-  auto clear_label = [](auto const &layer_node) {
-    for (unsigned int i = 0; i < layer_node->getNumOutputs(); i++) {
-      layer_node->getOutputGradUnsafe(i) = Tensor();
-    }
-  };
+  /// clear on empty data, share a lone label with all, else feed one-to-one
+  for (unsigned int idx = 0; idx < label_list.size(); idx++) {
+    if (data.empty())
+      label_list[idx]->initializeGradient();
+    else if (data.size() == 1)
+      label_list[idx]->initializeGradient(data[0]);
+    else
+      label_list[idx]->initializeGradient(data[idx]);
+  }
+}
+
+static void setInputs(const std::vector<Tensor> &data,
+                      const std::vector<Var_Grad *> &input_list) {
+
+  NNTR_THROW_IF(data.size() != input_list.size(), std::invalid_argument)
+    << "input size does not match with the network requirements"
+    << " input size: " << data.size()
+    << " requirements size: " << input_list.size();
  
    /// feed or clear label
-  for (auto iter = model_graph.cbegin(); iter != model_graph.cend(); iter++) {
-    auto const &lnode = *iter;
-    if (lnode->requireLabel()) {
-      label.empty() ? clear_label(lnode) : fill_label(lnode);
-    }
+  auto tensor_iter = data.cbegin();
+  for (Var_Grad *input_vg : input_list)
+    input_vg->initializeVariable(*tensor_iter++);
+}
+
+static void setLabels(sharedConstTensors &data,
+                      const std::vector<Var_Grad *> &label_list) {
+
+  NNTR_THROW_IF(data.size() > 1 && data.size() != label_list.size(),
+                std::invalid_argument)
+    << "label size does not match with the network requirements"
+    << " label size: " << data.size()
+    << " requirements size: " << label_list.size();
+
+  /// no data clears every label, a lone tensor is fed to every label
+  const bool single_label = data.size() == 1;
+  for (unsigned int idx = 0; idx < label_list.size(); idx++) {
+    if (data.empty()) {
+      label_list[idx]->initializeGradient();
+      continue;
+    }
+    label_list[idx]->initializeGradient(*data[single_label ? 0 : idx]);
+  }
+}
+
+static void setInputs(sharedConstTensors &data,
+                      const std::vector<Var_Grad *> &input_list) {
+
+  NNTR_THROW_IF(input_list.size() != data.size(), std::invalid_argument)
+    << "input size does not match with the network requirements"
+    << " input size: " << data.size()
+    << " requirements size: " << input_list.size();
+
+  /// feed every input var_grad with the tensor at the same position
+  for (unsigned int pos = 0; pos < input_list.size(); ++pos) {
+    input_list[pos]->initializeVariable(*data[pos]);
    }
  }
  
@@ -212,8 +254,8 @@ sharedConstTensors NeuralNetwork::forwarding(sharedConstTensors input,
      << " label_batch: " << label[0]->batch()
      << " target_batch: " << current_batch;
  
-  setLabels(label);
-  model_graph.getSortedLayerNode(0)->getInput(0) = *input[0].get();
+  setLabels(label, model_graph.getLabelList());
+  setInputs(input, model_graph.getInputList());
  
    return forwarding(training);
  }
@@ -306,7 +348,7 @@ void NeuralNetwork::backwarding(int iteration) {
   *            No need to call at first Input Layer (No data to be updated)
   */
  void NeuralNetwork::backwarding(sharedConstTensors label, int iteration) {
-  setLabels(label);
+  setLabels(label, model_graph.getLabelList());
    backwarding(iteration);
  }
  
@@ -551,6 +593,8 @@ int NeuralNetwork::setDataset(const DatasetModeType &mode,
  }
  
  int NeuralNetwork::allocate(bool trainable) {
+  model_graph.deallocateTensors(false);
+
    // TODO: directly replace this
    model_graph.initializeTensors(trainable);
  
@@ -616,9 +660,8 @@ int NeuralNetwork::train_run() {
    auto batch_size = std::get<props::TrainingBatchSize>(model_flex_props);
  
    auto &output = last_layer_node->getOutput(0);
-  auto &label = last_layer_node->getOutputGrad(0);
-  auto &in = first_layer_node->getInput(0);
  
+  /** @todo use model_graph.getInputDimensions() and getOutputDimensions() */
    auto in_dims = first_layer_node->getInputDimensions();
    auto label_dims = last_layer_node->getOutputDimensions();
  
@@ -639,9 +682,10 @@ int NeuralNetwork::train_run() {
     * @param on_epoch_end function that will recieve reference to stat,
     * buffer which will be called on the epoch end
     */
-  auto run_epoch = [this, &in, &label, &in_dims, &label_dims, batch_size](
+  auto run_epoch = [this, &in_dims, &label_dims, &output, batch_size](
                       DataBuffer *buffer, bool shuffle,
-                     auto &&on_iteration_fetch, auto &&on_epoch_end) {
+                     auto &&on_iteration_fetch, auto &&on_iteration_update_stat,
+                     auto &&on_epoch_end) {
      /// @todo managing metrics must be handled here as well!! for now it is
      /// handled in individual callbacks
      RunStats stat;
@@ -657,11 +701,14 @@ int NeuralNetwork::train_run() {
          /// @todo support partial batch
          continue;
        }
-      /// @todo multiple input support
-      in = iteration.getInputsRef().front();
-      label = iteration.getLabelsRef().front();
+
+      auto const &labels = iteration.getLabelsRef();
+      setLabels(labels, model_graph.getLabelList());
+      auto const &inputs = iteration.getInputsRef();
+      setInputs(inputs, model_graph.getInputList());
  
        on_iteration_fetch(stat, *buffer);
+      on_iteration_update_stat(stat, {output}, labels);
      }
      future_iq.get();
      on_epoch_end(stat, *buffer);
@@ -679,8 +726,14 @@ int NeuralNetwork::train_run() {
  
      std::cout << "#" << epoch_idx << "/" << getEpochs();
      auto loss = getLoss();
-    stat.loss += loss;
-    buffer.displayProgress(stat.num_iterations++, loss);
+    buffer.displayProgress(stat.num_iterations, loss);
+  };
+
+  auto update_train_stat = [this](RunStats &stat,
+                                  const std::vector<Tensor> &outputs,
+                                  const std::vector<Tensor> &labels) {
+    stat.loss += getLoss();
+    stat.num_iterations++;
    };
  
    auto train_epoch_end = [this](RunStats &stat, DataBuffer &buffer) {
@@ -694,17 +747,23 @@ int NeuralNetwork::train_run() {
                << " - Training Loss: " << stat.loss;
    };
  
-  auto eval_for_iteration = [this, &output, &label,
-                             batch_size](RunStats &stat, DataBuffer &buffer) {
+  auto eval_for_iteration = [this, batch_size](RunStats &stat,
+                                               DataBuffer &buffer) {
      forwarding(false);
-    auto model_out = output.argmax();
-    auto label_out = label.argmax();
+  };
+
+  auto update_eval_stat = [this, batch_size, &update_train_stat](
+                            RunStats &stat, const std::vector<Tensor> &outputs,
+                            const std::vector<Tensor> &labels) {
+    auto model_out = outputs[0].argmax();
+    auto label_out = labels[0].argmax();
+
      for (unsigned int b = 0; b < batch_size; b++) {
        if (model_out[b] == label_out[b])
          stat.num_correct_predictions++;
      }
-    stat.num_iterations++;
-    stat.loss += getLoss();
+
+    update_train_stat(stat, outputs, labels);
    };
  
    auto eval_epoch_end = [this, batch_size, max_acc = 0.0f,
@@ -732,19 +791,19 @@ int NeuralNetwork::train_run() {
  
    auto epochs = getEpochs();
    for (epoch_idx = epoch_idx + 1; epoch_idx <= epochs; ++epoch_idx) {
-    training =
-      run_epoch(train_buffer.get(), true, train_for_iteration, train_epoch_end);
+    training = run_epoch(train_buffer.get(), true, train_for_iteration,
+                         update_train_stat, train_epoch_end);
      if (valid_buffer) {
        validation = run_epoch(valid_buffer.get(), false, eval_for_iteration,
-                             eval_epoch_end);
+                             update_eval_stat, eval_epoch_end);
      }
      std::cout << '\n';
    }
  
    if (test_buffer) {
      std::cout << "Evaluation with test data...\n";
-    testing =
-      run_epoch(test_buffer.get(), false, eval_for_iteration, eval_epoch_end);
+    testing = run_epoch(test_buffer.get(), false, eval_for_iteration,
+                        update_eval_stat, eval_epoch_end);
    }
  
    return status;
diff --git a/nntrainer/models/neuralnet.h b/nntrainer/models/neuralnet.h

index 746f413..010efd2 100644 (file)
--- a/nntrainer/models/neuralnet.h
+++ b/nntrainer/models/neuralnet.h
@@ -107,13 +107,6 @@ public:
    ~NeuralNetwork();
  
    /**
-   * @brief     Set labels to the layers which require the label
-   * @todo      Set label with multiple labels
-   * @param     label label
-   */
-  void setLabels(sharedConstTensors label);
-
-  /**
     * @brief     Get Loss from the previous ran batch of data
     * @retval    loss value
     */
diff --git a/nntrainer/tensor/manager.cpp b/nntrainer/tensor/manager.cpp

index 7ac374a..44a942c 100644 (file)
--- a/nntrainer/tensor/manager.cpp
+++ b/nntrainer/tensor/manager.cpp
@@ -260,13 +260,13 @@ Manager::getValidity(const std::string &name) {
  /**
   * @brief Allocate and initialize the weight variable
   */
-void Manager::initializeWeights() {
+void Manager::initializeWeights(unsigned int max_exec_order_) {
  
    if (weights_initialized)
      return;
  
    if (LAYER_V2) {
-    weight_pool.finalize(BasicPlanner(), 0, max_exec_order);
+    weight_pool.finalize(BasicPlanner(), 0, max_exec_order_);
    } else {
      if (total_weight_size == 0) {
        ml_logw(
@@ -327,11 +327,7 @@ void Manager::deallocateWeights() {
  }
  
  void Manager::allocateGradients() {
-  if (LAYER_V2) {
-    for (auto &w : weights_v2) {
-      w->allocateOptimizerVariables();
-    }
-  } else {
+  if (!LAYER_V2) {
      /** Allocate the source tensors for shared memories */
      if (!shared_grad.empty())
        shared_grad.allocate();
@@ -345,11 +341,7 @@ void Manager::allocateGradients() {
  }
  
  void Manager::deallocateGradients() {
-  if (LAYER_V2) {
-    for (auto &w : weights_v2) {
-      w->deallocateOptimizerVariables();
-    }
-  } else {
+  if (!LAYER_V2) {
      shared_grad.deallocate();
      for (auto &l_w : weights) {
        for (auto &w : l_w) {
@@ -499,14 +491,7 @@ void Manager::allocateInOuts() {
    if (!shared_inout.empty())
      shared_inout.allocate();
  
-  if (LAYER_V2) {
-    for (auto &in : inputs_v2) {
-      in->allocateVariable();
-    }
-    for (auto &out : outputs_v2) {
-      out->allocateVariable();
-    }
-  } else {
+  if (!LAYER_V2) {
      for (auto &l_io : in_outs) {
        for (auto &io : l_io) {
          io->allocateVariable();
@@ -518,17 +503,7 @@ void Manager::allocateInOuts() {
  void Manager::deallocateInOuts() {
    shared_inout.deallocate();
  
-  if (LAYER_V2) {
-    for (auto &in : inputs_v2) {
-      in->deallocateVariable();
-    }
-    for (auto &out : outputs_v2) {
-      out->deallocateVariable();
-    }
-    // for (auto &t : tensors_v2) {
-    //   t->deallocateVariable();
-    // }
-  } else {
+  if (!LAYER_V2) {
      for (auto &l_io : in_outs) {
        for (auto &io : l_io) {
          io->deallocateVariable();
@@ -542,17 +517,7 @@ void Manager::allocateDerivatives() {
    if (!shared_deriv.empty())
      shared_deriv.allocate();
  
-  if (LAYER_V2) {
-    for (auto &in : inputs_v2) {
-      in->allocateGradient();
-    }
-    for (auto &out : outputs_v2) {
-      out->allocateGradient();
-    }
-    // for (auto &t : tensors_v2) {
-    //   t->allocateGradient();
-    // }
-  } else {
+  if (!LAYER_V2) {
      for (auto &l_io : in_outs) {
        for (auto &io : l_io) {
          io->allocateGradient();
@@ -564,17 +529,7 @@ void Manager::allocateDerivatives() {
  void Manager::deallocateDerivatives() {
    shared_deriv.deallocate();
  
-  if (LAYER_V2) {
-    for (auto &in : inputs_v2) {
-      in->deallocateGradient();
-    }
-    for (auto &out : outputs_v2) {
-      out->deallocateGradient();
-    }
-    // for (auto &t : tensors_v2) {
-    //   t->deallocateGradient();
-    // }
-  } else {
+  if (!LAYER_V2) {
      for (auto &l_io : in_outs) {
        for (auto &io : l_io) {
          io->deallocateGradient();
@@ -583,7 +538,7 @@ void Manager::deallocateDerivatives() {
    }
  }
  
-void Manager::initializeTensorsInference() {
+void Manager::initializeTensorsInference(unsigned int max_exec_order_) {
    // @todo Do not count memory of the input tensor of the input layer and
    // output tensor of the last layer in the estimate of max_shared_inout as it
    // is not used
@@ -640,28 +595,11 @@ void Manager::initializeTensorsInference() {
        use_first_last = 1 - use_first_last;
      }
    } else {
-    // Inference Mode without optimizations
-    for (auto &outs : outputs_v2) {
-      outs->initialize(Tensor(), Tensor(), false);
-    }
-
-    // Inference Mode without optimizations
-    // for (auto &ts : tensors_v2) {
-    //   ts->initialize(Tensor(), Tensor(), false);
-    // }
-
-    // In inference mode, do not allocate the memory for the input of the first
-    // layer. These is the first entry in the in_outs. Inference() will override
-    // input tensors of the first layer
-    for ([[maybe_unused]] auto &ins : inputs_v2) {
-      // as inputs_v2 are only set for input layers, this can be skipped all the
-      // way
-      continue;
-    }
+    tensor_pool.finalize(BasicPlanner(), 0, max_exec_order_);
    }
  }
  
-void Manager::initializeTensorsTrain() {
+void Manager::initializeTensorsTrain(unsigned int max_exec_order_) {
    // Initialize gradients
    initializeGradients();
  
@@ -695,32 +633,20 @@ void Manager::initializeTensorsTrain() {
        }
      }
    } else {
-    tensor_pool.finalize(BasicPlanner(), 0, max_exec_order);
-
-    // Training Mode without optimizations
-    for (auto &outs : outputs_v2) {
-      outs->initialize(Tensor(), Tensor(), true);
-    }
-
-    // Training Mode without optimizations
-    // for (auto &ts : tensors_v2) {
-    //   ts->initialize(Tensor(), Tensor(), true);
-    // }
-
-    // Training Mode without optimizations
-    for (auto &ins : inputs_v2) {
-      ins->initialize(Tensor(), Tensor(), true);
-    }
+    tensor_pool.finalize(BasicPlanner(), 0, max_exec_order_);
    }
  }
  
  /**
   * @brief Initialize the inputs/outputs/gradients/derivatives for the layer
   */
-void Manager::initializeTensors(bool training) {
+void Manager::initializeTensors(bool training, unsigned int max_exec_order_) {
    // If weights not initialized, initialize weights as well
    if (!weights_initialized)
-    initializeWeights();
+    initializeWeights(max_exec_order_);
+
+  if (tensors_allocated)
+    throw std::invalid_argument("Cannot initialize allocated tensors");
  
    if (tensors_initialized && model_training == training)
      return;
@@ -730,9 +656,9 @@ void Manager::initializeTensors(bool training) {
  
    model_training = training;
    if (model_training)
-    initializeTensorsTrain();
+    initializeTensorsTrain(max_exec_order_);
    else
-    initializeTensorsInference();
+    initializeTensorsInference(max_exec_order_);
    tensors_initialized = true;
  }
  
@@ -741,6 +667,8 @@ void Manager::initializeTensors(bool training) {
   */
  void Manager::deinitializeTensors() {
  
+  deallocateTensors(false);
+
    shared_deriv = Tensor();
    shared_inout = Tensor();
    shared_grad = Tensor();
@@ -845,10 +773,8 @@ Manager::requestTensors(const GraphNode &node,
                 *std::max_element(var_exec_order.begin(), var_exec_order.end()));
  
      Tensor *grad = nullptr;
-    // TODO: change to enum_class_and
      if (std::get<2>(ts) /** need gradient */ &&
-        enum_class_or(tspan, TensorLifespan::FORWARD_FUNC_LIFESPAN) !=
-          TensorLifespan::FORWARD_FUNC_LIFESPAN)
+        tspan > TensorLifespan::FORWARD_FUNC_LIFESPAN)
        grad = tensor_pool.requestTensor(
          std::get<0>(ts), /// tensor dim
          grad_exec_order, tspan,
@@ -873,57 +799,64 @@ std::vector<Var_Grad *>
  Manager::requestInputs(const GraphNode &node,
                         const std::vector<TensorDim> &inputs_dim,
                         const std::vector<std::string> &outputs_name) {
+  const auto &exec_order = node.getExecutionOrder();
+  std::vector<unsigned int> var_exec_order(
+    {std::get<0>(exec_order), std::get<2>(exec_order)});
+  std::vector<unsigned int> grad_exec_order(
+    {std::get<1>(exec_order), std::get<2>(exec_order)});
+  max_exec_order =
+    std::max(max_exec_order,
+             *std::max_element(var_exec_order.begin(), var_exec_order.end()));
  
-  auto const &tspan = TensorLifespan::ITERATION_LIFESPAN;
-  std::vector<Var_Grad *> ret;
+  TensorLifespan var_ls = TensorLifespan::ITERATION_LIFESPAN;
+  TensorLifespan grad_ls = TensorLifespan::ITERATION_LIFESPAN;
  
-  if (outputs_name.empty()) {
-    unsigned int count = 0;
-    std::vector<Var_Grad::Spec> inputs_spec;
+  std::vector<Var_Grad *> ret;
+  size_t current_size = inputs_v2.size();
+
+  for (unsigned int idx = 0; idx < inputs_dim.size(); idx++) {
+    auto const &dim = inputs_dim[idx];
+    Tensor *var = nullptr, *grad = nullptr;
+    if (!outputs_name.empty()) {
+      var = tensor_pool.requestPrerequestedTensor(
+        dim, /// tensor dim
+        var_exec_order, var_ls,
+        outputs_name[idx],        /// name
+        Tensor::Initializer::NONE /// tensor initializer
+      );
  
-    std::transform(
-      inputs_dim.begin(), inputs_dim.end(), std::back_inserter(inputs_spec),
-      [&count, &node, &tspan](auto const &elem) {
-        return std::make_tuple(elem, Tensor::Initializer::NONE, true,
-                               node.getName() + std::string(":input") +
-                                 std::to_string(count++),
-                               tspan);
-      });
+      grad = tensor_pool.requestPrerequestedTensor(
+        dim, /// tensor dim
+        grad_exec_order, grad_ls,
+        outputs_name[idx] + Var_Grad::grad_suffix, /// name
+        Tensor::Initializer::ZEROS                 /// tensor initializer
+      );
+    } else if (!node.getInputConnections().empty()) {
+      /** nodes without input connections keep var/grad as nullptr */
+      const std::string &var_name =
+        node.getName() + std::string(":input") + std::to_string(idx);
+      var = tensor_pool.requestTensor(
+        dim, /// tensor dim
+        var_exec_order, var_ls,
+        var_name,                 /// name
+        Tensor::Initializer::NONE /// tensor initializer
+      );
  
-    ret = requestTensors<Var_Grad>(node, inputs_spec, inputs_v2);
-  } else {
-    ret.reserve(inputs_dim.size());
-
-    /**
-     * Find already allocated output which must match the name and dimensions
-     */
-    for (unsigned int idx = 0; idx < inputs_dim.size(); idx++) {
-      auto output_loc = name_map.at(outputs_name.at(idx));
-      Var_Grad *vg = outputs_v2.at(output_loc).get();
-      if (vg->getDim() != inputs_dim[idx])
-        throw std::invalid_argument(
-          "Dimension mismatch for the requested input");
-      ret.push_back(vg);
+      grad = tensor_pool.requestTensor(
+        dim, /// tensor dim
+        grad_exec_order, grad_ls,
+        var_name + Var_Grad::grad_suffix, /// name
+        Tensor::Initializer::ZEROS        /// tensor initializer
+      );
      }
-  }
-
-  const auto &exec_order = node.getExecutionOrder();
-  for (auto const &in : ret) {
-    auto const &vname = in->getName();
-    auto const &gname = in->getGradientName();
-
-    /** usage for inputs */
-    tensor_exec_order[vname].push_back(std::get<0>(exec_order));
-    tensor_exec_order[vname].push_back(std::get<1>(exec_order));
  
-    /** usage for inputs gradients (outgoing derivatives) */
-    tensor_exec_order[gname].push_back(std::get<2>(exec_order));
-
-    /** set tensor lifespan */
-    expandLifespan(vname, tspan);
-    expandLifespan(gname, tspan);
+    inputs_v2.emplace_back(std::make_unique<Var_Grad>(var, grad));
    }
  
+  std::transform(inputs_v2.begin() + current_size, inputs_v2.end(),
+                 std::back_inserter(ret),
+                 [](auto const &elem) { return elem.get(); });
+
    return ret;
  }
  
@@ -933,43 +866,49 @@ Manager::requestInputs(const GraphNode &node,
  std::vector<Var_Grad *>
  Manager::requestOutputs(const GraphNode &node,
                          const std::vector<TensorDim> &outputs_dim) {
-  unsigned int count = 0;
-  auto const &tspan = TensorLifespan::ITERATION_LIFESPAN;
-  std::vector<Var_Grad::Spec> outputs_spec;
-
-  std::transform(
-    outputs_dim.begin(), outputs_dim.end(), std::back_inserter(outputs_spec),
-    [&count, &node, &tspan](auto const &elem) {
-      return std::make_tuple(elem, Tensor::Initializer::NONE, true,
-                             node.getName() + std::string(":output") +
-                               std::to_string(count++),
-                             tspan);
-    });
-
-  auto ret = requestTensors<Var_Grad>(node, outputs_spec, outputs_v2);
    const auto &exec_order = node.getExecutionOrder();
-  for (auto const &out : ret) {
-    auto const &vname = out->getName();
-    auto const &gname = out->getGradientName();
+  std::vector<unsigned int> var_exec_order(
+    {std::get<0>(exec_order), std::get<2>(exec_order)});
+  std::vector<unsigned int> grad_exec_order(
+    {std::get<1>(exec_order), std::get<2>(exec_order)});
+  max_exec_order =
+    std::max(max_exec_order,
+             *std::max_element(var_exec_order.begin(), var_exec_order.end()));
  
-    /** usage for outputs */
-    tensor_exec_order[vname].push_back(std::get<0>(exec_order));
+  TensorLifespan var_ls = TensorLifespan::ITERATION_LIFESPAN;
+  TensorLifespan grad_ls = TensorLifespan::ITERATION_LIFESPAN;
  
-    /** usage for outputs gradients (incoming derivatives) */
-    tensor_exec_order[gname].push_back(std::get<1>(exec_order));
-    tensor_exec_order[gname].push_back(std::get<2>(exec_order));
+  std::vector<Var_Grad *> ret;
+  size_t current_size = outputs_v2.size();
+
+  unsigned int count = 0;
+  for (auto const &dim : std::as_const(outputs_dim)) {
+    const std::string &var_name =
+      node.getName() + std::string(":output") + std::to_string(count++);
+    Tensor *var =
+      tensor_pool.requestTensor(dim, /// tensor dim
+                                var_exec_order, var_ls,
+                                var_name,                 /// name
+                                Tensor::Initializer::NONE /// tensor initializer
+      );
  
-    /**
-     * TODO: below is needed only for activation layer as of now -
-     * check if this can be worked around
-     */
-    tensor_exec_order[vname].push_back(std::get<2>(exec_order));
+    Tensor *grad = nullptr;
+    /** nodes without output connections (labels) get no gradient tensor */
+    if (!node.getOutputConnections().empty())
+      grad = tensor_pool.requestTensor(
+        dim, /// tensor dim
+        grad_exec_order, grad_ls,
+        var_name + Var_Grad::grad_suffix, /// name
+        Tensor::Initializer::ZEROS        /// tensor initializer
+      );
  
-    /** set tensor lifespan */
-    expandLifespan(vname, tspan);
-    expandLifespan(gname, tspan);
+    outputs_v2.emplace_back(std::make_unique<Var_Grad>(var, grad));
    }
  
+  std::transform(outputs_v2.begin() + current_size, outputs_v2.end(),
+                 std::back_inserter(ret),
+                 [](auto const &elem) { return elem.get(); });
+
    return ret;
  }
  
diff --git a/nntrainer/tensor/manager.h b/nntrainer/tensor/manager.h

index 96f31d9..511adb2 100644 (file)
--- a/nntrainer/tensor/manager.h
+++ b/nntrainer/tensor/manager.h
@@ -188,6 +188,31 @@ public:
     * @brief     Create tensors with the given spec
     *
     * @param node Graph node to extract node identifiers/info
+   * @param dims Dimensions of the optimizer variables, one tensor per entry
+   * @param name Name of the weight that the optimizer variables belong to
+   * @return created tensors list
+   */
+  std::vector<Tensor *> requestWeightOptimizerVariables(
+    const std::vector<TensorDim> &dims, const std::string &name,
+    const TensorLifespan &lifespan,
+    Tensor::Initializer initializer = Tensor::Initializer::NONE) {
+    /// optimizer variables reuse the execution order of their weight
+    auto const &weight_order = weight_pool.getExecutionOrder(name);
+    std::vector<Tensor *> opt_vars;
+    opt_vars.reserve(dims.size());
+    unsigned int opt_idx = 0;
+    for (auto const &dim : dims) {
+      const std::string opt_name = name + ":opt" + std::to_string(opt_idx++);
+      opt_vars.push_back(tensor_pool.requestTensor(dim, weight_order, lifespan,
+                                                   opt_name, initializer));
+    }
+    return opt_vars;
+  }
+
+  /**
+   * @brief     Create tensors with the given spec
+   *
+   * @param node Graph node to extract node identifiers/info
     * @param inputs_dim Specficiation for the tensors
     * @param outputs_name Name of the already requested output tensors
     *
@@ -288,7 +313,7 @@ public:
     * @note This only allocates weights and does not handle training related
     * memory for weights
     */
-  void initializeWeights();
+  void initializeWeights(unsigned int max_exec_order);
  
    /**
     * @brief Reset the manager state
@@ -365,7 +390,7 @@ public:
     * will require full allocation than reusing memory allocated with inference
     * mode.
     */
-  void initializeTensors(bool training);
+  void initializeTensors(bool training, unsigned int max_exec_order);
  
    /**
     * @brief   Check if the manager has allocated tensors
@@ -611,12 +636,12 @@ private:
    /**
     * @brief Initialize the tensors for inference mode
     */
-  void initializeTensorsInference();
+  void initializeTensorsInference(unsigned int);
  
    /**
     * @brief Initialize the tensors for training mode
     */
-  void initializeTensorsTrain();
+  void initializeTensorsTrain(unsigned int);
  
    /**
     * @brief     Create tensors with the given spec
diff --git a/nntrainer/tensor/memory_pool.cpp b/nntrainer/tensor/memory_pool.cpp

index 6d33dd8..ebcfd3c 100644 (file)
--- a/nntrainer/tensor/memory_pool.cpp
+++ b/nntrainer/tensor/memory_pool.cpp
@@ -287,4 +287,16 @@ size_t MemoryPool::calcMinMemoryRequirement() {
    return *std::max_element(interval_req.begin(), interval_req.end());
  }
  
+void MemoryPool::clear() {
+  /** clearing is only permitted while no memory is allocated */
+  if (mem_pool != nullptr) {
+    throw std::invalid_argument("Cannot clear allocated memory pool");
+  }
+  /** empty the recorded offsets/validity/sizes, then zero the pool sizes */
+  memory_offset.clear();
+  memory_validity.clear();
+  memory_size.clear();
+  min_pool_size = pool_size = 0;
+}
+
  } // namespace nntrainer
diff --git a/nntrainer/tensor/memory_pool.h b/nntrainer/tensor/memory_pool.h

index c9fcc23..76e0309 100644 (file)
--- a/nntrainer/tensor/memory_pool.h
+++ b/nntrainer/tensor/memory_pool.h
@@ -104,6 +104,12 @@ public:
     */
    size_t minMemoryRequirement();
  
+  /**
+   * @brief Clear the memory pool
+   * @note throws std::invalid_argument if the pool is currently allocated
+   */
+  void clear();
+
  private:
    /**
     * @brief Validate the provided layout
diff --git a/nntrainer/tensor/tensor_pool.cpp b/nntrainer/tensor/tensor_pool.cpp

index f9dd89c..e26529e 100644 (file)
--- a/nntrainer/tensor/tensor_pool.cpp
+++ b/nntrainer/tensor/tensor_pool.cpp
@@ -87,6 +87,7 @@ Tensor *TensorPool::requestPrerequestedTensor(
   */
  void TensorPool::finalize(const MemoryPlanner &planner,
                            unsigned int start_order, unsigned int end_order) {
+  mem_pool.clear();
    unsigned int bytes_requested = 0;
    for (auto &spec : pool) {
      spec.token = 0;
diff --git a/nntrainer/tensor/tensor_pool.h b/nntrainer/tensor/tensor_pool.h

index 408c7c6..c556684 100644 (file)
--- a/nntrainer/tensor/tensor_pool.h
+++ b/nntrainer/tensor/tensor_pool.h
@@ -77,6 +77,9 @@ public:
     * @note returns empty tensor which will be filled when allocate is called.
     * @note we assume that the caller checks if the exec_order and lifespan are
     * compatible.
+   *
+   * @note This interface is separated from requestTensor to reduce bugs related
+   * to unintentional tensor sharing.
     */
    Tensor *requestPrerequestedTensor(
      const TensorDim &dim, const std::vector<unsigned int> &exec_order,
diff --git a/nntrainer/tensor/var_grad.cpp b/nntrainer/tensor/var_grad.cpp

index c3d7259..436a41f 100644 (file)
--- a/nntrainer/tensor/var_grad.cpp
+++ b/nntrainer/tensor/var_grad.cpp
@@ -37,7 +37,7 @@ Var_Grad::Var_Grad(const TensorDim &dim, const Tensor::Initializer init,
  
  void Var_Grad::initializeVariable(const Tensor &preallocated) {
    if (!preallocated.empty()) {
-    var->makeSharedDataTensor(preallocated);
+    var = std::make_shared<Tensor>(preallocated);
      /** intentionally not initialized tensor memory for shared tensors */
    }
  }
@@ -48,7 +48,7 @@ void Var_Grad::initializeGradient(const Tensor &preallocated) {
       * Making a new tensor is intentional here as this tensor is not shared
       * with other layers but the internal memory is.
       */
-    grad->makeSharedDataTensor(preallocated);
+    grad = std::make_shared<Tensor>(preallocated);
      /** intentionally not initialized tensor memory for shared tensors */
    }
    /**
diff --git a/nntrainer/tensor/var_grad.h b/nntrainer/tensor/var_grad.h

index a8c09e3..320a1de 100644 (file)
--- a/nntrainer/tensor/var_grad.h
+++ b/nntrainer/tensor/var_grad.h
@@ -101,7 +101,12 @@ public:
     */
    explicit Var_Grad(Tensor *v, Tensor *g) :
      var(std::shared_ptr<Tensor>(v, [](void *) {})),
-    grad(std::shared_ptr<Tensor>(g, [](void *) {})) {}
+    grad(std::shared_ptr<Tensor>(g, [](void *) {})) {
+    if (!v)
+      var = std::make_shared<Tensor>();
+    if (!g)
+      grad = std::make_shared<Tensor>();
+  }
  
    /**
     * @brief Copy constructor for Var_Grad
diff --git a/nntrainer/tensor/weight.cpp b/nntrainer/tensor/weight.cpp

index 743b45d..36cec1b 100644 (file)
--- a/nntrainer/tensor/weight.cpp
+++ b/nntrainer/tensor/weight.cpp
@@ -30,19 +30,4 @@ Weight::Weight(const TensorDim &dim, const Tensor::Initializer init,
      throw std::invalid_argument("Weight regularizer unknown");
  }
  
-void Weight::initializeGradient(const Tensor &preallocated) {
-  // Use self variable to initialize itself
-  Var_Grad::initializeGradient(preallocated);
-  if (var->isAllocated())
-    allocateOptimizerVariables();
-}
-
-void Weight::allocateOptimizerVariables() {
-  // Allocate optimizer parameters
-  for (auto const &dim : opt_vars_dim) {
-    opt_vars.emplace_back(dim);
-    opt_vars.back().setZero();
-  }
-}
-
  } // namespace nntrainer
diff --git a/nntrainer/tensor/weight.h b/nntrainer/tensor/weight.h

index 2a0f476..42103d1 100644 (file)
--- a/nntrainer/tensor/weight.h
+++ b/nntrainer/tensor/weight.h
@@ -111,11 +111,6 @@ public:
      regularizer_constant(reg_const) {}
  
    /**
-   * @copydoc var_grad::initializeGradient(const Tensor &)
-   */
-  void initializeGradient(const Tensor &preallocated = Tensor());
-
-  /**
     * @brief Swap for weight
     *
     * @param lhs Swap to
@@ -194,18 +189,14 @@ public:
    /**
     * @brief Clear optimizer variables
     */
-  void clearOptimizerVariables() {
-    opt_vars.clear();
-    opt_vars_dim.clear();
-  }
+  void clearOptimizerVariables() { opt_vars.clear(); }
  
    /**
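-   * @brief Add optimizer variables
-   * @param dim Optimizer variable dimension
+   * @brief Set optimizer variables
+   * @param tensors Tensors to use as the optimizer variables of this weight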
     */
-  void addOptimizerVariable(const TensorDim &dim) {
-    opt_vars_dim.emplace_back(dim);
-    // TODO: Move this out when an optimizer does not initialize with 0.
+  void setOptimizerVariables(std::vector<Tensor *> tensors) {
+    opt_vars = tensors;
    }
  
    /**
@@ -213,7 +204,7 @@ public:
     * @param idx Index of the optimizer variable to get
     * @retval Reference of the optimizer variable
     */
-  Tensor &getOptimizerVariableRef(unsigned int idx) { return opt_vars[idx]; }
+  Tensor &getOptimizerVariableRef(unsigned int idx) { return *opt_vars[idx]; }
  
    /**
     * @brief Allocate and initialize the weight variable, if needed
@@ -223,10 +214,7 @@ public:
    /**
     * @brief Allocate and initialize the weight gradient, if needed
     */
-  void allocateGradient() {
-    Var_Grad::allocateGradient();
-    allocateOptimizerVariables();
-  }
+  void allocateGradient() { Var_Grad::allocateGradient(); }
  
    /**
     * @brief     check if weight regularizer type is l2norm
@@ -262,10 +250,7 @@ public:
    /**
     * @brief Deallocate memory for the gradient of the weight
     */
-  void deallocateGradient() {
-    Var_Grad::deallocateGradient();
-    opt_vars.clear();
-  }
+  void deallocateGradient() { Var_Grad::deallocateGradient(); }
  
    /**
     * @brief Deallocate the weight gradient and variable
@@ -275,22 +260,11 @@ public:
      deallocateVariable();
    }
  
-  /**
-   * @brief Allocate optimizer related variables for the given weights
-   */
-  void allocateOptimizerVariables();
-
-  /**
-   * @brief Allocate optimizer related variables for the given weights
-   */
-  void deallocateOptimizerVariables() { opt_vars.clear(); }
-
  private:
    WeightRegularizer regularizer; /**< regularizer for this variable */
    float regularizer_constant;    /**< constant factor for regularization */
  
-  std::vector<Tensor> opt_vars;        /**< optimizer variables */
-  std::vector<TensorDim> opt_vars_dim; /**< optimizer variables dimensions */
+  std::vector<Tensor *> opt_vars; /**< optimizer variables */
  };
  
  } // namespace nntrainer
diff --git a/test/unittest/unittest_nntrainer_models.cpp b/test/unittest/unittest_nntrainer_models.cpp

index ad7d56a..87f3b6e 100644 (file)
--- a/test/unittest/unittest_nntrainer_models.cpp
+++ b/test/unittest/unittest_nntrainer_models.cpp
@@ -485,7 +485,11 @@ void GraphWatcher::validateFor(const nntrainer::TensorDim &label_shape) {
    label_tensor->setRandNormal();
    nntrainer::sharedConstTensors label = {label_tensor};
  
-  EXPECT_NO_THROW(nn.forwarding(input, label));
+  if (loss_nodes.size()) {
+    EXPECT_NO_THROW(nn.forwarding(input, label));
+  } else {
+    EXPECT_NO_THROW(nn.forwarding(input, {}));
+  }
  
    if (loss_nodes.size()) {
      EXPECT_NO_THROW(nn.backwarding(label, 0));
author	Parichay Kapoor <pk.kapoor@samsung.com>
	Wed, 25 Aug 2021 08:26:21 +0000 (17:26 +0900)
committer	Jijoong Moon <jijoong.moon@samsung.com>
	Fri, 1 Oct 2021 02:59:03 +0000 (11:59 +0900)
nntrainer/graph/network_graph.cpp		patch \| blob \| history
nntrainer/graph/network_graph.h		patch \| blob \| history
nntrainer/models/neuralnet.cpp		patch \| blob \| history
nntrainer/models/neuralnet.h		patch \| blob \| history
nntrainer/tensor/manager.cpp		patch \| blob \| history
nntrainer/tensor/manager.h		patch \| blob \| history
nntrainer/tensor/memory_pool.cpp		patch \| blob \| history
nntrainer/tensor/memory_pool.h		patch \| blob \| history
nntrainer/tensor/tensor_pool.cpp		patch \| blob \| history
nntrainer/tensor/tensor_pool.h		patch \| blob \| history
nntrainer/tensor/var_grad.cpp		patch \| blob \| history
nntrainer/tensor/var_grad.h		patch \| blob \| history
nntrainer/tensor/weight.cpp		patch \| blob \| history
nntrainer/tensor/weight.h		patch \| blob \| history
test/unittest/unittest_nntrainer_models.cpp		patch \| blob \| history