From 6f01a34c92bcc58af113ffc20d52d0ba10885dbd Mon Sep 17 00:00:00 2001 From: Jihoon Lee Date: Wed, 19 Jan 2022 19:40:02 +0900 Subject: [PATCH] [Tensor Pool] Add expose/persist concept This patch adds the expose/persist concept to the tensor pool and manager. When a tensor is exposed, this means that the tensor is guaranteed to remain valid until max_exec, where max_exec is the value passed to allocateTensors(max_exec); **Self evaluation:** 1. Build test: [X]Passed [ ]Failed [ ]Skipped 2. Run test: [X]Passed [ ]Failed [ ]Skipped Signed-off-by: Jihoon Lee --- nntrainer/graph/network_graph.cpp | 3 +-- nntrainer/tensor/manager.cpp | 28 ++++++++++++++++------------ nntrainer/tensor/manager.h | 4 +--- nntrainer/tensor/tensor_pool.cpp | 8 +++++++- nntrainer/tensor/tensor_pool.h | 3 +++ 5 files changed, 28 insertions(+), 18 deletions(-) diff --git a/nntrainer/graph/network_graph.cpp b/nntrainer/graph/network_graph.cpp index 638974e2..6101a844 100644 --- a/nntrainer/graph/network_graph.cpp +++ b/nntrainer/graph/network_graph.cpp @@ -799,8 +799,7 @@ NetworkGraph::finalizeContext(const std::shared_ptr &lnode, lnode->configureRunContext( // TODO: update weights spec for trainable based on layer trainable prop tensor_manager->requestWeights(gnode, init_context.getWeightsSpec(), - lnode->getTrainable(), shared_weight_names, - graph_exec_end), + lnode->getTrainable(), shared_weight_names), inputs, outputs, tensor_manager->requestTensors(gnode, init_context.getTensorsSpec(), shared_tensor_names)); diff --git a/nntrainer/tensor/manager.cpp b/nntrainer/tensor/manager.cpp index e9e8ed5d..fb3fdc2d 100644 --- a/nntrainer/tensor/manager.cpp +++ b/nntrainer/tensor/manager.cpp @@ -144,7 +144,8 @@ void Manager::deallocateWeights() { weight_pool.deallocate(); } static Tensor *requestTensor_(const TensorSpecV2 &spec, const GraphNode::ExecutionOrder &exec_order, - const std::string &scope, TensorPool &tp) { + const std::string &scope, TensorPool &tp, + bool expose) { using RT = 
TensorSpecV2::RequestType; using LS = TensorLifespan; NNTR_THROW_IF(spec.request_type == RT::MAYBE_MODIFYING_VIEW, @@ -155,6 +156,9 @@ static Tensor *requestTensor_(const TensorSpecV2 &spec, auto [forward, calc_grad, calc_deriv] = exec_order; std::vector order = spec.additional_exec_order; + if (expose) { + order.push_back(TensorPool::PERSIST_END_ORDER); + } const auto name = scope + ":" + spec.name; @@ -202,12 +206,12 @@ Var_Grad *Manager::requestTensor(const VarGradSpecV2 &spec, << "Currently, input and tensors group type is not yet implemented, use " "requestInputs() requestTensors() instead"; - Tensor *var = - requestTensor_(spec.variable_spec, exec_order, scope, tensor_pool); - Tensor *grad = - spec.gradient_spec - ? requestTensor_(*spec.gradient_spec, exec_order, scope, tensor_pool) - : nullptr; + Tensor *var = requestTensor_(spec.variable_spec, exec_order, scope, + tensor_pool, expose_var); + Tensor *grad = spec.gradient_spec + ? requestTensor_(*spec.gradient_spec, exec_order, scope, + tensor_pool, expose_grad) + : nullptr; /// @note as only supporting identify_as == TensorGroupType::output, only /// saves to outputs for now @@ -347,10 +351,10 @@ void Manager::initializeTensorsTrain(unsigned int max_exec_order_) { */ std::vector Manager::requestWeights( const GraphNode &node, const std::vector &weights_spec, - bool trainable, const std::vector &shared_names, - const unsigned int max_exec_order) { + bool trainable, const std::vector &shared_names) { const auto [forwarding_order, calcGradient_order, calcDerivative_order] = node.getExecutionOrder(); + std::vector var_exec_order( {forwarding_order, calcGradient_order, calcDerivative_order}); std::vector default_grad_exec_order( @@ -372,7 +376,7 @@ std::vector Manager::requestWeights( * applied to the weight. 
*/ if (Weight::isGradientClipByGlobalNorm(clip_by_global_norm)) - grad_exec_order.push_back(max_exec_order); + grad_exec_order.push_back(TensorPool::PERSIST_END_ORDER); Tensor *var = nullptr, *grad = nullptr; bool is_dependent = !shared_names.empty(); @@ -529,9 +533,9 @@ Manager::requestInputs(const GraphNode &node, inputs_v2.emplace_back(std::make_unique( requestTensor_(var_spec, node.getExecutionOrder(), node.getName(), - tensor_pool), + tensor_pool, false), requestTensor_(grad_spec, node.getExecutionOrder(), node.getName(), - tensor_pool))); + tensor_pool, false))); } ret.reserve(inputs_dim.size()); diff --git a/nntrainer/tensor/manager.h b/nntrainer/tensor/manager.h index a93af9b5..7308bad4 100644 --- a/nntrainer/tensor/manager.h +++ b/nntrainer/tensor/manager.h @@ -172,15 +172,13 @@ public: * @param trainable make the weight trainable if true * @param shared_names name to refer to when the weights are borrowed from the * original source. if not shared pass empty vector - * @param max_exec_order the maximum execution order * * @return created weights list */ std::vector requestWeights(const GraphNode &node, const std::vector &weights_spec, bool trainable, - const std::vector &shared_names, - const unsigned int max_exec_order); + const std::vector &shared_names); /** * @brief Create tensors with the given spec diff --git a/nntrainer/tensor/tensor_pool.cpp b/nntrainer/tensor/tensor_pool.cpp index 3b4db9ff..648db6ce 100644 --- a/nntrainer/tensor/tensor_pool.cpp +++ b/nntrainer/tensor/tensor_pool.cpp @@ -126,8 +126,14 @@ void TensorPool::finalize(const MemoryPlanner &planner, unsigned int validity_end = validity_start; for (unsigned int idx = 0; idx < details->exec_order.size(); idx++) { - if (details->exec_order[idx] <= end_order) + if (details->exec_order[idx] == PERSIST_END_ORDER) { + validity_end = end_order; + break; + } + + if (details->exec_order[idx] <= end_order) { validity_end = std::max(validity_end, details->exec_order[idx]); + } } /** diff --git 
a/nntrainer/tensor/tensor_pool.h b/nntrainer/tensor/tensor_pool.h index 271463d2..c3e590e3 100644 --- a/nntrainer/tensor/tensor_pool.h +++ b/nntrainer/tensor/tensor_pool.h @@ -17,6 +17,7 @@ #ifdef __cplusplus #include +#include #include #include #include @@ -35,6 +36,8 @@ namespace nntrainer { class TensorPool { public: + static constexpr unsigned PERSIST_END_ORDER = + std::numeric_limits::max(); /** * @brief Constructor of TensorPool */ -- 2.34.1