[Tensor Pool] Add expose/persist concept
authorJihoon Lee <jhoon.it.lee@samsung.com>
Wed, 19 Jan 2022 10:40:02 +0000 (19:40 +0900)
committerJijoong Moon <jijoong.moon@samsung.com>
Wed, 19 Jan 2022 23:31:47 +0000 (08:31 +0900)
This patch adds the expose/persist concept to the tensor pool and manager.

When a tensor is exposed, this means that the tensor is guaranteed to
remain valid up to max_exec, where max_exec is the value passed to allocateTensors(max_exec).

**Self evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test: [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Jihoon Lee <jhoon.it.lee@samsung.com>
nntrainer/graph/network_graph.cpp
nntrainer/tensor/manager.cpp
nntrainer/tensor/manager.h
nntrainer/tensor/tensor_pool.cpp
nntrainer/tensor/tensor_pool.h

index 638974e..6101a84 100644 (file)
@@ -799,8 +799,7 @@ NetworkGraph::finalizeContext(const std::shared_ptr<LayerNode> &lnode,
   lnode->configureRunContext(
     // TODO: update weights spec for trainable based on layer trainable prop
     tensor_manager->requestWeights(gnode, init_context.getWeightsSpec(),
-                                   lnode->getTrainable(), shared_weight_names,
-                                   graph_exec_end),
+                                   lnode->getTrainable(), shared_weight_names),
     inputs, outputs,
     tensor_manager->requestTensors(gnode, init_context.getTensorsSpec(),
                                    shared_tensor_names));
index e9e8ed5..fb3fdc2 100644 (file)
@@ -144,7 +144,8 @@ void Manager::deallocateWeights() { weight_pool.deallocate(); }
 
 static Tensor *requestTensor_(const TensorSpecV2 &spec,
                               const GraphNode::ExecutionOrder &exec_order,
-                              const std::string &scope, TensorPool &tp) {
+                              const std::string &scope, TensorPool &tp,
+                              bool expose) {
   using RT = TensorSpecV2::RequestType;
   using LS = TensorLifespan;
   NNTR_THROW_IF(spec.request_type == RT::MAYBE_MODIFYING_VIEW,
@@ -155,6 +156,9 @@ static Tensor *requestTensor_(const TensorSpecV2 &spec,
   auto [forward, calc_grad, calc_deriv] = exec_order;
 
   std::vector<unsigned> order = spec.additional_exec_order;
+  if (expose) {
+    order.push_back(TensorPool::PERSIST_END_ORDER);
+  }
 
   const auto name = scope + ":" + spec.name;
 
@@ -202,12 +206,12 @@ Var_Grad *Manager::requestTensor(const VarGradSpecV2 &spec,
     << "Currently, input and tensors group type is not yet implemented, use "
        "requestInputs() requestTensors() instead";
 
-  Tensor *var =
-    requestTensor_(spec.variable_spec, exec_order, scope, tensor_pool);
-  Tensor *grad =
-    spec.gradient_spec
-      ? requestTensor_(*spec.gradient_spec, exec_order, scope, tensor_pool)
-      : nullptr;
+  Tensor *var = requestTensor_(spec.variable_spec, exec_order, scope,
+                               tensor_pool, expose_var);
+  Tensor *grad = spec.gradient_spec
+                   ? requestTensor_(*spec.gradient_spec, exec_order, scope,
+                                    tensor_pool, expose_grad)
+                   : nullptr;
 
   /// @note as only supporting identify_as == TensorGroupType::output, only
   /// saves to outputs for now
@@ -347,10 +351,10 @@ void Manager::initializeTensorsTrain(unsigned int max_exec_order_) {
  */
 std::vector<Weight *> Manager::requestWeights(
   const GraphNode &node, const std::vector<Weight::Spec> &weights_spec,
-  bool trainable, const std::vector<std::string> &shared_names,
-  const unsigned int max_exec_order) {
+  bool trainable, const std::vector<std::string> &shared_names) {
   const auto [forwarding_order, calcGradient_order, calcDerivative_order] =
     node.getExecutionOrder();
+
   std::vector<unsigned int> var_exec_order(
     {forwarding_order, calcGradient_order, calcDerivative_order});
   std::vector<unsigned int> default_grad_exec_order(
@@ -372,7 +376,7 @@ std::vector<Weight *> Manager::requestWeights(
      * applied to the weight.
      */
     if (Weight::isGradientClipByGlobalNorm(clip_by_global_norm))
-      grad_exec_order.push_back(max_exec_order);
+      grad_exec_order.push_back(TensorPool::PERSIST_END_ORDER);
 
     Tensor *var = nullptr, *grad = nullptr;
     bool is_dependent = !shared_names.empty();
@@ -529,9 +533,9 @@ Manager::requestInputs(const GraphNode &node,
 
     inputs_v2.emplace_back(std::make_unique<Var_Grad>(
       requestTensor_(var_spec, node.getExecutionOrder(), node.getName(),
-                     tensor_pool),
+                     tensor_pool, false),
       requestTensor_(grad_spec, node.getExecutionOrder(), node.getName(),
-                     tensor_pool)));
+                     tensor_pool, false)));
   }
 
   ret.reserve(inputs_dim.size());
index a93af9b..7308bad 100644 (file)
@@ -172,15 +172,13 @@ public:
    * @param trainable make the weight trainable if true
    * @param shared_names name to refer to when the weights are borrowed from the
    * original source. if not shared pass empty vector
-   * @param max_exec_order the maximum execution order
    *
    * @return created weights list
    */
   std::vector<Weight *>
   requestWeights(const GraphNode &node,
                  const std::vector<Weight::Spec> &weights_spec, bool trainable,
-                 const std::vector<std::string> &shared_names,
-                 const unsigned int max_exec_order);
+                 const std::vector<std::string> &shared_names);
 
   /**
    * @brief     Create tensors with the given spec
index 3b4db9f..648db6c 100644 (file)
@@ -126,8 +126,14 @@ void TensorPool::finalize(const MemoryPlanner &planner,
 
     unsigned int validity_end = validity_start;
     for (unsigned int idx = 0; idx < details->exec_order.size(); idx++) {
-      if (details->exec_order[idx] <= end_order)
+      if (details->exec_order[idx] == PERSIST_END_ORDER) {
+        validity_end = end_order;
+        break;
+      }
+
+      if (details->exec_order[idx] <= end_order) {
         validity_end = std::max(validity_end, details->exec_order[idx]);
+      }
     }
 
     /**
index 271463d..c3e590e 100644 (file)
@@ -17,6 +17,7 @@
 #ifdef __cplusplus
 
 #include <functional>
+#include <limits>
 #include <memory>
 #include <unordered_map>
 #include <variant>
@@ -35,6 +36,8 @@ namespace nntrainer {
 class TensorPool {
 
 public:
+  static constexpr unsigned PERSIST_END_ORDER =
+    std::numeric_limits<unsigned>::max();
   /**
    * @brief     Constructor of TensorPool
    */