[neurun] Apply tensor lifetime-info(use-def) to Linear/TensorBuilder (#3409)
author김용섭/동작제어Lab(SR)/Engineer/삼성전자 <yons.kim@samsung.com>
Mon, 5 Nov 2018 01:12:03 +0000 (10:12 +0900)
committer오형석/동작제어Lab(SR)/Staff Engineer/삼성전자 <hseok82.oh@samsung.com>
Mon, 5 Nov 2018 01:12:03 +0000 (10:12 +0900)
* [neurun] Apply tensor lifetime-info(use-def) to memory allocation

Applies tensor lifetime info (use-def) to memory allocation by revising
Linear and TensorBuilder.
- Introduce vassignTensors (virtually assign tensors) in Linear instead
  of markTensors
- Introduce assign (virtually assign memory) / unassign (virtually
  unassign memory) in TensorBuilder instead of mark

Signed-off-by: Yongseop Kim <yons.kim@samsung.com>
* Fix release build error

* Rename functions properly

Linear::vassignTensors -> planTensors
ITensorBuilder::assign -> notifyFirstUse
ITensorBuilder::unassign -> notifyLastUse

* Fix log

* Fix to scan each operation's outputs first, before its inputs

runtimes/neurun/src/backend/acl_cl/TensorBuilder.cc
runtimes/neurun/src/backend/acl_cl/TensorBuilder.h
runtimes/neurun/src/backend/cpu/MemoryAllocator.cc
runtimes/neurun/src/backend/cpu/TensorBuilder.cc
runtimes/neurun/src/backend/cpu/TensorBuilder.h
runtimes/neurun/src/backend/interface/ITensorBuilder.h
runtimes/neurun/src/frontend/wrapper/compilation.cc
runtimes/neurun/src/linear/Linear.cc
runtimes/neurun/src/linear/Linear.h

index 262f4f2..c0cc2c8 100644 (file)
@@ -34,14 +34,19 @@ TensorBuilder::TensorBuilder()
   // DO NOTHING
 }
 
-void TensorBuilder::mark(const ::neurun::graph::operand::Index &ind,
-                         const ::arm_compute::TensorInfo &info)
+void TensorBuilder::notifyFirstUse(const graph::operand::Index &ind,
+                                   const ::arm_compute::TensorInfo &info)
 {
   assert(_tensors.size() == 0);
 
   _tensor_info_map.insert({ind, info});
 }
 
+void TensorBuilder::notifyLastUse(const graph::operand::Index &)
+{
+  // DO NOTHING
+}
+
 void TensorBuilder::prepare(void)
 {
   assert(_tensors.size() == 0);
index 78a985a..d57cb5c 100644 (file)
@@ -35,8 +35,10 @@ class TensorBuilder : public ITensorBuilder
 public:
   TensorBuilder();
 
-  virtual void mark(const ::neurun::graph::operand::Index &ind,
-                    const ::arm_compute::TensorInfo &info) override;
+  virtual void notifyFirstUse(const graph::operand::Index &,
+                              const ::arm_compute::TensorInfo &) override;
+  virtual void notifyLastUse(const graph::operand::Index &) override;
+
   virtual void prepare(void) override;
   virtual void allocate(void) override;
 
index 889f94c..b9cc213 100644 (file)
@@ -48,9 +48,10 @@ void BumpAllocator::finalize()
 {
   assert(!_base && _pos != 0);
 
-  VERBOSE(BP_ALLOC) << "final position: " << _pos << std::endl;
-
   _base = new uint8_t[_pos];
+
+  VERBOSE(BP_ALLOC) << "final position: " << _pos << std::endl;
+  VERBOSE(BP_ALLOC) << "base pointer: " << static_cast<void *>(_base) << std::endl;
 }
 
 void BumpAllocator::free(const graph::operand::Index &index)
index 4bb21b6..c349a67 100644 (file)
@@ -20,6 +20,7 @@
 
 #include "operand/Object.h"
 #include "MemoryAllocator.h"
+#include "logging.h"
 
 namespace neurun
 {
@@ -34,12 +35,28 @@ TensorBuilder::TensorBuilder() : _mem_alloc(std::make_shared<BumpAllocator>())
   // DO NOTHING
 }
 
-void TensorBuilder::mark(const ::neurun::graph::operand::Index &ind,
-                         const ::arm_compute::TensorInfo &info)
+void TensorBuilder::notifyFirstUse(const graph::operand::Index &ind,
+                                   const ::arm_compute::TensorInfo &info)
 {
-  assert(_tensors.size() == 0);
+  assert(_mem_alloc);
 
   _tensor_info_map.insert({ind, info});
+
+  const auto size = info.total_size();
+  auto mem_blk = _mem_alloc->allocate(ind, size);
+  _tensor_mem_map[ind] = mem_blk;
+
+  VERBOSE(CPU_TENSORBUILDER) << "ASSIGN(#" << ind.value() << "): mem_blk[" << mem_blk.offset << ", "
+                             << mem_blk.size << "]" << std::endl;
+}
+
+void TensorBuilder::notifyLastUse(const graph::operand::Index &ind)
+{
+  assert(_mem_alloc);
+
+  _mem_alloc->free(ind);
+
+  VERBOSE(CPU_TENSORBUILDER) << "UNASSIGN(#" << ind.value() << ")" << std::endl;
 }
 
 void TensorBuilder::prepare(void)
@@ -47,26 +64,6 @@ void TensorBuilder::prepare(void)
   assert(_tensors.size() == 0);
   assert(_mem_alloc);
 
-  for (auto &entry : _tensor_info_map)
-  {
-    auto ind = entry.first;
-    const auto &info = entry.second;
-    auto tensor = std::make_shared<operand::Tensor>(info);
-    _tensors[ind] = tensor;
-    // If we do not make tensor here currently, stages would cause segment fault
-
-    const auto size = info.total_size(); // NOTE This size may not be accurate
-    auto mem_blk = _mem_alloc->allocate(ind, size);
-    _tensor_mem_map[ind] = mem_blk;
-  }
-  assert(_tensor_info_map.size() == _tensor_mem_map.size());
-
-  // TODO below code can be moved in TensorBuild::allocate()
-  // if StageGerator was modified like
-  //   from
-  //     fn->configure(ifm_alloc->buffer(), param.ifm_shape, ker_alloc->buffer(), param.ker_shape,
-  //   to
-  //     fn->configure(ifm_alloc, param.ifm_shape, ker_alloc, param.ker_shape,
   _mem_alloc->finalize();
   assert(_mem_alloc->base());
 
@@ -74,15 +71,22 @@ void TensorBuilder::prepare(void)
   {
     auto ind = entry.first;
     auto mem_blk = entry.second;
-    auto &tensor = _tensors[ind];
-    tensor->setBuffer(_mem_alloc->base() + mem_blk.offset);
+    const auto &info = _tensor_info_map[ind];
+
+    uint8_t *buffer = _mem_alloc->base() + mem_blk.offset;
+    auto tensor = std::make_shared<operand::Tensor>(info);
+    tensor->setBuffer(buffer);
+    _tensors[ind] = tensor;
+
+    VERBOSE(CPU_TENSORBUILDER) << "TENSOR(#" << ind.value() << "): " << static_cast<void *>(buffer)
+                               << std::endl;
+
+    // If we do not make tensor here currently, stages would cause segment fault
   }
 }
 
 void TensorBuilder::allocate(void)
 {
-  assert(_tensor_info_map.size() == _tensors.size());
-
   // NOTE For now nothing to do. Allocation is done in prepare stage, which is wrong
 }
 
index 0c126d2..768f6ab 100644 (file)
@@ -36,8 +36,10 @@ class TensorBuilder : public ITensorBuilder
 public:
   TensorBuilder();
 
-  virtual void mark(const ::neurun::graph::operand::Index &ind,
-                    const ::arm_compute::TensorInfo &info) override;
+  virtual void notifyFirstUse(const graph::operand::Index &,
+                              const ::arm_compute::TensorInfo &) override;
+  virtual void notifyLastUse(const graph::operand::Index &) override;
+
   virtual void prepare(void) override;
   virtual void allocate(void) override;
 
index 9d0a6bb..d968c47 100644 (file)
@@ -33,8 +33,10 @@ struct ITensorBuilder
   using IterateFunction = std::function<void(const graph::operand::Index &)>;
 
   virtual ~ITensorBuilder(void) = default;
-  virtual void mark(const ::neurun::graph::operand::Index &ind,
-                    const ::arm_compute::TensorInfo &info) = 0;
+
+  virtual void notifyFirstUse(const graph::operand::Index &, const ::arm_compute::TensorInfo &) = 0;
+  virtual void notifyLastUse(const graph::operand::Index &) = 0;
+
   // TODO Add an interface for adding subsumption info
   virtual void prepare(void) = 0;
   virtual void allocate(void) = 0;
index 4258085..5f448d0 100644 (file)
@@ -65,10 +65,10 @@ int ANeuralNetworksCompilation::finish()
 
   neurun::codegen::PlanBuilder plan_builder{plan};
 
-  auto tensor_builders = linear->markTensors();
-
   linear->accept(neurun::codegen::Planner{operands, plan_builder});
 
+  auto tensor_builders = linear->planTensors();
+
   // TODO Add optimization passes
   plan_builder.finalize(tensor_builders);
 
index 41921a5..162477d 100644 (file)
@@ -22,6 +22,8 @@
 #include "backend/interface/IStageGenerator.h"
 #include "internal/Convert.h"
 
+#include "logging.h"
+
 namespace neurun
 {
 namespace linear
@@ -50,23 +52,115 @@ void Linear::accept(graph::operation::NodeVisitor &&visitor) const
   }
 }
 
-backend::TensorBuilderSet Linear::markTensors() const
+backend::TensorBuilderSet Linear::planTensors()
 {
+  using ITensorBuilderPtr = std::shared_ptr<backend::ITensorBuilder>;
+  using FnOnTensorBuilder =
+      std::function<void(const graph::operand::Index &ind, ITensorBuilderPtr)>;
+
+  const auto &operands = _graph.operands();
+  auto iterTensorBuilders = [&operands](const graph::operand::Index &ind, FnOnTensorBuilder fn) {
+    const auto &obj = operands.at(ind);
+    for (auto backend : obj.lower_info()->def_backends())
+    {
+      auto tensor_builder = backend->tensor_builder();
+      fn(ind, tensor_builder);
+    }
+  };
+
   backend::TensorBuilderSet tensor_builders;
 
+  std::unordered_map<graph::operand::Index, uint32_t> uses_map;
+  std::vector<graph::operand::Index> constants;
+
   _graph.operands().iterate(
       [&](const graph::operand::Index &ind, const graph::operand::Object &obj) {
-        for (auto backend : obj.lower_info()->def_backends())
-        {
-          auto tensor_builder = backend->tensor_builder();
-          const auto info = ::internal::asTensorInfo(obj.shape(), obj.typeInfo());
+        uses_map[ind] = obj.getUses().size();
 
-          tensor_builder->mark(ind, info);
+        // If a tensor is a constant, increase the use of the tensor.
+        // It makes the tensor not be dealloced.
+        if (obj.getUsage() == graph::operand::OperandUsage::CONSTANT)
+        {
+          constants.push_back(ind);
+          uses_map[ind]++;
+        }
 
+        // Prepare tensor builders to be returned
+        iterTensorBuilders(ind, [&tensor_builders](const graph::operand::Index &,
+                                                   ITensorBuilderPtr tensor_builder) {
           tensor_builders.insert(tensor_builder);
-        }
+        });
       });
 
+  // If a tensor is model output, increase the use of the tensor.
+  // This aim is same to above one.
+  for (const auto &ind : _graph.getOutputs())
+  {
+    uses_map[ind]++;
+  }
+
+  // Allocate constant operands first
+  VERBOSE(LINEAR) << "TENSORS as CONSTANT" << std::endl;
+  for (const auto &ind : constants)
+  {
+    const auto &obj = operands.at(ind);
+    const auto info = ::internal::asTensorInfo(obj.shape(), obj.typeInfo());
+    iterTensorBuilders(ind,
+                       [&info](const graph::operand::Index &ind, ITensorBuilderPtr tensor_builder) {
+                         tensor_builder->notifyFirstUse(ind, info);
+                       });
+  }
+
+  // Allocate Model's inputs
+  VERBOSE(LINEAR) << "TENSORS as MODEL INPUT" << std::endl;
+  for (const auto &ind : _graph.getInputs())
+  {
+    const auto &obj = operands.at(ind);
+    const auto info = ::internal::asTensorInfo(obj.shape(), obj.typeInfo());
+    iterTensorBuilders(ind,
+                       [&info](const graph::operand::Index &ind, ITensorBuilderPtr tensor_builder) {
+                         tensor_builder->notifyFirstUse(ind, info);
+                       });
+  }
+
+  // At each operation,
+  //   1. Scan USE of inputs. Decrease the USE and deallocate if the USE is 0
+  //   2. Scan DEF of outputs. If the DEF, allocate it
+  VERBOSE(LINEAR) << "TENSORS" << std::endl;
+  for (const auto op : _operations)
+  {
+    for (const auto &ind : op->getOutputs())
+    {
+      const auto &obj = operands.at(ind);
+      if (obj.getDef().size())
+      {
+        const auto info = ::internal::asTensorInfo(obj.shape(), obj.typeInfo());
+        iterTensorBuilders(
+            ind, [&info](const graph::operand::Index &ind, ITensorBuilderPtr tensor_builder) {
+              tensor_builder->notifyFirstUse(ind, info);
+            });
+      }
+    }
+
+    for (const auto &ind : op->getInputs())
+    {
+      uses_map[ind]--;
+      if (uses_map[ind] == 0)
+      {
+        iterTensorBuilders(ind,
+                           [](const graph::operand::Index &ind, ITensorBuilderPtr tensor_builder) {
+                             tensor_builder->notifyLastUse(ind);
+                           });
+      }
+    }
+  }
+
+#ifndef NDEBUG
+  // Now, model outputs should be not deallocated
+  for (const auto &ind : _graph.getOutputs())
+    assert(uses_map[ind] > 0);
+#endif
+
   return tensor_builders;
 }
 
index 46815e4..16ef785 100644 (file)
@@ -58,7 +58,7 @@ public:
   void accept(graph::operation::NodeVisitor &&visitor) const;
 
   // TODO Should not return TensorBuilderSet
-  backend::TensorBuilderSet markTensors() const;
+  backend::TensorBuilderSet planTensors();
 
 private:
   const graph::Graph &_graph;