[neurun] Set input/output as common tensor, Implement tensor conversion kernels (...
author Sujin Kim / Motion Control Lab (SR) / Engineer / Samsung Electronics <sjsujin.kim@samsung.com>
Wed, 8 Aug 2018 07:54:33 +0000 (16:54 +0900)
committer Chunseok Lee / Motion Control Lab (SR) / Staff Engineer / Samsung Electronics <chunseok.lee@samsung.com>
Wed, 8 Aug 2018 07:54:33 +0000 (16:54 +0900)
Related: #2142, #2040

- Set input/output as common tensor
- Implement tensor conversion kernels

Test results match.
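
For reference, every new conversion path below reuses the same element-wise copy
over a feature map, driven by the Reader/View/iterate helpers included in the
modified files. A minimal sketch of that shared pattern follows; the free-function
framing, its name, and the exact Shape/buffer parameter types are assumptions for
illustration, not part of this change:

    #include "internal/nnapi/feature/Reader.h"
    #include "internal/nnapi/feature/View.h"

    #include <util/feature/IndexIterator.h>

    #include <cstddef>
    #include <cstdint>

    // Copy one float feature map between two buffers that share the common
    // (NNAPI) element ordering; this is the loop each kernel below repeats.
    static void copyFeatureMap(const ::nnfw::util::feature::Shape &shape,
                               const uint8_t *inputBuffer, size_t inputSize,
                               uint8_t *outputBuffer, size_t outputSize)
    {
      const ::internal::nnapi::feature::Reader<float> from{shape, inputBuffer, inputSize};
      ::internal::nnapi::feature::View<float> into{shape, outputBuffer, outputSize};

      ::nnfw::util::feature::iterate(shape) << [&](uint32_t ch, uint32_t row, uint32_t col) {
        into.at(ch, row, col) = from.at(ch, row, col);
      };
    }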

Signed-off-by: sjsujinkim <sjsujin.kim@samsung.com>
runtimes/neurun/src/frontend/execution.cc
runtimes/neurun/src/internal/Sink.h
runtimes/neurun/src/internal/Source.h
runtimes/neurun/src/kernel/acl_cl/TensorConvertFromCommonLayer.cc
runtimes/neurun/src/kernel/acl_cl/TensorConvertToCommonLayer.cc
runtimes/neurun/src/kernel/cpufallback/TensorConvertFromCommonLayer.cc
runtimes/neurun/src/kernel/cpufallback/TensorConvertToCommonLayer.cc

runtimes/neurun/src/frontend/execution.cc
index 13e4f51..6ea274e 100644
@@ -142,7 +142,7 @@ int ANeuralNetworksExecution_startCompute(ANeuralNetworksExecution *execution,
   {
     auto setter = [&](::arm_compute::ITensor &tensor) { execution->source(n).push(tensor); };
 
-    auto objects = plan.operands().at(model.inputs.at(n));
+    auto objects = plan.common_operands().at(model.inputs.at(n));
 
     for (auto object : objects)
     {
@@ -162,7 +162,7 @@ int ANeuralNetworksExecution_startCompute(ANeuralNetworksExecution *execution,
   {
     auto getter = [&](::arm_compute::ITensor &tensor) { execution->sink(n).pull(tensor); };
 
-    auto objects = plan.operands().at(model.outputs.at(n));
+    auto objects = plan.common_operands().at(model.outputs.at(n));
 
     for (auto object : objects)
     {
runtimes/neurun/src/internal/Sink.h
index 59dedf4..0d5cd5e 100644
@@ -12,6 +12,8 @@
 #include "internal/nnapi/feature/View.h"
 #include "internal/nnapi/feature/Reader.h"
 
+#include "internal/common/Tensor.h"
+
 struct Sink
 {
   virtual ~Sink() = default;
@@ -33,6 +35,9 @@ public:
 public:
   void pull(::arm_compute::ITensor &tensor) const override
   {
+    // Only common tensors are supported for now
+    assert(typeid(tensor) == typeid(::internal::common::Tensor));
+
     float *base = reinterpret_cast<float *>(_base);
 
     for (int32_t n = 0; n < _vlen; ++n)
@@ -64,27 +69,17 @@ public:
 public:
   void pull(::arm_compute::ITensor &tensor) const override
   {
-    // TODO: This is just workaround codes, It needs to refactor.
-    if (typeid(tensor) == typeid(::internal::cpu::Tensor))
-    {
-      const ::internal::nnapi::feature::Reader<float> from{_shape, tensor.buffer(), _size};
-      ::internal::nnapi::feature::View<float> into{_shape, _base, _size};
+    // Only common tensors are supported for now
+    assert(typeid(tensor) == typeid(::internal::common::Tensor));
 
-      ::nnfw::util::feature::iterate(_shape) << [&](uint32_t ch, uint32_t row, uint32_t col) {
-        const auto value = from.at(ch, row, col);
-        into.at(ch, row, col) = value;
-      };
-    }
-    else if (typeid(tensor) == typeid(::arm_compute::CLTensor))
-    {
-      const ::internal::arm_compute::feature::View<float> from{&tensor};
-      ::internal::nnapi::feature::View<float> into{_shape, _base, _size};
+    // nnapi tensor ordering == common tensor ordering
+    const ::internal::nnapi::feature::Reader<float> from{_shape, tensor.buffer(), _size};
+    ::internal::nnapi::feature::View<float> into{_shape, _base, _size};
 
-      ::nnfw::util::feature::iterate(_shape) << [&](uint32_t ch, uint32_t row, uint32_t col) {
-        const auto value = from.at(ch, row, col);
-        into.at(ch, row, col) = value;
-      };
-    }
+    ::nnfw::util::feature::iterate(_shape) << [&](uint32_t ch, uint32_t row, uint32_t col) {
+      const auto value = from.at(ch, row, col);
+      into.at(ch, row, col) = value;
+    };
   }
 
 private:
runtimes/neurun/src/internal/Source.h
index e539d29..4b00285 100644
@@ -14,6 +14,8 @@
 
 #include "backend/acl_cl/feature/View.h"
 
+#include "internal/common/Tensor.h"
+
 struct Source
 {
   virtual ~Source() = default;
@@ -36,6 +38,9 @@ public:
 public:
   void push(::arm_compute::ITensor &tensor) const override
   {
+    // Only common tensors are supported for now
+    assert(typeid(tensor) == typeid(::internal::common::Tensor));
+
     auto base = reinterpret_cast<const float *>(_base);
 
     for (int32_t n = 0; n < _vlen; ++n)
@@ -67,27 +72,17 @@ public:
 public:
   void push(::arm_compute::ITensor &tensor) const override
   {
-    // TODO: This is just workaround codes, It needs to refactor.
-    if (typeid(tensor) == typeid(::internal::cpu::Tensor))
-    {
-      const ::internal::nnapi::feature::Reader<float> from{_shape, _base, _size};
-      ::internal::nnapi::feature::View<float> into{_shape, tensor.buffer(), _size};
+    // Only common tensors are supported for now
+    assert(typeid(tensor) == typeid(::internal::common::Tensor));
 
-      ::nnfw::util::feature::iterate(_shape) << [&](uint32_t ch, uint32_t row, uint32_t col) {
-        const auto value = from.at(ch, row, col);
-        into.at(ch, row, col) = value;
-      };
-    }
-    else if (typeid(tensor) == typeid(::arm_compute::CLTensor))
-    {
-      const ::internal::nnapi::feature::Reader<float> from{_shape, _base, _size};
-      ::internal::arm_compute::feature::View<float> into{&tensor};
+    // nnapi tensor ordering == common tensor ordering
+    const ::internal::nnapi::feature::Reader<float> from{_shape, _base, _size};
+    ::internal::nnapi::feature::View<float> into{_shape, tensor.buffer(), _size};
 
-      ::nnfw::util::feature::iterate(_shape) << [&](uint32_t ch, uint32_t row, uint32_t col) {
-        const auto value = from.at(ch, row, col);
-        into.at(ch, row, col) = value;
-      };
-    }
+    ::nnfw::util::feature::iterate(_shape) << [&](uint32_t ch, uint32_t row, uint32_t col) {
+      const auto value = from.at(ch, row, col);
+      into.at(ch, row, col) = value;
+    };
   }
 
 private:
runtimes/neurun/src/kernel/acl_cl/TensorConvertFromCommonLayer.cc
index 7469fc6..27a0e86 100644
 
 #include "TensorConvertFromCommonLayer.h"
 
-#include "logging.h"
+#include "internal/nnapi/feature/Reader.h"
+#include "backend/acl_cl/feature/View.h"
+
+#include <util/feature/IndexIterator.h>
+#include <arm_compute/runtime/CL/CLScheduler.h>
 
 namespace neurun
 {
@@ -27,9 +31,42 @@ namespace acl_cl
 
 bool TensorConvertFromCommonLayer::convert()
 {
-  VERBOSE(TensorConvertFromCommonLayer)
-      << "Tensor conversion from common, but it is not yet implemented." << std::endl;
-  return true;
+  auto inputBuffer = _inputTensor->buffer();
+  auto inputSize = _inputTensor->info()->total_size();
+
+  auto &queue = ::arm_compute::CLScheduler::get().queue();
+
+  _outputTensor->map(queue);
+
+  if (_tensorShape.rank() == 2)
+  {
+    const auto len = _tensorShape.dim(1);
+
+    auto base = reinterpret_cast<const float *>(inputBuffer);
+
+    for (int32_t n = 0; n < len; ++n)
+    {
+      auto from = base + n;
+      auto into =
+          reinterpret_cast<float *>(_outputTensor->ptr_to_element(::arm_compute::Coordinates{n}));
+
+      *into = *from;
+    }
+  }
+  else if (_tensorShape.rank() == 4)
+  {
+    auto featureShape = _tensorShape.asFeature();
+
+    const ::internal::nnapi::feature::Reader<float> from{featureShape, inputBuffer, inputSize};
+    ::internal::arm_compute::feature::View<float> into{_outputTensor};
+
+    ::nnfw::util::feature::iterate(featureShape) << [&](uint32_t ch, uint32_t row, uint32_t col) {
+      const auto value = from.at(ch, row, col);
+      into.at(ch, row, col) = value;
+    };
+  }
+
+  _outputTensor->unmap(queue);
+
+  return true;
 }
 
 void TensorConvertFromCommonLayer::configure(::internal::common::Tensor *inputTensor,
runtimes/neurun/src/kernel/acl_cl/TensorConvertToCommonLayer.cc
index 2e0851e..aebab30 100644
 
 #include "TensorConvertToCommonLayer.h"
 
-#include "logging.h"
+#include "backend/acl_cl/feature/View.h"
+#include "internal/nnapi/feature/View.h"
+
+#include <util/feature/IndexIterator.h>
+#include <arm_compute/runtime/CL/CLScheduler.h>
 
 namespace neurun
 {
@@ -27,9 +31,42 @@ namespace acl_cl
 
 bool TensorConvertToCommonLayer::convert()
 {
-  VERBOSE(TensorConvertToCommonLayer)
-      << "Tensor conversion to common, but it is not yet implemented." << std::endl;
-  return true;
+  auto outputBuffer = _outputTensor->buffer();
+  auto outputSize = _outputTensor->info()->total_size();
+
+  auto &queue = ::arm_compute::CLScheduler::get().queue();
+
+  _inputTensor->map(queue);
+
+  if (_tensorShape.rank() == 2)
+  {
+    const auto len = _tensorShape.dim(1);
+
+    auto base = reinterpret_cast<float *>(outputBuffer);
+
+    for (int32_t n = 0; n < len; ++n)
+    {
+      auto from = reinterpret_cast<const float *>(
+          _inputTensor->ptr_to_element(::arm_compute::Coordinates{n}));
+      auto into = base + n;
+
+      *into = *from;
+    }
+  }
+  else if (_tensorShape.rank() == 4)
+  {
+    auto featureShape = _tensorShape.asFeature();
+
+    const ::internal::arm_compute::feature::View<float> from{_inputTensor};
+    ::internal::nnapi::feature::View<float> into{featureShape, outputBuffer, outputSize};
+
+    ::nnfw::util::feature::iterate(featureShape) << [&](uint32_t ch, uint32_t row, uint32_t col) {
+      const auto value = from.at(ch, row, col);
+      into.at(ch, row, col) = value;
+    };
+  }
+
+  _inputTensor->unmap(queue);
+
+  return true;
 }
 
 void TensorConvertToCommonLayer::configure(::arm_compute::ICLTensor *inputTensor,
runtimes/neurun/src/kernel/cpufallback/TensorConvertFromCommonLayer.cc
index 18e2644..20d620d 100644
 
 #include "TensorConvertFromCommonLayer.h"
 
-#include "logging.h"
+#include "internal/nnapi/feature/Reader.h"
+#include "internal/nnapi/feature/View.h"
+
+#include <util/feature/IndexIterator.h>
 
 namespace neurun
 {
@@ -27,9 +30,39 @@ namespace cpu
 
 bool TensorConvertFromCommonLayer::convert()
 {
-  VERBOSE(TensorConvertFromCommonLayer)
-      << "Tensor conversion from common, but it is not yet implemented." << std::endl;
-  return true;
+  auto inputBuffer = _inputTensor->buffer();
+  auto inputSize = _inputTensor->info()->total_size();
+
+  auto outputBuffer = _outputTensor->buffer();
+  auto outputSize = _outputTensor->info()->total_size();
+
+  if (_tensorShape.rank() == 2)
+  {
+    const auto len = _tensorShape.dim(1);
+
+    auto base = reinterpret_cast<const float *>(inputBuffer);
+
+    for (int32_t n = 0; n < len; ++n)
+    {
+      auto from = base + n;
+      auto into =
+          reinterpret_cast<float *>(_outputTensor->ptr_to_element(::arm_compute::Coordinates{n}));
+
+      *into = *from;
+    }
+  }
+  else if (_tensorShape.rank() == 4)
+  {
+    auto featureShape = _tensorShape.asFeature();
+
+    const ::internal::nnapi::feature::Reader<float> from{featureShape, inputBuffer, inputSize};
+    ::internal::nnapi::feature::View<float> into{featureShape, outputBuffer, outputSize};
+
+    ::nnfw::util::feature::iterate(featureShape) << [&](uint32_t ch, uint32_t row, uint32_t col) {
+      const auto value = from.at(ch, row, col);
+      into.at(ch, row, col) = value;
+    };
+  }
+
+  return true;
 }
 
 void TensorConvertFromCommonLayer::configure(::internal::common::Tensor *inputTensor,
runtimes/neurun/src/kernel/cpufallback/TensorConvertToCommonLayer.cc
index a19a5cf..cebb91a 100644
 
 #include "TensorConvertToCommonLayer.h"
 
-#include "logging.h"
+#include "internal/nnapi/feature/Reader.h"
+#include "internal/nnapi/feature/View.h"
+
+#include <util/feature/IndexIterator.h>
 
 namespace neurun
 {
@@ -27,9 +30,39 @@ namespace cpu
 
 bool TensorConvertToCommonLayer::convert()
 {
-  VERBOSE(ConvertToCommon) << "Tensor conversion to common, but it is not yet implemented."
-                           << std::endl;
-  return true;
+  auto inputBuffer = _inputTensor->buffer();
+  auto inputSize = _inputTensor->info()->total_size();
+
+  auto outputBuffer = _outputTensor->buffer();
+  auto outputSize = _outputTensor->info()->total_size();
+
+  if (_tensorShape.rank() == 2)
+  {
+    const auto len = _tensorShape.dim(1);
+
+    auto base = reinterpret_cast<float *>(outputBuffer);
+
+    for (int32_t n = 0; n < len; ++n)
+    {
+      auto from = reinterpret_cast<const float *>(
+          _inputTensor->ptr_to_element(::arm_compute::Coordinates{n}));
+      auto into = base + n;
+
+      *into = *from;
+    }
+  }
+  else if (_tensorShape.rank() == 4)
+  {
+    auto featureShape = _tensorShape.asFeature();
+
+    const ::internal::nnapi::feature::Reader<float> from{featureShape, inputBuffer, inputSize};
+    ::internal::nnapi::feature::View<float> into{featureShape, outputBuffer, outputSize};
+
+    ::nnfw::util::feature::iterate(featureShape) << [&](uint32_t ch, uint32_t row, uint32_t col) {
+      const auto value = from.at(ch, row, col);
+      into.at(ch, row, col) = value;
+    };
+  }
+
+  return true;
 }
 
 void TensorConvertToCommonLayer::configure(::internal::cpu::Tensor *inputTensor,