From: 김수진/동작제어Lab(SR)/Engineer/삼성전자 <sjsujin.kim@samsung.com>
Date: Tue, 16 Oct 2018 09:58:09 +0000 (+0900)
Subject: [neurun] Support to permute for all other dimensions (#3187)
X-Git-Tag: 0.3~616
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=3e148fb564f6ced3f3ff06764fe19b8bda55e4c6;p=platform%2Fcore%2Fml%2Fnnfw.git

[neurun] Support to permute for all other dimensions (#3187)

Related : #3106, #2874

Part of : #3178 PR

This commit adds support for permuting tensors of all other ranks (0 to 3), in addition to the existing rank-4 case.

Signed-off-by: sjsujinkim <sjsujin.kim@samsung.com>
---

diff --git a/runtimes/neurun/src/kernel/cpu/PermuteLayer.cc b/runtimes/neurun/src/kernel/cpu/PermuteLayer.cc
index 3ffaa2a..e63b445 100644
--- a/runtimes/neurun/src/kernel/cpu/PermuteLayer.cc
+++ b/runtimes/neurun/src/kernel/cpu/PermuteLayer.cc
@@ -50,49 +50,170 @@ void PermuteLayer::run()
   auto output_buffer = _output->buffer();
   auto output_size = _output->info()->total_size();
 
-  assert(_shape.rank() == 4);
-  auto feature = _shape.asFeature();
+  auto rank = _shape.rank();
 
   switch (_type)
   {
     case Type::NHWC_TO_NCHW:
     {
-      const util::feature::nhwc::Reader<float> from{
-          feature, reinterpret_cast<const float *>(input_buffer), input_size};
-      util::feature::nchw::View<float> into{_output};
-
-      // TODO Fix this workaround (We may need codegen::operand::Object instead of ITensor)
       auto &queue = ::arm_compute::CLScheduler::get().queue();
       auto _output_cl = dynamic_cast<::arm_compute::ICLTensor *>(_output);
       _output_cl->map(queue);
-
-      ::nnfw::util::feature::iterate(feature)
-          << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
-               const auto value = from.at(batch, ch, row, col);
-               into.at(batch, ch, row, col) = value;
-             };
-
+      switch (rank)
+      {
+        case 0:
+        case 1:
+        {
+          memcpy(output_buffer, input_buffer, input_size);
+          break;
+        }
+        case 2:
+        {
+          using ::arm_compute::Window;
+          using ::arm_compute::Iterator;
+
+          auto matrix_shape = _shape.asMatrix();
+
+          Window window;
+          window.use_tensor_dimensions(_output->info()->tensor_shape(), Window::DimY);
+
+          Iterator it(_output, window);
+
+          const auto &y = window[Window::DimY];
+          for (auto h = y.start(); h < y.end(); h += y.step(), it.increment(Window::DimY))
+          {
+            memcpy(it.ptr(), input_buffer + h * matrix_shape.W,
+                   matrix_shape.W * sizeof(input_buffer));
+          }
+          break;
+        }
+        case 3:
+        {
+          using ::arm_compute::Window;
+          using ::arm_compute::Iterator;
+
+          const int32_t height_width = _shape.dim(1) * _shape.dim(2);
+          const int32_t width = _shape.dim(2);
+
+          Window window;
+          window.use_tensor_dimensions(_output->info()->tensor_shape(), Window::DimY);
+
+          Iterator it(_output, window);
+
+          const auto &z = window[Window::DimZ];
+          const auto &y = window[Window::DimY];
+          for (auto c = z.start(); c < z.end(); c += z.step(), it.increment(Window::DimZ))
+          {
+            for (auto h = y.start(); h < y.end(); h += y.step(), it.increment(Window::DimY))
+            {
+              memcpy(it.ptr(), input_buffer + c * height_width + h * width,
+                     width * sizeof(input_buffer));
+            }
+          }
+          break;
+        }
+        case 4:
+        {
+          auto feature = _shape.asFeature();
+
+          const util::feature::nhwc::Reader<float> from{
+              feature, reinterpret_cast<const float *>(input_buffer), input_size};
+          util::feature::nchw::View<float> into{_output};
+
+          // TODO Fix this workaround (We may need codegen::operand::Object instead of ITensor)
+          ::nnfw::util::feature::iterate(feature)
+              << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
+                   const auto value = from.at(batch, ch, row, col);
+                   into.at(batch, ch, row, col) = value;
+                 };
+          break;
+        }
+        default:
+          throw "NYI";
+          break;
+      }
       _output_cl->unmap(queue); // TODO Likewise above
 
       break;
     }
     case Type::NCHW_TO_NHWC:
     {
-      // TODO Fix this workaround (We may need codegen::operand::Object instead of ITensor)
       auto &queue = ::arm_compute::CLScheduler::get().queue();
       auto _input_cl = dynamic_cast<::arm_compute::ICLTensor *>(_input);
       _input_cl->map(queue);
-
-      const util::feature::nchw::View<float> from{_input};
-      util::feature::nhwc::View<float> into{feature, reinterpret_cast<float *>(output_buffer),
-                                            output_size};
-
-      ::nnfw::util::feature::iterate(feature)
-          << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
-               const auto value = from.at(batch, ch, row, col);
-               into.at(batch, ch, row, col) = value;
-             };
-
+      switch (rank)
+      {
+        case 0:
+        case 1:
+        {
+          memcpy(output_buffer, input_buffer, output_size);
+          break;
+        }
+        case 2:
+        {
+          using ::arm_compute::Window;
+          using ::arm_compute::Iterator;
+
+          Window window;
+          window.use_tensor_dimensions(_input->info()->tensor_shape(), Window::DimY);
+
+          Iterator it(_input, window);
+
+          int output_width = _shape.asMatrix().W;
+
+          const auto &y = window[Window::DimY];
+          for (auto h = y.start(); h < y.end(); h += y.step(), it.increment(Window::DimY))
+          {
+            memcpy(output_buffer + h * output_width, it.ptr(),
+                   output_width * sizeof(output_buffer));
+          }
+          break;
+        }
+        case 3:
+        {
+          using ::arm_compute::Window;
+          using ::arm_compute::Iterator;
+
+          const int32_t height_width = _shape.dim(1) * _shape.dim(2);
+          const int32_t width = _shape.dim(2);
+
+          Window window;
+          window.use_tensor_dimensions(_input->info()->tensor_shape(), Window::DimY);
+
+          Iterator it(_input, window);
+
+          const auto &z = window[Window::DimZ];
+          const auto &y = window[Window::DimY];
+          for (auto c = z.start(); c < z.end(); c += z.step(), it.increment(Window::DimZ))
+          {
+            for (auto h = y.start(); h < y.end(); h += y.step(), it.increment(Window::DimY))
+            {
+              memcpy(output_buffer + c * height_width + h * width, it.ptr(),
+                     width * sizeof(output_buffer));
+            }
+          }
+          break;
+        }
+        case 4:
+        {
+          auto feature = _shape.asFeature();
+
+          // TODO Fix this workaround (We may need codegen::operand::Object instead of ITensor)
+          const util::feature::nchw::View<float> from{_input};
+          util::feature::nhwc::View<float> into{feature, reinterpret_cast<float *>(output_buffer),
+                                                output_size};
+
+          ::nnfw::util::feature::iterate(feature)
+              << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
+                   const auto value = from.at(batch, ch, row, col);
+                   into.at(batch, ch, row, col) = value;
+                 };
+          break;
+        }
+        default:
+          throw "NYI";
+          break;
+      }
       _input_cl->unmap(queue); // TODO Likewise above
 
       break;