auto output_buffer = _output->buffer();
auto output_size = _output->info()->total_size();
- assert(_shape.rank() == 4);
- auto feature = _shape.asFeature();
+ auto rank = _shape.rank();
switch (_type)
{
case Type::NHWC_TO_NCHW:
{
- const util::feature::nhwc::Reader<float> from{
- feature, reinterpret_cast<const float *>(input_buffer), input_size};
- util::feature::nchw::View<float> into{_output};
-
- // TODO Fix this workaround (We may need codegen::operand::Object instead of ITensor)
auto &queue = ::arm_compute::CLScheduler::get().queue();
auto _output_cl = dynamic_cast<::arm_compute::ICLTensor *>(_output);
_output_cl->map(queue);
-
- ::nnfw::util::feature::iterate(feature)
- << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
- const auto value = from.at(batch, ch, row, col);
- into.at(batch, ch, row, col) = value;
- };
-
+ // Dispatch on rank: 0/1-D is a flat byte copy, 2/3-D copies row-by-row
+ // through an ACL window on the output, 4-D uses the feature Reader/View.
+ switch (rank)
+ {
+ case 0:
+ case 1:
+ {
+ // Layout conversion is the identity below rank 2, so copy raw bytes.
+ memcpy(output_buffer, input_buffer, input_size);
+ break;
+ }
+ case 2:
+ {
+ using ::arm_compute::Window;
+ using ::arm_compute::Iterator;
+
+ auto matrix_shape = _shape.asMatrix();
+ // Bytes per element: the buffers are byte pointers and memcpy counts
+ // bytes. (sizeof(input_buffer) would be the size of the *pointer*,
+ // not of a row of data.)
+ const size_t element_size = _output->info()->element_size();
+
+ Window window;
+ window.use_tensor_dimensions(_output->info()->tensor_shape(), Window::DimY);
+
+ Iterator it(_output, window);
+
+ const auto &y = window[Window::DimY];
+ for (auto h = y.start(); h < y.end(); h += y.step(), it.increment(Window::DimY))
+ {
+ // Copy one row; the source offset must be scaled to bytes as well.
+ memcpy(it.ptr(), input_buffer + h * matrix_shape.W * element_size,
+ matrix_shape.W * element_size);
+ }
+ break;
+ }
+ case 3:
+ {
+ using ::arm_compute::Window;
+ using ::arm_compute::Iterator;
+
+ const int32_t height_width = _shape.dim(1) * _shape.dim(2);
+ const int32_t width = _shape.dim(2);
+ // Bytes per element (see the rank-2 case above).
+ const size_t element_size = _output->info()->element_size();
+
+ Window window;
+ window.use_tensor_dimensions(_output->info()->tensor_shape(), Window::DimY);
+
+ Iterator it(_output, window);
+
+ const auto &z = window[Window::DimZ];
+ const auto &y = window[Window::DimY];
+ for (auto c = z.start(); c < z.end(); c += z.step(), it.increment(Window::DimZ))
+ {
+ // increment(DimZ) resets the lower (Y) offset for the next plane.
+ for (auto h = y.start(); h < y.end(); h += y.step(), it.increment(Window::DimY))
+ {
+ memcpy(it.ptr(), input_buffer + (c * height_width + h * width) * element_size,
+ width * element_size);
+ }
+ }
+ break;
+ }
+ case 4:
+ {
+ auto feature = _shape.asFeature();
+
+ const util::feature::nhwc::Reader<float> from{
+ feature, reinterpret_cast<const float *>(input_buffer), input_size};
+ util::feature::nchw::View<float> into{_output};
+
+ // TODO Fix this workaround (We may need codegen::operand::Object instead of ITensor)
+ ::nnfw::util::feature::iterate(feature)
+ << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
+ const auto value = from.at(batch, ch, row, col);
+ into.at(batch, ch, row, col) = value;
+ };
+ break;
+ }
+ default:
+ throw "NYI";
+ break;
+ }
_output_cl->unmap(queue); // TODO Likewise above
break;
}
case Type::NCHW_TO_NHWC:
{
- // TODO Fix this workaround (We may need codegen::operand::Object instead of ITensor)
auto &queue = ::arm_compute::CLScheduler::get().queue();
auto _input_cl = dynamic_cast<::arm_compute::ICLTensor *>(_input);
_input_cl->map(queue);
-
- const util::feature::nchw::View<float> from{_input};
- util::feature::nhwc::View<float> into{feature, reinterpret_cast<float *>(output_buffer),
- output_size};
-
- ::nnfw::util::feature::iterate(feature)
- << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
- const auto value = from.at(batch, ch, row, col);
- into.at(batch, ch, row, col) = value;
- };
-
+ // Dispatch on rank: 0/1-D is a flat byte copy, 2/3-D reads row-by-row
+ // through an ACL window on the input, 4-D uses the feature View pair.
+ switch (rank)
+ {
+ case 0:
+ case 1:
+ {
+ // Layout conversion is the identity below rank 2, so copy raw bytes.
+ memcpy(output_buffer, input_buffer, output_size);
+ break;
+ }
+ case 2:
+ {
+ using ::arm_compute::Window;
+ using ::arm_compute::Iterator;
+
+ Window window;
+ window.use_tensor_dimensions(_input->info()->tensor_shape(), Window::DimY);
+
+ Iterator it(_input, window);
+
+ int output_width = _shape.asMatrix().W;
+ // Bytes per element: memcpy counts and buffer offsets are in bytes.
+ // (sizeof(output_buffer) would be the size of the *pointer*, not of
+ // a row of data.)
+ const size_t element_size = _input->info()->element_size();
+
+ const auto &y = window[Window::DimY];
+ for (auto h = y.start(); h < y.end(); h += y.step(), it.increment(Window::DimY))
+ {
+ // Copy one row; the destination offset must be scaled to bytes too.
+ memcpy(output_buffer + h * output_width * element_size, it.ptr(),
+ output_width * element_size);
+ }
+ break;
+ }
+ case 3:
+ {
+ using ::arm_compute::Window;
+ using ::arm_compute::Iterator;
+
+ const int32_t height_width = _shape.dim(1) * _shape.dim(2);
+ const int32_t width = _shape.dim(2);
+ // Bytes per element (see the rank-2 case above).
+ const size_t element_size = _input->info()->element_size();
+
+ Window window;
+ window.use_tensor_dimensions(_input->info()->tensor_shape(), Window::DimY);
+
+ Iterator it(_input, window);
+
+ const auto &z = window[Window::DimZ];
+ const auto &y = window[Window::DimY];
+ for (auto c = z.start(); c < z.end(); c += z.step(), it.increment(Window::DimZ))
+ {
+ // increment(DimZ) resets the lower (Y) offset for the next plane.
+ for (auto h = y.start(); h < y.end(); h += y.step(), it.increment(Window::DimY))
+ {
+ memcpy(output_buffer + (c * height_width + h * width) * element_size, it.ptr(),
+ width * element_size);
+ }
+ }
+ break;
+ }
+ case 4:
+ {
+ auto feature = _shape.asFeature();
+
+ // TODO Fix this workaround (We may need codegen::operand::Object instead of ITensor)
+ const util::feature::nchw::View<float> from{_input};
+ util::feature::nhwc::View<float> into{feature, reinterpret_cast<float *>(output_buffer),
+ output_size};
+
+ ::nnfw::util::feature::iterate(feature)
+ << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
+ const auto value = from.at(batch, ch, row, col);
+ into.at(batch, ch, row, col) = value;
+ };
+ break;
+ }
+ default:
+ throw "NYI";
+ break;
+ }
_input_cl->unmap(queue); // TODO Likewise above
break;