From 36fec45cc127d009f5460e38af98e1d30d3e4215 Mon Sep 17 00:00:00 2001
From: =?utf8?q?=EA=B9=80=EC=88=98=EC=A7=84/=EB=8F=99=EC=9E=91=EC=A0=9C?=
 =?utf8?q?=EC=96=B4Lab=28SR=29/Engineer/=EC=82=BC=EC=84=B1=EC=A0=84?=
 =?utf8?q?=EC=9E=90?= <sjsujin.kim@samsung.com>
Date: Mon, 17 Dec 2018 15:27:08 +0900
Subject: [PATCH] [neurun] Remove backend dependency in Permute (#4011)

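This commit removes the backend-specific dependencies (such as `Window`, `Iterator` and `ICLTensor`) from `Permute`, `Sink` and `Source`, replacing them with row copies addressed through `calcOffset`...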

Signed-off-by: sjsujinkim <sjsujin.kim@samsung.com>
---
 runtimes/neurun/src/exec/Sink.h                | 47 ++++----------
 runtimes/neurun/src/exec/Source.h              | 46 ++++----------
 runtimes/neurun/src/kernel/cpu/PermuteLayer.cc | 88 ++++++++------------------
 3 files changed, 52 insertions(+), 129 deletions(-)
diff --git a/runtimes/neurun/src/exec/Sink.h b/runtimes/neurun/src/exec/Sink.h
index 06a4cda..5eea938 100644
--- a/runtimes/neurun/src/exec/Sink.h
+++ b/runtimes/neurun/src/exec/Sink.h
@@ -24,11 +24,6 @@
 #include "util/feature/nchw/View.h"
 #include <misc/feature/IndexIterator.h>
 
-// TODO Remove these dependencies to arm_compute lib
-#include "backend/acl_cl/operand/ICLTensor.h"
-#include <arm_compute/core/Window.h>
-#include <arm_compute/core/Helpers.h>
-
 namespace neurun
 {
 namespace exec
@@ -72,7 +67,6 @@ public:
     auto input_buffer = tensor.buffer();
     auto rank = _shape.rank();
 
-    auto input_cl = dynamic_cast<::neurun::backend::acl_cl::operand::ICLTensor *>(&tensor);
     switch (rank)
     {
       case 0:
@@ -83,43 +77,29 @@ public:
       }
       case 2:
       {
-        using ::arm_compute::Window;
-        using ::arm_compute::Iterator;
-
-        Window window;
-        window.use_tensor_dimensions(input_cl->info()->tensor_shape(), Window::DimY);
-
-        Iterator it(input_cl->handle(), window);
-
-        int output_width = _shape.asMatrix().W;
+        auto matrix_shape = _shape.asMatrix();
 
-        const auto &y = window[Window::DimY];
-        for (auto h = y.start(); h < y.end(); h += y.step(), it.increment(Window::DimY))
+        for (auto h = 0; h < matrix_shape.H; ++h)
         {
-          memcpy(_output_buffer + h * output_width, it.ptr(), output_width * sizeof(float));
+          neurun::util::feature::Coordinate4D coord{0, h, 0, 0};
+          memcpy(_output_buffer + h * matrix_shape.W, input_buffer + tensor.calcOffset(coord),
+                 matrix_shape.W * sizeof(float));
         }
         break;
       }
       case 3:
       {
-        using ::arm_compute::Window;
-        using ::arm_compute::Iterator;
-
-        const int32_t height_width = _shape.dim(1) * _shape.dim(2);
+        const int32_t depth = _shape.dim(0);
+        const int32_t height = _shape.dim(1);
         const int32_t width = _shape.dim(2);
 
-        Window window;
-        window.use_tensor_dimensions(input_cl->info()->tensor_shape(), Window::DimY);
-
-        Iterator it(input_cl->handle(), window);
-
-        const auto &z = window[Window::DimZ];
-        const auto &y = window[Window::DimY];
-        for (auto c = z.start(); c < z.end(); c += z.step(), it.increment(Window::DimZ))
+        for (auto c = 0; c < depth; ++c)
         {
-          for (auto h = y.start(); h < y.end(); h += y.step(), it.increment(Window::DimY))
+          for (auto h = 0; h < height; ++h)
           {
-            memcpy(_output_buffer + c * height_width + h * width, it.ptr(), width * sizeof(float));
+            neurun::util::feature::Coordinate4D coord{0, h, 0, c};
+            memcpy(_output_buffer + c * height * width + h * width,
+                   input_buffer + tensor.calcOffset(coord), width * sizeof(float));
           }
         }
         break;
@@ -128,8 +108,7 @@ public:
       {
         auto feature = _shape.asFeature();
 
-        // TODO Fix this workaround (We may need codegen::operand::Object instead of ITensor)
-        const util::feature::nchw::View<float> from{input_cl};
+        const util::feature::nchw::View<float> from{&tensor};
         util::feature::nhwc::View<float> into{feature, reinterpret_cast<float *>(_output_buffer),
                                               _output_size};
 
diff --git a/runtimes/neurun/src/exec/Source.h b/runtimes/neurun/src/exec/Source.h
index 91b31d9..f4170cc 100644
--- a/runtimes/neurun/src/exec/Source.h
+++ b/runtimes/neurun/src/exec/Source.h
@@ -22,13 +22,9 @@
 #include "cpp14/memory.h"
 #include "util/feature/nchw/View.h"
 #include "util/feature/nhwc/Reader.h"
+#include "util/feature/Coordinate4D.h"
 #include <misc/feature/IndexIterator.h>
 
-// TODO Remove these dependencies to arm_compute lib
-#include "backend/acl_cl/operand/ICLTensor.h"
-#include <arm_compute/core/Window.h>
-#include <arm_compute/core/Helpers.h>
-
 namespace neurun
 {
 namespace exec
@@ -73,7 +69,6 @@ public:
     auto output_buffer = tensor.buffer();
     auto rank = _shape.rank();
 
-    auto output_cl = dynamic_cast<::neurun::backend::acl_cl::operand::ICLTensor *>(&tensor);
     switch (rank)
     {
       case 0:
@@ -84,43 +79,29 @@ public:
       }
       case 2:
       {
-        using ::arm_compute::Window;
-        using ::arm_compute::Iterator;
-
         auto matrix_shape = _shape.asMatrix();
 
-        Window window;
-        window.use_tensor_dimensions(output_cl->info()->tensor_shape(), Window::DimY);
-
-        Iterator it(output_cl->handle(), window);
-
-        const auto &y = window[Window::DimY];
-        for (auto h = y.start(); h < y.end(); h += y.step(), it.increment(Window::DimY))
+        for (auto h = 0; h < matrix_shape.H; ++h)
         {
-          memcpy(it.ptr(), _input_buffer + h * matrix_shape.W, matrix_shape.W * sizeof(float));
+          neurun::util::feature::Coordinate4D coord{0, h, 0, 0};
+          memcpy(output_buffer + tensor.calcOffset(coord), _input_buffer + h * matrix_shape.W,
+                 matrix_shape.W * sizeof(float));
         }
         break;
       }
       case 3:
       {
-        using ::arm_compute::Window;
-        using ::arm_compute::Iterator;
-
-        const int32_t height_width = _shape.dim(1) * _shape.dim(2);
+        const int32_t depth = _shape.dim(0);
+        const int32_t height = _shape.dim(1);
         const int32_t width = _shape.dim(2);
 
-        Window window;
-        window.use_tensor_dimensions(output_cl->info()->tensor_shape(), Window::DimY);
-
-        Iterator it(output_cl->handle(), window);
-
-        const auto &z = window[Window::DimZ];
-        const auto &y = window[Window::DimY];
-        for (auto c = z.start(); c < z.end(); c += z.step(), it.increment(Window::DimZ))
+        for (auto c = 0; c < depth; ++c)
         {
-          for (auto h = y.start(); h < y.end(); h += y.step(), it.increment(Window::DimY))
+          for (auto h = 0; h < height; ++h)
           {
-            memcpy(it.ptr(), _input_buffer + c * height_width + h * width, width * sizeof(float));
+            neurun::util::feature::Coordinate4D coord{0, h, 0, c};
+            memcpy(output_buffer + tensor.calcOffset(coord),
+                   _input_buffer + c * height * width + h * width, width * sizeof(float));
           }
         }
         break;
@@ -131,9 +112,8 @@ public:
 
         const util::feature::nhwc::Reader<float> from{
             feature, reinterpret_cast<const float *>(_input_buffer), _input_size};
-        util::feature::nchw::View<float> into{output_cl};
+        util::feature::nchw::View<float> into{&tensor};
 
-        // TODO Fix this workaround (We may need codegen::operand::Object instead of ITensor)
         ::nnfw::misc::feature::iterate(feature)
             << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
                  const auto value = from.at(batch, ch, row, col);
diff --git a/runtimes/neurun/src/kernel/cpu/PermuteLayer.cc b/runtimes/neurun/src/kernel/cpu/PermuteLayer.cc
index a638c48..f525af2 100644
--- a/runtimes/neurun/src/kernel/cpu/PermuteLayer.cc
+++ b/runtimes/neurun/src/kernel/cpu/PermuteLayer.cc
@@ -19,13 +19,10 @@
 #include "util/feature/nhwc/Reader.h"
 #include "util/feature/nhwc/View.h"
 #include "util/feature/nchw/View.h"
+#include "util/feature/Coordinate4D.h"
 
 #include <misc/feature/IndexIterator.h>
 
-// TODO Remove these dependencies to arm_compute lib
-#include <arm_compute/runtime/CL/CLScheduler.h>
-#include "backend/acl_cl/operand/CLTensor.h"
-
 namespace neurun
 {
 namespace kernel
@@ -59,8 +56,6 @@ void PermuteLayer::run()
 
         auto output_buffer = tensor.buffer();
         auto output_size = tensor.total_size();
-
-        auto output_cl = dynamic_cast<::neurun::backend::acl_cl::operand::ICLTensor *>(&tensor);
         switch (rank)
         {
           case 0:
@@ -71,44 +66,29 @@ void PermuteLayer::run()
           }
           case 2:
           {
-            using ::arm_compute::Window;
-            using ::arm_compute::Iterator;
-
             auto matrix_shape = _shape.asMatrix();
 
-            Window window;
-            window.use_tensor_dimensions(output_cl->info()->tensor_shape(), Window::DimY);
-
-            Iterator it(output_cl->handle(), window);
-
-            const auto &y = window[Window::DimY];
-            for (auto h = y.start(); h < y.end(); h += y.step(), it.increment(Window::DimY))
+            for (auto h = 0; h < matrix_shape.H; ++h)
             {
-              memcpy(it.ptr(), input_buffer + h * matrix_shape.W, matrix_shape.W * sizeof(float));
+              neurun::util::feature::Coordinate4D coord{0, h, 0, 0};
+              memcpy(output_buffer + tensor.calcOffset(coord), input_buffer + h * matrix_shape.W,
+                     matrix_shape.W * sizeof(float));
             }
             break;
           }
           case 3:
           {
-            using ::arm_compute::Window;
-            using ::arm_compute::Iterator;
-
-            const int32_t height_width = _shape.dim(1) * _shape.dim(2);
+            const int32_t depth = _shape.dim(0);
+            const int32_t height = _shape.dim(1);
             const int32_t width = _shape.dim(2);
 
-            Window window;
-            window.use_tensor_dimensions(output_cl->info()->tensor_shape(), Window::DimY);
-
-            Iterator it(output_cl->handle(), window);
-
-            const auto &z = window[Window::DimZ];
-            const auto &y = window[Window::DimY];
-            for (auto c = z.start(); c < z.end(); c += z.step(), it.increment(Window::DimZ))
+            for (auto c = 0; c < depth; ++c)
             {
-              for (auto h = y.start(); h < y.end(); h += y.step(), it.increment(Window::DimY))
+              for (auto h = 0; h < height; ++h)
               {
-                memcpy(it.ptr(), input_buffer + c * height_width + h * width,
-                       width * sizeof(float));
+                neurun::util::feature::Coordinate4D coord{0, h, 0, c};
+                memcpy(output_buffer + tensor.calcOffset(coord),
+                       input_buffer + c * height * width + h * width, width * sizeof(float));
               }
             }
             break;
@@ -119,7 +99,7 @@ void PermuteLayer::run()
 
             const util::feature::nhwc::Reader<float> from{
                 feature, reinterpret_cast<const float *>(input_buffer), input_size};
-            util::feature::nchw::View<float> into{output_cl};
+            util::feature::nchw::View<float> into{&tensor};
 
             ::nnfw::misc::feature::iterate(feature)
                 << [&](uint32_t batch, uint32_t ch, uint32_t row, uint32_t col) {
@@ -147,7 +127,6 @@ void PermuteLayer::run()
         auto output_buffer = output_tensor->buffer();
         auto output_size = output_tensor->total_size();
 
-        auto input_cl = dynamic_cast<::neurun::backend::acl_cl::operand::ICLTensor *>(&tensor);
         switch (rank)
         {
           case 0:
@@ -158,44 +137,29 @@ void PermuteLayer::run()
           }
           case 2:
           {
-            using ::arm_compute::Window;
-            using ::arm_compute::Iterator;
-
-            Window window;
-            window.use_tensor_dimensions(input_cl->info()->tensor_shape(), Window::DimY);
-
-            Iterator it(input_cl->handle(), window);
-
-            int output_width = _shape.asMatrix().W;
+            auto matrix_shape = _shape.asMatrix();
 
-            const auto &y = window[Window::DimY];
-            for (auto h = y.start(); h < y.end(); h += y.step(), it.increment(Window::DimY))
+            for (auto h = 0; h < matrix_shape.H; ++h)
             {
-              memcpy(output_buffer + h * output_width, it.ptr(), output_width * sizeof(float));
+              neurun::util::feature::Coordinate4D coord{0, h, 0, 0};
+              memcpy(output_buffer + h * matrix_shape.W, input_buffer + tensor.calcOffset(coord),
+                     matrix_shape.W * sizeof(float));
             }
             break;
           }
           case 3:
           {
-            using ::arm_compute::Window;
-            using ::arm_compute::Iterator;
-
-            const int32_t height_width = _shape.dim(1) * _shape.dim(2);
+            const int32_t depth = _shape.dim(0);
+            const int32_t height = _shape.dim(1);
             const int32_t width = _shape.dim(2);
 
-            Window window;
-            window.use_tensor_dimensions(input_cl->info()->tensor_shape(), Window::DimY);
-
-            Iterator it(input_cl->handle(), window);
-
-            const auto &z = window[Window::DimZ];
-            const auto &y = window[Window::DimY];
-            for (auto c = z.start(); c < z.end(); c += z.step(), it.increment(Window::DimZ))
+            for (auto c = 0; c < depth; ++c)
             {
-              for (auto h = y.start(); h < y.end(); h += y.step(), it.increment(Window::DimY))
+              for (auto h = 0; h < height; ++h)
               {
-                memcpy(output_buffer + c * height_width + h * width, it.ptr(),
-                       width * sizeof(float));
+                neurun::util::feature::Coordinate4D coord{0, h, 0, c};
+                memcpy(output_buffer + c * height * width + h * width,
+                       input_buffer + tensor.calcOffset(coord), width * sizeof(float));
               }
             }
             break;
@@ -204,7 +168,7 @@ void PermuteLayer::run()
           {
             auto feature = _shape.asFeature();
 
-            const util::feature::nchw::View<float> from{input_cl};
+            const util::feature::nchw::View<float> from{&tensor};
             util::feature::nhwc::View<float> into{feature, reinterpret_cast<float *>(output_buffer),
                                                   output_size};
 
-- 
2.7.4