Fix padding bug in some kernels. (#3161)

author 장지섭/동작제어Lab(SR)/Engineer/삼성전자 <jiseob.jang@samsung.com>

Tue, 16 Oct 2018 06:14:46 +0000 (15:14 +0900)

committer 오형석/동작제어Lab(SR)/Staff Engineer/삼성전자 <hseok82.oh@samsung.com>

Tue, 16 Oct 2018 06:14:46 +0000 (15:14 +0900)
author 장지섭/동작제어Lab(SR)/Engineer/삼성전자 <jiseob.jang@samsung.com>
Tue, 16 Oct 2018 06:14:46 +0000 (15:14 +0900)
committer 오형석/동작제어Lab(SR)/Staff Engineer/삼성전자 <hseok82.oh@samsung.com>
Tue, 16 Oct 2018 06:14:46 +0000 (15:14 +0900)
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.cc

index 8be5334..71d2aa5 100644 (file)
--- a/runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.cc
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.cc
@@ -19,8 +19,7 @@
  #include <arm_compute/runtime/CL/CLScheduler.h>
  
  void SimpleDepthToSpace::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output,
-                                   int32_t block_size,
-                                   const ::arm_compute::Coordinates &axises = {3, 1, 0, 2})
+                                   int32_t block_size, const ::arm_compute::Coordinates &axises)
  {
    const auto rank = axises.num_dimensions();
    assert(rank == 4);
@@ -36,26 +35,10 @@ void SimpleDepthToSpace::configure(::arm_compute::ITensor *input, ::arm_compute:
    _axises = axises;
  }
  
-inline int32_t Offset4D(const ::arm_compute::TensorShape &shape, int32_t b, int32_t h, int32_t w,
-                        int32_t d, const ::arm_compute::Coordinates &axises)
-{
-  // b, h, w, d >= 0
-  size_t indexes[4];
-  indexes[axises[0]] = b;
-  indexes[axises[1]] = h;
-  indexes[axises[2]] = w;
-  indexes[axises[3]] = d;
-
-  int32_t offset = indexes[3] * shape[2] * shape[1] * shape[0];
-  offset += indexes[2] * shape[1] * shape[0];
-  offset += indexes[1] * shape[0];
-  offset += indexes[0];
-  return offset;
-}
-
  template <typename T>
-inline void DepthToSpace(const T *input_data, const ::arm_compute::TensorShape &input_shape,
-                         int32_t block_size, T *output_data,
+inline void DepthToSpace(const ::arm_compute::ITensor *input,
+                         const ::arm_compute::TensorShape &input_shape, int32_t block_size,
+                         ::arm_compute::ITensor *output,
                           const ::arm_compute::TensorShape &output_shape,
                           const ::arm_compute::Coordinates &axises)
  {
@@ -83,10 +66,13 @@ inline void DepthToSpace(const T *input_data, const ::arm_compute::TensorShape &
            const int in_d =
                out_d + ((out_h % block_size) * block_size + out_w % block_size) * output_depth;
  
-          const int input_index = Offset4D(input_shape, in_b, in_h, in_w, in_d, axises);
-          const int output_index = Offset4D(output_shape, out_b, out_h, out_w, out_d, axises);
+          auto input_id =
+              asARMComputeCoordinates(::arm_compute::Coordinates{in_b, in_h, in_w, in_d}, axises);
+          auto output_id = asARMComputeCoordinates(
+              ::arm_compute::Coordinates{out_b, out_h, out_w, out_d}, axises);
  
-          output_data[output_index] = input_data[input_index];
+          *reinterpret_cast<T *>(output->ptr_to_element(output_id)) =
+              *reinterpret_cast<T *>(input->ptr_to_element(input_id));
          }
        }
      }
@@ -103,20 +89,28 @@ void SimpleDepthToSpace::run()
      CAST_CL(_output)->map(q);
    }
  
-  auto input_buf = _input->buffer();
-  auto output_buf = _output->buffer();
    switch (_input->info()->data_type())
    {
      case ::arm_compute::DataType::U8:
      case ::arm_compute::DataType::QASYMM8:
-      DepthToSpace(reinterpret_cast<const uint8_t *>(input_buf), _input->info()->tensor_shape(),
-                   _block_size, reinterpret_cast<uint8_t *>(output_buf),
-                   _output->info()->tensor_shape(), _axises);
+      DepthToSpace<uint8_t>(_input, _input->info()->tensor_shape(), _block_size, _output,
+                            _output->info()->tensor_shape(), _axises);
+      break;
+    case ::arm_compute::DataType::S8:
+      DepthToSpace<int8_t>(_input, _input->info()->tensor_shape(), _block_size, _output,
+                           _output->info()->tensor_shape(), _axises);
+      break;
+    case ::arm_compute::DataType::U32:
+      DepthToSpace<uint32_t>(_input, _input->info()->tensor_shape(), _block_size, _output,
+                             _output->info()->tensor_shape(), _axises);
+      break;
+    case ::arm_compute::DataType::S32:
+      DepthToSpace<int32_t>(_input, _input->info()->tensor_shape(), _block_size, _output,
+                            _output->info()->tensor_shape(), _axises);
        break;
      case ::arm_compute::DataType::F32:
-      DepthToSpace(reinterpret_cast<const float *>(input_buf), _input->info()->tensor_shape(),
-                   _block_size, reinterpret_cast<float *>(output_buf),
-                   _output->info()->tensor_shape(), _axises);
+      DepthToSpace<float>(_input, _input->info()->tensor_shape(), _block_size, _output,
+                          _output->info()->tensor_shape(), _axises);
        break;
      default:
        ARM_COMPUTE_ERROR("DataType not supported");
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.h

index e4107e2..dac0beb 100644 (file)
--- a/runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.h
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.h
@@ -18,6 +18,7 @@
  #define __SIMPLE_DEPTH_TO_SPACE_H__
  
  #include "internal/arm_compute.h"
+#include "internal/arm_compute/Cast.h"
  #include <arm_compute/core/ITensor.h>
  #include <arm_compute/runtime/IFunction.h>
  
@@ -37,7 +38,7 @@ public:
     * @param[in]  block_size  Block size.
     */
    void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output, int32_t block_size,
-                 const ::arm_compute::Coordinates &axises);
+                 const ::arm_compute::Coordinates &axises = getARMComputeAxises(4));
  
    void run() override;
  
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToBatchND.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToBatchND.cc

index 8282683..2f6a8c3 100644 (file)
--- a/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToBatchND.cc
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToBatchND.cc
@@ -15,12 +15,13 @@
   */
  
  #include "internal/layers/SimpleSpaceToBatchND.h"
+#include "internal/arm_compute/Cast.h"
  
  #include <arm_compute/runtime/CL/CLScheduler.h>
  
  void SimpleSpaceToBatchND::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output,
                                       const int32_t *block_size, const int32_t *padding_size,
-                                     const ::arm_compute::Coordinates &axises = {3, 1, 0, 2})
+                                     const ::arm_compute::Coordinates &axises)
  {
    const auto rank = axises.num_dimensions();
    assert(rank == 4);
@@ -38,28 +39,12 @@ void SimpleSpaceToBatchND::configure(::arm_compute::ITensor *input, ::arm_comput
    _axises = axises;
  }
  
-inline int32_t Offset4D(const ::arm_compute::TensorShape &shape, int32_t b, int32_t h, int32_t w,
-                        int32_t d, const ::arm_compute::Coordinates &axises)
-{
-  // b, h, w, d >= 0
-  size_t indexes[4];
-  indexes[axises[0]] = b;
-  indexes[axises[1]] = h;
-  indexes[axises[2]] = w;
-  indexes[axises[3]] = d;
-
-  int32_t offset = indexes[3] * shape[2] * shape[1] * shape[0];
-  offset += indexes[2] * shape[1] * shape[0];
-  offset += indexes[1] * shape[0];
-  offset += indexes[0];
-  return offset;
-}
-
  template <typename T>
-inline void SpaceToBatchND(const T *input_data, const ::arm_compute::TensorShape &input_shape,
-                           const int32_t *block_size_data, const int32_t *padding_size_data,
-                           T *output_data, const ::arm_compute::TensorShape &output_shape,
-                           const ::arm_compute::Coordinates &axises)
+inline void
+SpaceToBatchND(const ::arm_compute::ITensor *input, const ::arm_compute::TensorShape &input_shape,
+               const int32_t *block_size_data, const int32_t *padding_size_data,
+               const ::arm_compute::ITensor *output, const ::arm_compute::TensorShape &output_shape,
+               const ::arm_compute::Coordinates &axises)
  {
    const int input_batch = input_shape[axises[0]];
    const int input_height = input_shape[axises[1]];
@@ -92,18 +77,22 @@ inline void SpaceToBatchND(const T *input_data, const ::arm_compute::TensorShape
                ((in_h % block_size_data[0]) * block_size_data[1] + in_w % block_size_data[1]) *
                    input_batch;
  
-          const int output_index = Offset4D(output_shape, out_b, out_h, out_w, out_d, axises);
+          auto output_id = asARMComputeCoordinates(
+              ::arm_compute::Coordinates{out_b, out_h, out_w, out_d}, axises);
  
            if (in_h < padding_size_data[0] || in_h >= (input_height + padding_size_data[0]) ||
                in_w < padding_size_data[2] || in_w >= (input_width + padding_size_data[2]))
            {
-            output_data[output_index] = 0;
+            *reinterpret_cast<T *>(output->ptr_to_element(output_id)) = 0;
            }
            else
            {
-            const int input_index = Offset4D(input_shape, in_b, in_h - padding_size_data[0],
-                                             in_w - padding_size_data[2], in_d, axises);
-            output_data[output_index] = input_data[input_index];
+            auto input_id = asARMComputeCoordinates(
+                ::arm_compute::Coordinates{in_b, in_h - padding_size_data[0],
+                                           in_w - padding_size_data[2], in_d},
+                axises);
+            *reinterpret_cast<T *>(output->ptr_to_element(output_id)) =
+                *reinterpret_cast<T *>(input->ptr_to_element(input_id));
            }
          }
        }
@@ -126,14 +115,12 @@ void SimpleSpaceToBatchND::run()
    {
      case ::arm_compute::DataType::U8:
      case ::arm_compute::DataType::QASYMM8:
-      SpaceToBatchND(reinterpret_cast<const uint8_t *>(input_buf), _input->info()->tensor_shape(),
-                     _block_size, _padding_size, reinterpret_cast<uint8_t *>(output_buf),
-                     _output->info()->tensor_shape(), _axises);
+      SpaceToBatchND<uint8_t>(_input, _input->info()->tensor_shape(), _block_size, _padding_size,
+                              _output, _output->info()->tensor_shape(), _axises);
        break;
      case ::arm_compute::DataType::F32:
-      SpaceToBatchND(reinterpret_cast<const float *>(input_buf), _input->info()->tensor_shape(),
-                     _block_size, _padding_size, reinterpret_cast<float *>(output_buf),
-                     _output->info()->tensor_shape(), _axises);
+      SpaceToBatchND<float>(_input, _input->info()->tensor_shape(), _block_size, _padding_size,
+                            _output, _output->info()->tensor_shape(), _axises);
        break;
      default:
        ARM_COMPUTE_ERROR("DataType not supported");
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.cc b/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.cc

index a035cd0..9da5d66 100644 (file)
--- a/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.cc
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.cc
@@ -19,8 +19,7 @@
  #include <arm_compute/runtime/CL/CLScheduler.h>
  
  void SimpleSpaceToDepth::configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output,
-                                   int32_t block_size,
-                                   const ::arm_compute::Coordinates &axises = {3, 1, 0, 2})
+                                   int32_t block_size, const ::arm_compute::Coordinates &axises)
  {
    const auto rank = axises.num_dimensions();
    assert(rank == 4);
@@ -36,26 +35,10 @@ void SimpleSpaceToDepth::configure(::arm_compute::ITensor *input, ::arm_compute:
    _axises = axises;
  }
  
-inline int32_t Offset4D(const ::arm_compute::TensorShape &shape, int32_t b, int32_t h, int32_t w,
-                        int32_t d, const ::arm_compute::Coordinates &axises)
-{
-  // b, h, w, d >= 0
-  size_t indexes[4];
-  indexes[axises[0]] = b;
-  indexes[axises[1]] = h;
-  indexes[axises[2]] = w;
-  indexes[axises[3]] = d;
-
-  int32_t offset = indexes[3] * shape[2] * shape[1] * shape[0];
-  offset += indexes[2] * shape[1] * shape[0];
-  offset += indexes[1] * shape[0];
-  offset += indexes[0];
-  return offset;
-}
-
  template <typename T>
-inline void SpaceToDepth(const T *input_data, const ::arm_compute::TensorShape &input_shape,
-                         int32_t block_size, T *output_data,
+inline void SpaceToDepth(const ::arm_compute::ITensor *input,
+                         const ::arm_compute::TensorShape &input_shape, int32_t block_size,
+                         ::arm_compute::ITensor *output,
                           const ::arm_compute::TensorShape &output_shape,
                           const ::arm_compute::Coordinates &axises)
  {
@@ -83,10 +66,13 @@ inline void SpaceToDepth(const T *input_data, const ::arm_compute::TensorShape &
            const int out_d =
                in_d + ((in_h % block_size) * block_size + in_w % block_size) * input_depth;
  
-          const int input_index = Offset4D(input_shape, in_b, in_h, in_w, in_d, axises);
-          const int output_index = Offset4D(output_shape, out_b, out_h, out_w, out_d, axises);
+          auto input_id =
+              asARMComputeCoordinates(::arm_compute::Coordinates{in_b, in_h, in_w, in_d}, axises);
+          auto output_id = asARMComputeCoordinates(
+              ::arm_compute::Coordinates{out_b, out_h, out_w, out_d}, axises);
  
-          output_data[output_index] = input_data[input_index];
+          *reinterpret_cast<T *>(output->ptr_to_element(output_id)) =
+              *reinterpret_cast<T *>(input->ptr_to_element(input_id));
          }
        }
      }
@@ -109,14 +95,12 @@ void SimpleSpaceToDepth::run()
    {
      case ::arm_compute::DataType::U8:
      case ::arm_compute::DataType::QASYMM8:
-      SpaceToDepth(reinterpret_cast<const uint8_t *>(input_buf), _input->info()->tensor_shape(),
-                   _block_size, reinterpret_cast<uint8_t *>(output_buf),
-                   _output->info()->tensor_shape(), _axises);
+      SpaceToDepth<uint8_t>(_input, _input->info()->tensor_shape(), _block_size, _output,
+                            _output->info()->tensor_shape(), _axises);
        break;
      case ::arm_compute::DataType::F32:
-      SpaceToDepth(reinterpret_cast<const float *>(input_buf), _input->info()->tensor_shape(),
-                   _block_size, reinterpret_cast<float *>(output_buf),
-                   _output->info()->tensor_shape(), _axises);
+      SpaceToDepth<float>(_input, _input->info()->tensor_shape(), _block_size, _output,
+                          _output->info()->tensor_shape(), _axises);
        break;
      default:
        ARM_COMPUTE_ERROR("DataType not supported");
diff --git a/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.h b/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.h

index 54fa227..98caf2e 100644 (file)
--- a/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.h
+++ b/runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.h
@@ -18,6 +18,7 @@
  #define __SIMPLE_SPACE_TO_DEPTH_H__
  
  #include "internal/arm_compute.h"
+#include "internal/arm_compute/Cast.h"
  #include <arm_compute/core/ITensor.h>
  #include <arm_compute/runtime/IFunction.h>
  
@@ -36,7 +37,7 @@ public:
     * @param[in]  block_size  Block size.
     */
    void configure(::arm_compute::ITensor *input, ::arm_compute::ITensor *output, int32_t block_size,
-                 const ::arm_compute::Coordinates &axises);
+                 const ::arm_compute::Coordinates &axises = getARMComputeAxises(4));
  
    void run() override;
author	장지섭/동작제어Lab(SR)/Engineer/삼성전자 <jiseob.jang@samsung.com>
	Tue, 16 Oct 2018 06:14:46 +0000 (15:14 +0900)
committer	오형석/동작제어Lab(SR)/Staff Engineer/삼성전자 <hseok82.oh@samsung.com>
	Tue, 16 Oct 2018 06:14:46 +0000 (15:14 +0900)
runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.cc		patch \| blob \| history
runtimes/pure_arm_compute/src/internal/layers/SimpleDepthToSpace.h		patch \| blob \| history
runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToBatchND.cc		patch \| blob \| history
runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.cc		patch \| blob \| history
runtimes/pure_arm_compute/src/internal/layers/SimpleSpaceToDepth.h		patch \| blob \| history