COMPMID-3379: Add support for QASYMM8_SIGNED in NEScale

author Sheri Zhang <sheri.zhang@arm.com>

Thu, 30 Apr 2020 21:53:39 +0000 (22:53 +0100)

committer Sheri Zhang <sheri.zhang@arm.com>

Fri, 1 May 2020 14:22:55 +0000 (14:22 +0000)
author Sheri Zhang <sheri.zhang@arm.com>
Thu, 30 Apr 2020 21:53:39 +0000 (22:53 +0100)
committer Sheri Zhang <sheri.zhang@arm.com>
Fri, 1 May 2020 14:22:55 +0000 (14:22 +0000)
diff --git a/arm_compute/core/CPP/kernels/CPPUpsampleKernel.h b/arm_compute/core/CPP/kernels/CPPUpsampleKernel.h

index 375ff5503762a4ada2742c07cbdf12b3536a455d..9fbc9b697c91c51d69bb92d71e93e56f71bea8b7 100644 (file)
--- a/arm_compute/core/CPP/kernels/CPPUpsampleKernel.h
+++ b/arm_compute/core/CPP/kernels/CPPUpsampleKernel.h
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
   *
   * SPDX-License-Identifier: MIT
   *
@@ -55,7 +55,7 @@ public:
  
      /** Set the input and output of the kernel.
       *
-     * @param[in]  input  The input tensor to upsample. Data types supported: F32/F16/QASYMM8
+     * @param[in]  input  The input tensor to upsample. Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED
       * @param[out] output The output tensor. Data types supported: Same as @p input
       * @param[in]  info   Padding info.
       */
diff --git a/arm_compute/core/NEON/kernels/NEScaleKernel.h b/arm_compute/core/NEON/kernels/NEScaleKernel.h

index e6790371a6ebaa98950d26e72a368d5eec0c5d26..0d0d457d51863ae9a628e4dcb1f5726ae1d91d9e 100644 (file)
--- a/arm_compute/core/NEON/kernels/NEScaleKernel.h
+++ b/arm_compute/core/NEON/kernels/NEScaleKernel.h
@@ -57,7 +57,7 @@ public:
       * @note dx, dy and offsets have the same dimensions (width and height) of the output tensor
       * @note Using @p policy Area only supports data layout NCHW and input data type U8.
       *
-     * @param[in]  input                 Source tensor. Data types supported: U8/S16/F16/F32.
+     * @param[in]  input                 Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/U8/S16/F16/F32.
       * @param[in]  dx                    Pixel's distance between the X real coordinate and the smallest X following integer. Data type supported: F32
       * @param[in]  dy                    Pixel's distance between the Y real coordinate and the smallest Y following integer. Data type supported: F32
       * @param[in]  offsets               Offset to access the pixel with NEAREST interpolation or the top-left pixel with BILINEAR interpolation in the input tensor. Data type supported: S32.
@@ -77,7 +77,7 @@ public:
       * @note dx, dy and offsets have the same dimensions (width and height) of the output tensor
       * @note Using @p policy Area only supports data layout NCHW and input data type U8.
       *
-     * @param[in] input                 Source tensor. Data types supported: U8/S16/F16/F32.
+     * @param[in] input                 Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/U8/S16/F16/F32.
       * @param[in] dx                    Pixel's distance between the X real coordinate and the smallest X following integer. Data type supported: F32
       * @param[in] dy                    Pixel's distance between the Y real coordinate and the smallest Y following integer. Data type supported: F32
       * @param[in] offsets               Offset to access the pixel with NEAREST interpolation or the top-left pixel with BILINEAR interpolation in the input tensor. Data type supported: S32.
diff --git a/arm_compute/runtime/CPP/functions/CPPUpsample.h b/arm_compute/runtime/CPP/functions/CPPUpsample.h

index 4f74e51d224c2522a3969e3939d54cd5d455344e..3e1852bc300979bef18733b1e38a90dc04829a5a 100644 (file)
--- a/arm_compute/runtime/CPP/functions/CPPUpsample.h
+++ b/arm_compute/runtime/CPP/functions/CPPUpsample.h
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
   *
   * SPDX-License-Identifier: MIT
   *
@@ -38,7 +38,7 @@ class CPPUpsample : public ICPPSimpleFunction
  public:
      /** Configure the upsample CPP kernel
       *
-     * @param[in]  input  The input tensor to upsample. Data types supported: F32/F16/QASYMM8
+     * @param[in]  input  The input tensor to upsample. Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED
       * @param[out] output The output tensor. Data types supported: Same as @p input
       * @param[in]  info   Padding information
       */
diff --git a/docs/00_introduction.dox b/docs/00_introduction.dox

index 8155952b1c3ee3ac16ac85cd64530f2a446da7e6..1c8b136f16bd331a012f947cb18999a771ab3817 100644 (file)
--- a/docs/00_introduction.dox
+++ b/docs/00_introduction.dox
@@ -264,6 +264,8 @@ v20.05 Public major release
       - @ref CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel
       - @ref CLReductionOperation
       - @ref CLReduceMean
+     - @ref NEScale
+     - @ref NEScaleKernel
       - @ref NEUpsampleLayer
       - @ref NECast
       - @ref NEReductionOperation
@@ -277,6 +279,8 @@ v20.05 Public major release
       - @ref CPPPermute
       - @ref CPPTopKVKernel
       - @ref CPPTopKV
+     - @ref CPPUpsample
+     - @ref CPPUpsampleKernel
   - New OpenCL kernels / functions:
       - @ref CLQLSTMLayer
       - @ref CLQLSTMLayerNormalizationKernel
diff --git a/src/core/NEON/kernels/NEScaleKernel.cpp b/src/core/NEON/kernels/NEScaleKernel.cpp

index 66408ab94c0d53149485740eca07036ef85d73cf..47a02d77c5cffa8766cc707ed97a07077170ea91 100644 (file)
--- a/src/core/NEON/kernels/NEScaleKernel.cpp
+++ b/src/core/NEON/kernels/NEScaleKernel.cpp
@@ -49,7 +49,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *dx, const
                            BorderMode border_mode, PixelValue constant_border_value, SamplingPolicy sampling_policy, bool use_padding, bool align_corners)
  {
      ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32, DataType::QASYMM8);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32, DataType::QASYMM8, DataType::QASYMM8_SIGNED);
      ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output);
      ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
      ARM_COMPUTE_RETURN_ERROR_ON(output == input);
@@ -251,9 +251,8 @@ inline void scale_bilinear_nhwc_core(const ITensor *input, const ITensor *offset
  
      int border_size = (border_mode == BorderMode::UNDEFINED) ? 0 : 1;
  
-    const bool                    is_quantized = (input->info()->data_type() == DataType::QASYMM8);
-    const UniformQuantizationInfo iq_info      = input->info()->quantization_info().uniform();
-    const UniformQuantizationInfo oq_info      = output->info()->quantization_info().uniform();
+    const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform();
+    const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform();
  
      execute_window_loop(window, [&](const Coordinates & id)
      {
@@ -309,7 +308,7 @@ inline void scale_bilinear_nhwc_core(const ITensor *input, const ITensor *offset
  
              T res = 0;
              //dequantize quantized input
-            if(is_quantized)
+            if(input->info()->data_type() == DataType::QASYMM8)
              {
                  float inp00 = dequantize_qasymm8(a00, iq_info);
                  float inp01 = dequantize_qasymm8(a01, iq_info);
@@ -317,6 +316,14 @@ inline void scale_bilinear_nhwc_core(const ITensor *input, const ITensor *offset
                  float inp11 = dequantize_qasymm8(a11, iq_info);
                  res         = static_cast<T>(quantize_qasymm8((inp00 * w1 + inp01 * w2 + inp10 * w3 + inp11 * w4), oq_info));
              }
+            else if(input->info()->data_type() == DataType::QASYMM8_SIGNED)
+            {
+                float inp00 = dequantize_qasymm8_signed(a00, iq_info);
+                float inp01 = dequantize_qasymm8_signed(a01, iq_info);
+                float inp10 = dequantize_qasymm8_signed(a10, iq_info);
+                float inp11 = dequantize_qasymm8_signed(a11, iq_info);
+                res         = static_cast<T>(quantize_qasymm8_signed((inp00 * w1 + inp01 * w2 + inp10 * w3 + inp11 * w4), oq_info));
+            }
              else
              {
                  res = static_cast<T>(a00 * w1 + a01 * w2 + a10 * w3 + a11 * w4);
@@ -469,6 +476,42 @@ void NEScaleKernel::scale_nearest_nchw(const Window &window)
  
      switch(_input->info()->data_type())
      {
+        case DataType::QASYMM8_SIGNED:
+        {
+            int8x16_t tmp = vdupq_n_s8(0);
+
+            execute_window_loop(window, [&](const Coordinates & id)
+            {
+                const auto           offsets_ptr = reinterpret_cast<const int32_t *>(offsets.ptr());
+                const uint8_t *const in_ptr      = in.ptr();
+
+                const int in_yi         = std::floor((id.y() + _sampling_offset) * hr);
+                const int in_yi_clamped = std::min(static_cast<int>(_input->info()->dimension(1)), std::max(in_yi, -1));
+                ARM_COMPUTE_ERROR_ON(in_yi_clamped < -1 || in_yi_clamped > static_cast<int>(_input->info()->dimension(1)));
+                const int offset_row = in_yi_clamped * input_stride;
+
+                tmp = vsetq_lane_s8(in_ptr[offsets_ptr[0] + offset_row], tmp, 0);
+                tmp = vsetq_lane_s8(in_ptr[offsets_ptr[1] + offset_row], tmp, 1);
+                tmp = vsetq_lane_s8(in_ptr[offsets_ptr[2] + offset_row], tmp, 2);
+                tmp = vsetq_lane_s8(in_ptr[offsets_ptr[3] + offset_row], tmp, 3);
+                tmp = vsetq_lane_s8(in_ptr[offsets_ptr[4] + offset_row], tmp, 4);
+                tmp = vsetq_lane_s8(in_ptr[offsets_ptr[5] + offset_row], tmp, 5);
+                tmp = vsetq_lane_s8(in_ptr[offsets_ptr[6] + offset_row], tmp, 6);
+                tmp = vsetq_lane_s8(in_ptr[offsets_ptr[7] + offset_row], tmp, 7);
+                tmp = vsetq_lane_s8(in_ptr[offsets_ptr[8] + offset_row], tmp, 8);
+                tmp = vsetq_lane_s8(in_ptr[offsets_ptr[9] + offset_row], tmp, 9);
+                tmp = vsetq_lane_s8(in_ptr[offsets_ptr[10] + offset_row], tmp, 10);
+                tmp = vsetq_lane_s8(in_ptr[offsets_ptr[11] + offset_row], tmp, 11);
+                tmp = vsetq_lane_s8(in_ptr[offsets_ptr[12] + offset_row], tmp, 12);
+                tmp = vsetq_lane_s8(in_ptr[offsets_ptr[13] + offset_row], tmp, 13);
+                tmp = vsetq_lane_s8(in_ptr[offsets_ptr[14] + offset_row], tmp, 14);
+                tmp = vsetq_lane_s8(in_ptr[offsets_ptr[15] + offset_row], tmp, 15);
+
+                vst1q_s8(reinterpret_cast<int8_t *>(out.ptr()), tmp);
+            },
+            in, offsets, out);
+            break;
+        }
          case DataType::QASYMM8:
          case DataType::U8:
          {
@@ -639,7 +682,7 @@ void NEScaleKernel::scale_nearest_nchw(const Window &window)
  
  void NEScaleKernel::scale_bilinear_nchw(const Window &window)
  {
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(_input, 1, DataType::U8, DataType::QASYMM8, DataType::S16, DataType::F16, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(_input, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S16, DataType::F16, DataType::F32);
  
      // Compute the ratio between source height and destination height
      const auto hr = arm_compute::calculate_resize_ratio(_input->info()->dimension(1), _output->info()->dimension(1), _align_corners);
@@ -669,13 +712,89 @@ void NEScaleKernel::scale_bilinear_nchw(const Window &window)
      const size_t in_stide_in_bytes = _input->info()->strides_in_bytes()[1];
      const size_t in_stride         = in_stide_in_bytes / _input->info()->element_size();
  
-    const bool                    is_quantized = (_input->info()->data_type() == DataType::QASYMM8);
-    const UniformQuantizationInfo iq_info      = _input->info()->quantization_info().uniform();
-    const UniformQuantizationInfo oq_info      = _output->info()->quantization_info().uniform();
+    const UniformQuantizationInfo iq_info = _input->info()->quantization_info().uniform();
+    const UniformQuantizationInfo oq_info = _output->info()->quantization_info().uniform();
  
      switch(_input->info()->data_type())
      {
+        case DataType::QASYMM8_SIGNED:
+        {
+            execute_window_loop(window, [&](const Coordinates & id)
+            {
+                const auto offsets_ptr = reinterpret_cast<const int32_t *>(offsets.ptr());
+                const auto dx_ptr      = reinterpret_cast<const float *>(dx.ptr());
+                const auto dy_ptr      = reinterpret_cast<const float *>(dy.ptr());
+                const auto in_ptr      = reinterpret_cast<const uint8_t *>(in.ptr());
+
+                const int in_yi      = std::floor((id.y() + _sampling_offset) * hr - _sampling_offset);
+                const int offset_row = in_yi * in_stide_in_bytes;
+
+                int8x8_t tmp0 = vdup_n_s8(0);
+
+                tmp0 = vset_lane_s8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[0] + offset_row], in_stride, dx_ptr[0], dy_ptr[0], iq_info, oq_info), tmp0, 0);
+                tmp0 = vset_lane_s8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[1] + offset_row], in_stride, dx_ptr[1], dy_ptr[1], iq_info, oq_info), tmp0, 1);
+                tmp0 = vset_lane_s8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[2] + offset_row], in_stride, dx_ptr[2], dy_ptr[2], iq_info, oq_info), tmp0, 2);
+                tmp0 = vset_lane_s8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[3] + offset_row], in_stride, dx_ptr[3], dy_ptr[3], iq_info, oq_info), tmp0, 3);
+                tmp0 = vset_lane_s8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[4] + offset_row], in_stride, dx_ptr[4], dy_ptr[4], iq_info, oq_info), tmp0, 4);
+                tmp0 = vset_lane_s8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[5] + offset_row], in_stride, dx_ptr[5], dy_ptr[5], iq_info, oq_info), tmp0, 5);
+                tmp0 = vset_lane_s8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[6] + offset_row], in_stride, dx_ptr[6], dy_ptr[6], iq_info, oq_info), tmp0, 6);
+                tmp0 = vset_lane_s8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[7] + offset_row], in_stride, dx_ptr[7], dy_ptr[7], iq_info, oq_info), tmp0, 7);
+
+                int8x8_t tmp1 = vdup_n_s8(0);
+
+                tmp1 = vset_lane_s8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[8] + offset_row], in_stride, dx_ptr[8], dy_ptr[8], iq_info, oq_info), tmp1, 0);
+                tmp1 = vset_lane_s8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[9] + offset_row], in_stride, dx_ptr[9], dy_ptr[9], iq_info, oq_info), tmp1, 1);
+                tmp1 = vset_lane_s8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[10] + offset_row], in_stride, dx_ptr[10], dy_ptr[10], iq_info, oq_info), tmp1, 2);
+                tmp1 = vset_lane_s8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[11] + offset_row], in_stride, dx_ptr[11], dy_ptr[11], iq_info, oq_info), tmp1, 3);
+                tmp1 = vset_lane_s8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[12] + offset_row], in_stride, dx_ptr[12], dy_ptr[12], iq_info, oq_info), tmp1, 4);
+                tmp1 = vset_lane_s8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[13] + offset_row], in_stride, dx_ptr[13], dy_ptr[13], iq_info, oq_info), tmp1, 5);
+                tmp1 = vset_lane_s8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[14] + offset_row], in_stride, dx_ptr[14], dy_ptr[14], iq_info, oq_info), tmp1, 6);
+                tmp1 = vset_lane_s8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[15] + offset_row], in_stride, dx_ptr[15], dy_ptr[15], iq_info, oq_info), tmp1, 7);
+
+                vst1q_s8(reinterpret_cast<int8_t *>(out.ptr()), vcombine_s8(tmp0, tmp1));
+            },
+            in, offsets, dx, dy, out);
+            break;
+        }
          case DataType::QASYMM8:
+        {
+            execute_window_loop(window, [&](const Coordinates & id)
+            {
+                const auto offsets_ptr = reinterpret_cast<const int32_t *>(offsets.ptr());
+                const auto dx_ptr      = reinterpret_cast<const float *>(dx.ptr());
+                const auto dy_ptr      = reinterpret_cast<const float *>(dy.ptr());
+                const auto in_ptr      = reinterpret_cast<const uint8_t *>(in.ptr());
+
+                const int in_yi      = std::floor((id.y() + _sampling_offset) * hr - _sampling_offset);
+                const int offset_row = in_yi * in_stide_in_bytes;
+
+                uint8x8_t tmp0 = vdup_n_u8(0);
+
+                tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[0] + offset_row], in_stride, dx_ptr[0], dy_ptr[0], iq_info, oq_info), tmp0, 0);
+                tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[1] + offset_row], in_stride, dx_ptr[1], dy_ptr[1], iq_info, oq_info), tmp0, 1);
+                tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[2] + offset_row], in_stride, dx_ptr[2], dy_ptr[2], iq_info, oq_info), tmp0, 2);
+                tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[3] + offset_row], in_stride, dx_ptr[3], dy_ptr[3], iq_info, oq_info), tmp0, 3);
+                tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[4] + offset_row], in_stride, dx_ptr[4], dy_ptr[4], iq_info, oq_info), tmp0, 4);
+                tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[5] + offset_row], in_stride, dx_ptr[5], dy_ptr[5], iq_info, oq_info), tmp0, 5);
+                tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[6] + offset_row], in_stride, dx_ptr[6], dy_ptr[6], iq_info, oq_info), tmp0, 6);
+                tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[7] + offset_row], in_stride, dx_ptr[7], dy_ptr[7], iq_info, oq_info), tmp0, 7);
+
+                uint8x8_t tmp1 = vdup_n_u8(0);
+
+                tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[8] + offset_row], in_stride, dx_ptr[8], dy_ptr[8], iq_info, oq_info), tmp1, 0);
+                tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[9] + offset_row], in_stride, dx_ptr[9], dy_ptr[9], iq_info, oq_info), tmp1, 1);
+                tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[10] + offset_row], in_stride, dx_ptr[10], dy_ptr[10], iq_info, oq_info), tmp1, 2);
+                tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[11] + offset_row], in_stride, dx_ptr[11], dy_ptr[11], iq_info, oq_info), tmp1, 3);
+                tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[12] + offset_row], in_stride, dx_ptr[12], dy_ptr[12], iq_info, oq_info), tmp1, 4);
+                tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[13] + offset_row], in_stride, dx_ptr[13], dy_ptr[13], iq_info, oq_info), tmp1, 5);
+                tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[14] + offset_row], in_stride, dx_ptr[14], dy_ptr[14], iq_info, oq_info), tmp1, 6);
+                tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[15] + offset_row], in_stride, dx_ptr[15], dy_ptr[15], iq_info, oq_info), tmp1, 7);
+
+                vst1q_u8(out.ptr(), vcombine_u8(tmp0, tmp1));
+            },
+            in, offsets, dx, dy, out);
+            break;
+        }
          case DataType::U8:
          {
              execute_window_loop(window, [&](const Coordinates & id)
@@ -689,51 +808,27 @@ void NEScaleKernel::scale_bilinear_nchw(const Window &window)
                  const int offset_row = in_yi * in_stide_in_bytes;
  
                  uint8x8_t tmp0 = vdup_n_u8(0);
-                if(is_quantized)
-                {
-                    tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[0] + offset_row], in_stride, dx_ptr[0], dy_ptr[0], iq_info, oq_info), tmp0, 0);
-                    tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[1] + offset_row], in_stride, dx_ptr[1], dy_ptr[1], iq_info, oq_info), tmp0, 1);
-                    tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[2] + offset_row], in_stride, dx_ptr[2], dy_ptr[2], iq_info, oq_info), tmp0, 2);
-                    tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[3] + offset_row], in_stride, dx_ptr[3], dy_ptr[3], iq_info, oq_info), tmp0, 3);
-                    tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[4] + offset_row], in_stride, dx_ptr[4], dy_ptr[4], iq_info, oq_info), tmp0, 4);
-                    tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[5] + offset_row], in_stride, dx_ptr[5], dy_ptr[5], iq_info, oq_info), tmp0, 5);
-                    tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[6] + offset_row], in_stride, dx_ptr[6], dy_ptr[6], iq_info, oq_info), tmp0, 6);
-                    tmp0 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[7] + offset_row], in_stride, dx_ptr[7], dy_ptr[7], iq_info, oq_info), tmp0, 7);
-                }
-                else
-                {
-                    tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[0] + offset_row], in_stride, dx_ptr[0], dy_ptr[0]), tmp0, 0);
-                    tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[1] + offset_row], in_stride, dx_ptr[1], dy_ptr[1]), tmp0, 1);
-                    tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[2] + offset_row], in_stride, dx_ptr[2], dy_ptr[2]), tmp0, 2);
-                    tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[3] + offset_row], in_stride, dx_ptr[3], dy_ptr[3]), tmp0, 3);
-                    tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[4] + offset_row], in_stride, dx_ptr[4], dy_ptr[4]), tmp0, 4);
-                    tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[5] + offset_row], in_stride, dx_ptr[5], dy_ptr[5]), tmp0, 5);
-                    tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[6] + offset_row], in_stride, dx_ptr[6], dy_ptr[6]), tmp0, 6);
-                    tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[7] + offset_row], in_stride, dx_ptr[7], dy_ptr[7]), tmp0, 7);
-                }
+
+                tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[0] + offset_row], in_stride, dx_ptr[0], dy_ptr[0]), tmp0, 0);
+                tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[1] + offset_row], in_stride, dx_ptr[1], dy_ptr[1]), tmp0, 1);
+                tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[2] + offset_row], in_stride, dx_ptr[2], dy_ptr[2]), tmp0, 2);
+                tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[3] + offset_row], in_stride, dx_ptr[3], dy_ptr[3]), tmp0, 3);
+                tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[4] + offset_row], in_stride, dx_ptr[4], dy_ptr[4]), tmp0, 4);
+                tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[5] + offset_row], in_stride, dx_ptr[5], dy_ptr[5]), tmp0, 5);
+                tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[6] + offset_row], in_stride, dx_ptr[6], dy_ptr[6]), tmp0, 6);
+                tmp0 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[7] + offset_row], in_stride, dx_ptr[7], dy_ptr[7]), tmp0, 7);
+
                  uint8x8_t tmp1 = vdup_n_u8(0);
-                if(is_quantized)
-                {
-                    tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[8] + offset_row], in_stride, dx_ptr[8], dy_ptr[8], iq_info, oq_info), tmp1, 0);
-                    tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[9] + offset_row], in_stride, dx_ptr[9], dy_ptr[9], iq_info, oq_info), tmp1, 1);
-                    tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[10] + offset_row], in_stride, dx_ptr[10], dy_ptr[10], iq_info, oq_info), tmp1, 2);
-                    tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[11] + offset_row], in_stride, dx_ptr[11], dy_ptr[11], iq_info, oq_info), tmp1, 3);
-                    tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[12] + offset_row], in_stride, dx_ptr[12], dy_ptr[12], iq_info, oq_info), tmp1, 4);
-                    tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[13] + offset_row], in_stride, dx_ptr[13], dy_ptr[13], iq_info, oq_info), tmp1, 5);
-                    tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[14] + offset_row], in_stride, dx_ptr[14], dy_ptr[14], iq_info, oq_info), tmp1, 6);
-                    tmp1 = vset_lane_u8(delta_bilinear_c1_quantized(&in_ptr[offsets_ptr[15] + offset_row], in_stride, dx_ptr[15], dy_ptr[15], iq_info, oq_info), tmp1, 7);
-                }
-                else
-                {
-                    tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[8] + offset_row], in_stride, dx_ptr[8], dy_ptr[8]), tmp1, 0);
-                    tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[9] + offset_row], in_stride, dx_ptr[9], dy_ptr[9]), tmp1, 1);
-                    tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[10] + offset_row], in_stride, dx_ptr[10], dy_ptr[10]), tmp1, 2);
-                    tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[11] + offset_row], in_stride, dx_ptr[11], dy_ptr[11]), tmp1, 3);
-                    tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[12] + offset_row], in_stride, dx_ptr[12], dy_ptr[12]), tmp1, 4);
-                    tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[13] + offset_row], in_stride, dx_ptr[13], dy_ptr[13]), tmp1, 5);
-                    tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[14] + offset_row], in_stride, dx_ptr[14], dy_ptr[14]), tmp1, 6);
-                    tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[15] + offset_row], in_stride, dx_ptr[15], dy_ptr[15]), tmp1, 7);
-                }
+
+                tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[8] + offset_row], in_stride, dx_ptr[8], dy_ptr[8]), tmp1, 0);
+                tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[9] + offset_row], in_stride, dx_ptr[9], dy_ptr[9]), tmp1, 1);
+                tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[10] + offset_row], in_stride, dx_ptr[10], dy_ptr[10]), tmp1, 2);
+                tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[11] + offset_row], in_stride, dx_ptr[11], dy_ptr[11]), tmp1, 3);
+                tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[12] + offset_row], in_stride, dx_ptr[12], dy_ptr[12]), tmp1, 4);
+                tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[13] + offset_row], in_stride, dx_ptr[13], dy_ptr[13]), tmp1, 5);
+                tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[14] + offset_row], in_stride, dx_ptr[14], dy_ptr[14]), tmp1, 6);
+                tmp1 = vset_lane_u8(delta_bilinear_c1(&in_ptr[offsets_ptr[15] + offset_row], in_stride, dx_ptr[15], dy_ptr[15]), tmp1, 7);
+
                  vst1q_u8(out.ptr(), vcombine_u8(tmp0, tmp1));
              },
              in, offsets, dx, dy, out);
@@ -950,6 +1045,19 @@ void NEScaleKernel::scale_nhwc(const Window &window)
  
      switch(_input->info()->data_type())
      {
+        case DataType::QASYMM8_SIGNED:
+        {
+            if(_policy == InterpolationPolicy::NEAREST_NEIGHBOR)
+            {
+                scale_nearest_nhwc_core<int8_t>(_input, _offsets, _output, hr, window, win_in, input_stride_w, input_stride_h, input_stride_c, _sampling_offset);
+            }
+            else
+            {
+                scale_bilinear_nhwc_core<int8_t, int8_t>(_input, _offsets, _dx, _dy, _output, hr, _sampling_offset,
+                                                         window, win_in, input_stride_w, input_stride_h, input_stride_c, _border_mode, _constant_border_value, _use_padding);
+            }
+            break;
+        }
          case DataType::QASYMM8:
          case DataType::U8:
          {
diff --git a/tests/validation/NEON/Scale.cpp b/tests/validation/NEON/Scale.cpp

index ce1b249f888ee4e9cc559fb8d530252d7620b912..b7d7b00cb6758414c035fae9d9f45757dd7fb237 100644 (file)
--- a/tests/validation/NEON/Scale.cpp
+++ b/tests/validation/NEON/Scale.cpp
@@ -360,6 +360,25 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEScaleQuantizedFixture<uint8_t>, framework::Da
      validate(Accessor(_target), _reference, valid_region, tolerance_u8);
  }
  TEST_SUITE_END() // QASYMM8
+
+TEST_SUITE(QASYMM8_SIGNED)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEScaleQuantizedFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(datasets::SmallShapes(),
+                                                                                                                       framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
+                                                                                                                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, -10) })),
+                                                                                                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                                                                                                                       framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
+                                                                                                                       datasets::BorderModes()),
+                                                                                                               framework::dataset::make("SamplingPolicy", { SamplingPolicy::TOP_LEFT, SamplingPolicy::CENTER })),
+                                                                                                       AlignCorners))
+{
+    //Create valid region
+    TensorInfo  src_info(_shape, 1, _data_type);
+    ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED));
+
+    // Validate output
+    validate(Accessor(_target), _reference, valid_region, tolerance_u8);
+}
+TEST_SUITE_END() // QASYMM8_SIGNED
  TEST_SUITE_END() // Quantized
  
  TEST_SUITE_END() // Scale
author	Sheri Zhang <sheri.zhang@arm.com>
	Thu, 30 Apr 2020 21:53:39 +0000 (22:53 +0100)
committer	Sheri Zhang <sheri.zhang@arm.com>
	Fri, 1 May 2020 14:22:55 +0000 (14:22 +0000)
arm_compute/core/CPP/kernels/CPPUpsampleKernel.h		patch \| blob \| history
arm_compute/core/NEON/kernels/NEScaleKernel.h		patch \| blob \| history
arm_compute/runtime/CPP/functions/CPPUpsample.h		patch \| blob \| history
docs/00_introduction.dox		patch \| blob \| history
src/core/NEON/kernels/NEScaleKernel.cpp		patch \| blob \| history
tests/validation/NEON/Scale.cpp		patch \| blob \| history