Use Arm Compute Library v19.11.1
author    Chunseok Lee <chunseok.lee@samsung.com>  Wed, 4 Mar 2020 10:14:29 +0000 (19:14 +0900)
committer Chunseok Lee <chunseok.lee@samsung.com>  Wed, 4 Mar 2020 10:14:29 +0000 (19:14 +0900)
- update arm compute library to v19.11.1
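- replace removed CLMemoryGroup with the backend-agnostic MemoryGroup
- read quantization parameters via QuantizationInfo::uniform() and the
  quantize_qasymm8()/dequantize_qasymm8() helpers
- use ARM_COMPUTE_ERROR_VAR for formatted error messages
- drop local NEArgMinMax and NEElementwiseUnaryLayerEx in favor of the
  upstream equivalents
- drop the 3x3 depthwise convolution special case in the ACL backends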

Signed-off-by: Chunseok Lee <chunseok.lee@samsung.com>
30 files changed:
compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLRNNLayerEx.h
compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceOperation.h
compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h
compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayerUpsample.h
compute/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h
compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEArgMinMax.h [deleted file]
compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceOperation.h
compute/ARMComputeEx/arm_compute/runtime/misc/functions/Utils.h
compute/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp
compute/ARMComputeEx/src/core/CL/kernels/CLCastKernel.cpp
compute/ARMComputeEx/src/core/CL/kernels/CLPReLUKernel.cpp
compute/ARMComputeEx/src/core/CL/kernels/CLSpaceToBatchNDKernel.cpp
compute/ARMComputeEx/src/core/CPP/kernels/CPPUpsampleKernelEx.cpp
compute/ARMComputeEx/src/core/NEON/NEElementwiseOperationFuncs.cpp
compute/ARMComputeEx/src/core/NEON/kernels/NECastKernel.cpp
compute/ARMComputeEx/src/core/NEON/kernels/NEHashtableLookupKernel.cpp
compute/ARMComputeEx/src/core/NEON/kernels/NEPReLUKernel.cpp
compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayerUpsample.cpp
compute/ARMComputeEx/src/runtime/NEON/functions/NEArgMinMax.cpp [deleted file]
compute/ARMComputeEx/src/runtime/NEON/functions/NEElementwiseUnaryLayerEx.cpp [deleted file]
compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedHybridLayer.cpp
compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedLayerEx.cpp
compute/ARMComputeEx/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCoreEx.cpp
compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceOperation.cpp
infra/cmake/packages/ARMComputeConfig.cmake
infra/cmake/packages/ARMComputeSourceConfig.cmake
runtime/neurun/backend/acl_cl/KernelGenerator.cc
runtime/neurun/backend/acl_cl/TensorManager.h
runtime/neurun/backend/acl_common/TemplTensorBuilder.h
runtime/neurun/backend/acl_neon/KernelGenerator.cc

diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLRNNLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLRNNLayerEx.h
index 7e88cb3..2dab0fb 100644
@@ -89,7 +89,7 @@ public:
   void prepare() override;
 
 private:
-  CLMemoryGroup _memory_group;
+  MemoryGroup _memory_group;
   CLGEMM _gemm_state_f;
   CLSaturatedArithmeticOperationKernel _add_kernel;
   CLActivationLayerKernel _activation_kernel;
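
Note: v19.11 drops the CL-specific CLMemoryGroup; functions on every backend now share the plain MemoryGroup from "arm_compute/runtime/MemoryGroup.h", which is why the member types above change. A minimal sketch of the lifetime pattern these classes follow (class and tensor names are illustrative, not part of the patch):

    #include <memory>
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/IMemoryManager.h"
    #include "arm_compute/runtime/MemoryGroup.h"
    #include "arm_compute/runtime/Tensor.h"

    class ExampleFunction // illustrative, follows the pattern of CLRNNLayerEx above
    {
    public:
      ExampleFunction(std::shared_ptr<arm_compute::IMemoryManager> mm = nullptr)
          : _memory_group(std::move(mm))
      {
      }
      void configure()
      {
        _tmp.allocator()->init(arm_compute::TensorInfo(
            arm_compute::TensorShape(16U), 1, arm_compute::DataType::F32));
        _memory_group.manage(&_tmp);  // register the intermediate with the group
        // ... configure kernels that read/write _tmp ...
        _tmp.allocator()->allocate(); // close the lifetime once consumers are known
      }
      void run()
      {
        // Acquires the group's backing memory for the duration of run()
        arm_compute::MemoryGroupResourceScope scope_mg(_memory_group);
        // ... run kernels ...
      }

    private:
      arm_compute::MemoryGroup _memory_group;
      arm_compute::Tensor _tmp;
    };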
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceOperation.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceOperation.h
index 1d367d5..64f64fc 100644
@@ -26,7 +26,7 @@
 
 #include "arm_compute/core/CL/kernels/CLReduceOperationKernel.h"
 #include "arm_compute/core/TypesEx.h"
-#include "arm_compute/runtime/CL/CLMemoryGroup.h"
+#include "arm_compute/runtime/MemoryGroup.h"
 #include "arm_compute/runtime/CL/CLTensor.h"
 #include "arm_compute/runtime/CL/CLTensorAllocator.h"
 #include "arm_compute/runtime/CL/functions/CLReshapeLayer.h"
@@ -82,7 +82,7 @@ public:
   void run() override;
 
 private:
-  CLMemoryGroup _memory_group;
+  MemoryGroup _memory_group;
   ICLTensor *_input;
   ICLTensor *_output;
   std::set<uint32_t> _axis;
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h
index 340a7bf..2780461 100644
@@ -30,7 +30,6 @@
 
 #include "arm_compute/core/CPP/kernels/CPPFlipWeightsKernel.h"
 
-#include "arm_compute/runtime/CL/CLMemoryGroup.h"
 #include "arm_compute/runtime/CL/CLTensor.h"
 #include "arm_compute/runtime/IFunction.h"
 #include "arm_compute/runtime/IMemoryManager.h"
@@ -144,7 +143,7 @@ public:
   void prepare() override;
 
 private:
-  CLMemoryGroup _memory_group;
+  MemoryGroup _memory_group;
   CLTransposeConvLayerUpsample _scale_f;
   CLConvolutionLayer _conv_f;
   CPPFlipWeightsKernel _flip_weights;
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayerUpsample.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayerUpsample.h
index 4ae0e18..14c3623 100644
@@ -21,7 +21,6 @@
 
 #include "arm_compute/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.h"
 #include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLMemoryGroup.h"
 #include "arm_compute/runtime/IFunction.h"
 #include "arm_compute/runtime/IMemoryManager.h"
 
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h
index 37bccc5..34b646b 100644
 #ifndef __ARM_COMPUTE_NEFUNCTIONSEX_H__
 #define __ARM_COMPUTE_NEFUNCTIONSEX_H__
 
-#include <arm_compute/runtime/NEON/functions/NEArgMinMax.h>
 #include <arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h>
 #include <arm_compute/runtime/NEON/functions/NECast.h>
 #include <arm_compute/runtime/NEON/functions/NEDepthToSpaceLayerEx.h>
-#include <arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayerEx.h>
 #include <arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h>
 #include <arm_compute/runtime/NEON/functions/NEFullyConnectedReshapingLayer.h>
 #include <arm_compute/runtime/NEON/functions/NEGatherEx.h>
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEArgMinMax.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEArgMinMax.h
deleted file mode 100644
index 604cd93..0000000
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __ARM_COMPUTE_NEON_ARG_MIN_MAX_H__
-#define __ARM_COMPUTE_NEON_ARG_MIN_MAX_H__
-
-#include "arm_compute/runtime/IFunction.h"
-
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to perform reduce min/max operation */
-template <ReductionOperation op> class NEArgMinMaxStatic : public IFunction
-{
-public:
-  /** Constructor */
-  NEArgMinMaxStatic(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
-  /** Configure kernel
-   *
-   * @note Supported tensor rank: up to 4
-   *
-   * @param[in]  input          Source tensor. Data type supported: QASYMM8/F16/F32
-   * @param[in]  axis           Reduction axis.
-   * @param[out] output         Destination tensor. Data type supported: Same as @p input
-   */
-  void configure(ITensor *input, int axis, ITensor *output);
-
-  /** Static function to check if given info will lead to a valid configuration of @ref NEArgMinMax
-   *
-   * @param[in] input          Source tensor. Data type supported: QASYMM8/F16/F32
-   * @param[in] axis Reduction axis.
-   * @param[in] output         Destination tensor. Data type supported: Same as @p input
-   *
-   * @return A status
-   */
-  static Status validate(const ITensorInfo *input, int axis, const ITensorInfo *output);
-
-  // Inherited methods overridden:
-  void run() override;
-
-private:
-  MemoryGroup _memory_group;
-  NEArgMinMaxLayer _reduction_kernel;
-  Tensor _reduced_out;
-  NEReshapeLayer _reshape;
-};
-
-/** Basic function to run arg max. */
-using NEArgMax = NEArgMinMaxStatic<ReductionOperation::ARG_IDX_MAX>;
-/** Basic function to run arg min. */
-using NEArgMin = NEArgMinMaxStatic<ReductionOperation::ARG_IDX_MIN>;
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_NEON_ARG_MIN_MAX_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceOperation.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceOperation.h
index 9c558e6..f824fb5 100644
@@ -32,6 +32,7 @@
 #include "arm_compute/runtime/MemoryGroup.h"
 #include "arm_compute/runtime/NEON/functions/NEReductionOperationEx.h"
 #include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h"
+#include "arm_compute/runtime/Tensor.h"
 
 namespace arm_compute
 {
diff --git a/compute/ARMComputeEx/arm_compute/runtime/misc/functions/Utils.h b/compute/ARMComputeEx/arm_compute/runtime/misc/functions/Utils.h
index 53736f5..84ee4ce 100644
@@ -95,8 +95,8 @@ void copyCast(const FromT value, arm_compute::ITensor *to, const arm_compute::Co
       float realValue = static_cast<float>(value);
       // NOTE We haven't known the policy of rounding for quantization.
       //      So this is set to a temporary value.
-      *(to->ptr_to_element(id)) =
-          to->info()->quantization_info().quantize(realValue, arm_compute::RoundingPolicy::TO_ZERO);
+      *(to->ptr_to_element(id)) = quantize_qasymm8(realValue, to->info()->quantization_info(),
+                                                   arm_compute::RoundingPolicy::TO_ZERO);
       break;
     }
     default:
diff --git a/compute/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp b/compute/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp
index 7d47606..832628d 100644
@@ -180,7 +180,7 @@ Kernel CLKernelLibraryEx::create_kernel(const std::string &kernel_name,
 
   if (_kernel_program_map.end() == kernel_program_it)
   {
-    ARM_COMPUTE_ERROR("Kernel %s not found in the CLKernelLibrary", kernel_name.c_str());
+    ARM_COMPUTE_ERROR_VAR("Kernel %s not found in the CLKernelLibrary", kernel_name.c_str());
   }
   std::string concat_str;
 
@@ -261,7 +261,7 @@ const Program &CLKernelLibraryEx::load_program(const std::string &program_name)
 
   if (_program_source_map.end() == program_source_it)
   {
-    ARM_COMPUTE_ERROR("Embedded program for %s does not exist.", program_name.c_str());
+    ARM_COMPUTE_ERROR_VAR("Embedded program for %s does not exist.", program_name.c_str());
   }
 
   program = Program(_context, program_name, program_source_it->second);
@@ -282,7 +282,7 @@ const Program &CLKernelLibraryEx::load_program(const std::string &program_name)
   }
   else
   {
-    ARM_COMPUTE_ERROR("Kernel file %s does not exist.", source_name.c_str());
+    ARM_COMPUTE_ERROR_VAR("Kernel file %s does not exist.", source_name.c_str());
   }
 #endif /* EMBEDDED_KERNELS */
 
@@ -315,7 +315,7 @@ std::string CLKernelLibraryEx::get_program_source(const std::string &program_nam
 
   if (program_source_it == _program_source_map.end())
   {
-    ARM_COMPUTE_ERROR("Embedded program for %s does not exist.", program_name.c_str());
+    ARM_COMPUTE_ERROR_VAR("Embedded program for %s does not exist.", program_name.c_str());
   }
 
   return program_source_it->second;
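
Note: v19.11 split the error macros: ARM_COMPUTE_ERROR now expects a plain message, and the printf-style variant moved to ARM_COMPUTE_ERROR_VAR, hence the renames in this file. Schematically (a fragment mirroring the hunks above):

    // <= v19.05: one macro accepted format arguments
    ARM_COMPUTE_ERROR("Kernel %s not found in the CLKernelLibrary", kernel_name.c_str());
    // v19.11: formatted messages use the _VAR suffix
    ARM_COMPUTE_ERROR_VAR("Kernel %s not found in the CLKernelLibrary", kernel_name.c_str());
    ARM_COMPUTE_ERROR("A plain message keeps the old macro");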
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLCastKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLCastKernel.cpp
index 35f607b..1736bf5 100644
@@ -52,8 +52,9 @@ void CLCastKernel::configure(const ICLTensor *input, ICLTensor *output, SubDataT
   // Create kernel
   if (is_data_type_quantized_asymmetric(input->info()->data_type()))
   {
-    const float scale_in = input->info()->quantization_info().scale;
-    const int offset_in = input->info()->quantization_info().offset;
+    UniformQuantizationInfo qinfo = input->info()->quantization_info().uniform();
+    const float scale_in = qinfo.scale;
+    const int offset_in = qinfo.offset;
     build_opts.add_option("-DSCALE=" + float_to_string_with_full_precision(scale_in));
     build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(offset_in));
 
@@ -62,8 +63,10 @@ void CLCastKernel::configure(const ICLTensor *input, ICLTensor *output, SubDataT
   }
   else if (is_data_type_quantized_asymmetric(output->info()->data_type()))
   {
-    const float scale_in = output->info()->quantization_info().scale;
-    const int offset_in = output->info()->quantization_info().offset;
+    UniformQuantizationInfo qinfo = output->info()->quantization_info().uniform();
+    const float scale_in = qinfo.scale;
+    const float offset_in = qinfo.offset;
+
     build_opts.add_option("-DSCALE=" + float_to_string_with_full_precision(scale_in));
     build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(offset_in));
 
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLPReLUKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLPReLUKernel.cpp
index e7d5870..98377f7 100644
@@ -72,18 +72,18 @@ void CLPReLUKernel::configure(const ICLTensor *input, const ICLTensor *alpha, IC
 
   if (is_data_type_quantized_asymmetric(input->info()->data_type()))
   {
-    build_opts.emplace("-DOFF_IN=" +
-                       support::cpp11::to_string(input->info()->quantization_info().offset));
-    build_opts.emplace("-DOFF_ALPHA=" +
-                       support::cpp11::to_string(alpha->info()->quantization_info().offset));
-    build_opts.emplace("-DOFF_OUT=" +
-                       support::cpp11::to_string(output->info()->quantization_info().offset));
-    build_opts.emplace("-DSCALE_IN=" +
-                       support::cpp11::to_string(input->info()->quantization_info().scale));
-    build_opts.emplace("-DSCALE_ALPHA=" +
-                       support::cpp11::to_string(alpha->info()->quantization_info().scale));
-    build_opts.emplace("-DSCALE_OUT=" +
-                       support::cpp11::to_string(output->info()->quantization_info().scale));
+    build_opts.emplace("-DOFF_IN=" + support::cpp11::to_string(
+                                         input->info()->quantization_info().uniform().offset));
+    build_opts.emplace("-DOFF_ALPHA=" + support::cpp11::to_string(
+                                            alpha->info()->quantization_info().uniform().offset));
+    build_opts.emplace("-DOFF_OUT=" + support::cpp11::to_string(
+                                          output->info()->quantization_info().uniform().offset));
+    build_opts.emplace("-DSCALE_IN=" + support::cpp11::to_string(
+                                           input->info()->quantization_info().uniform().scale));
+    build_opts.emplace("-DSCALE_ALPHA=" + support::cpp11::to_string(
+                                              alpha->info()->quantization_info().uniform().scale));
+    build_opts.emplace("-DSCALE_OUT=" + support::cpp11::to_string(
+                                            output->info()->quantization_info().uniform().scale));
     kernel_name += "_qasymm8";
   }
   _kernel =
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLSpaceToBatchNDKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLSpaceToBatchNDKernel.cpp
index f7836b6..769617d 100644
@@ -147,8 +147,8 @@ void CLSpaceToBatchNDKernel::configure(const ICLTensor *input, const ICLTensor *
   build_opts.emplace("-DBATCH_IN=" + support::cpp11::to_string(input->info()->dimension(3)));
   if (input->info()->data_type() == DataType::QASYMM8)
   {
-    build_opts.emplace("-DZERO_VALUE=" +
-                       support::cpp11::to_string(input->info()->quantization_info().offset));
+    build_opts.emplace("-DZERO_VALUE=" + support::cpp11::to_string(
+                                             input->info()->quantization_info().uniform().offset));
   }
   else
   {
diff --git a/compute/ARMComputeEx/src/core/CPP/kernels/CPPUpsampleKernelEx.cpp b/compute/ARMComputeEx/src/core/CPP/kernels/CPPUpsampleKernelEx.cpp
index 8ac667c..bc2ee0d 100644
@@ -81,7 +81,7 @@ void CPPUpsampleKernelEx::run(const Window &window, const ThreadInfo &info)
   // The fill value is normally 0, but for QASYMM8 the '0' corresponds to the offset
   const uint8_t fill_value =
       _output->info()->data_type() == DataType::QASYMM8
-          ? utility::clamp<uint8_t>(_output->info()->quantization_info().offset)
+          ? utility::clamp<uint8_t>(_output->info()->quantization_info().uniform().offset)
           : 0;
   // Filling a value different than 0 works only for QASYMM8 datatype since we are filling 1byte
   // values in a buffer of uint8_ts
diff --git a/compute/ARMComputeEx/src/core/NEON/NEElementwiseOperationFuncs.cpp b/compute/ARMComputeEx/src/core/NEON/NEElementwiseOperationFuncs.cpp
index 4508f58..164404f 100644
@@ -221,8 +221,9 @@ void elementwise_op_quantized(
   const auto window_end_x = static_cast<int>(window.x().end());
   const bool is_broadcast_across_x = (input1_win.x().step() == 0) || (input2_win.x().step() == 0);
 
-  const float output_scale = out->info()->quantization_info().scale;
-  const int output_offset = out->info()->quantization_info().offset;
+  UniformQuantizationInfo qinfo = out->info()->quantization_info().uniform();
+  const float output_scale = qinfo.scale;
+  const int output_offset = qinfo.offset;
 
   // Output quantization info (add 0.5 to round toward the nearest integer - 0.5 rounds away from
   // zero)
@@ -238,8 +239,10 @@ void elementwise_op_quantized(
     const ITensor *broadcast_tensor = is_broadcast_input_2 ? in2 : in1;
     const ITensor *non_broadcast_tensor = !is_broadcast_input_2 ? in2 : in1;
 
-    const QuantizationInfo broadcast_qinfo = broadcast_tensor->info()->quantization_info();
-    const QuantizationInfo non_broadcast_qinfo = non_broadcast_tensor->info()->quantization_info();
+    const UniformQuantizationInfo broadcast_qinfo =
+        broadcast_tensor->info()->quantization_info().uniform();
+    const UniformQuantizationInfo non_broadcast_qinfo =
+        non_broadcast_tensor->info()->quantization_info().uniform();
 
     const int32x4_t voffset_non_broadcast = vdupq_n_s32(non_broadcast_qinfo.offset);
     const float32x4_t vscale_non_broadcast = vdupq_n_f32(non_broadcast_qinfo.scale);
@@ -269,10 +272,8 @@ void elementwise_op_quantized(
           for (; x < window_end_x; ++x)
           {
             const float afs =
-                scvt_f32_qasymm8(*(non_broadcast_input_ptr + x), non_broadcast_qinfo.scale,
-                                 non_broadcast_qinfo.offset);
-            const float bfs =
-                scvt_f32_qasymm8(broadcast_value, broadcast_qinfo.scale, broadcast_qinfo.offset);
+                dequantize_qasymm8(*(non_broadcast_input_ptr + x), non_broadcast_qinfo);
+            const float bfs = dequantize_qasymm8(broadcast_value, broadcast_qinfo);
             *(output_ptr + x) =
                 (*scalar_func)(!is_broadcast_input_2 ? bfs : afs, !is_broadcast_input_2 ? afs : bfs,
                                out->info()->quantization_info());
@@ -283,12 +284,14 @@ void elementwise_op_quantized(
   else
   {
     // Input1 quantization info
-    const int32x4_t voffset1 = vdupq_n_s32(in1->info()->quantization_info().offset);
-    const float32x4_t vscale1 = vdupq_n_f32(in1->info()->quantization_info().scale);
+    UniformQuantizationInfo qinfo = in1->info()->quantization_info().uniform();
+    const int32x4_t voffset1 = vdupq_n_s32(qinfo.offset);
+    const float32x4_t vscale1 = vdupq_n_f32(qinfo.scale);
 
     // Input2 quantization info
-    const int32x4_t voffset2 = vdupq_n_s32(in2->info()->quantization_info().offset);
-    const float32x4_t vscale2 = vdupq_n_f32(in2->info()->quantization_info().scale);
+    qinfo = in2->info()->quantization_info().uniform();
+    const int32x4_t voffset2 = vdupq_n_s32(qinfo.offset);
+    const float32x4_t vscale2 = vdupq_n_f32(qinfo.scale);
 
     // Clear X Dimension on execution window as we handle manually
     input1_win.set(Window::DimX, Window::Dimension(0, 1, 1));
@@ -301,26 +304,24 @@ void elementwise_op_quantized(
     Iterator input2(in2, input2_win);
     Iterator output(out, win);
 
-    execute_window_loop(
-        win,
-        [&](const Coordinates &) {
-          const auto input1_ptr = reinterpret_cast<const uint8_t *>(input1.ptr());
-          const auto input2_ptr = reinterpret_cast<const uint8_t *>(input2.ptr());
-          const auto output_ptr = reinterpret_cast<uint8_t *>(output.ptr());
+    execute_window_loop(win,
+                        [&](const Coordinates &) {
+                          const auto input1_ptr = reinterpret_cast<const uint8_t *>(input1.ptr());
+                          const auto input2_ptr = reinterpret_cast<const uint8_t *>(input2.ptr());
+                          const auto output_ptr = reinterpret_cast<uint8_t *>(output.ptr());
 
-          int x =
-              (*neon_func)(window_start_x, window_end_x, window_step_x, input1_ptr, input2_ptr,
-                           output_ptr, voffset1, voffset2, vscale1, vscale2, voffseto, invvscaleo);
-          for (; x < window_end_x; ++x)
-          {
-            const float afs =
-                scvt_f32_qasymm8(*(input1_ptr + x), input1_qinfo.scale, input1_qinfo.offset);
-            const float bfs =
-                scvt_f32_qasymm8(*(input2_ptr + x), input2_qinfo.scale, input2_qinfo.offset);
-            *(output_ptr + x) = (*scalar_func)(afs, bfs, out->info()->quantization_info());
-          }
-        },
-        input1, input2, output);
+                          int x = (*neon_func)(window_start_x, window_end_x, window_step_x,
+                                               input1_ptr, input2_ptr, output_ptr, voffset1,
+                                               voffset2, vscale1, vscale2, voffseto, invvscaleo);
+                          for (; x < window_end_x; ++x)
+                          {
+                            const float afs = dequantize_qasymm8(*(input1_ptr + x), input1_qinfo);
+                            const float bfs = dequantize_qasymm8(*(input2_ptr + x), input2_qinfo);
+                            *(output_ptr + x) =
+                                (*scalar_func)(afs, bfs, out->info()->quantization_info());
+                          }
+                        },
+                        input1, input2, output);
   }
 }
 
diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NECastKernel.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NECastKernel.cpp
index 7e4fc12..2d6ed21 100644
@@ -394,7 +394,8 @@ template <typename FromT> void run_cast(const ITensor *input, ITensor *output, c
             case DataType::QASYMM8:
             {
               using to_vector = typename cast_vector<float>::type;
-              const QuantizationInfo &qinfo_out = output->info()->quantization_info();
+              const UniformQuantizationInfo &qinfo_out =
+                  output->info()->quantization_info().uniform();
               const auto vf = vcast<to_vector, from_vector>(vin);
               const auto vout = vquantize(vf, qinfo_out);
               store_result<qasymm8_t>(reinterpret_cast<qasymm8_t *>(out.ptr()) + x, vout);
@@ -440,7 +441,8 @@ template <typename FromT> void run_cast(const ITensor *input, ITensor *output, c
             case DataType::QASYMM8:
             {
               const QuantizationInfo &qinfo_out = output->info()->quantization_info();
-              const auto qval = qinfo_out.quantize(static_cast<float>(val), rounding_policy);
+              const auto qval =
+                  quantize_qasymm8(static_cast<float>(val), qinfo_out, rounding_policy);
               *(reinterpret_cast<qasymm8_t *>(out.ptr()) + x) = qval;
               break;
             }
@@ -486,8 +488,8 @@ void run_cast_qasymm8(const ITensor *input, ITensor *output, const Window &windo
 #else  //__aarch64__
   constexpr RoundingPolicy rounding_policy = RoundingPolicy::TO_ZERO;
 #endif //__aarch64__
-  const auto &qinfo_in = input->info()->quantization_info();
-  const auto &qinfo_out = output->info()->quantization_info();
+  const auto &qinfo_in = input->info()->quantization_info().uniform();
+  const auto &qinfo_out = output->info()->quantization_info().uniform();
 
   execute_window_loop(
       win_collapsed,
@@ -547,7 +549,7 @@ void run_cast_qasymm8(const ITensor *input, ITensor *output, const Window &windo
         for (; x < window_end_x; ++x)
         {
           qasymm8_t qval_in = *(in_ptr + x);
-          const auto val = qinfo_in.dequantize(qval_in);
+          const auto val = dequantize_qasymm8(qval_in, qinfo_in);
 
           switch (output->info()->data_type())
           {
@@ -558,7 +560,7 @@ void run_cast_qasymm8(const ITensor *input, ITensor *output, const Window &windo
             }
             case DataType::QASYMM8:
             {
-              const auto qval_out = qinfo_out.quantize(val, rounding_policy);
+              const auto qval_out = quantize_qasymm8(val, qinfo_out, rounding_policy);
               *(reinterpret_cast<qasymm8_t *>(out.ptr()) + x) = qval_out;
               break;
             }
diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEHashtableLookupKernel.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEHashtableLookupKernel.cpp
index 391337b..504ad39 100644
@@ -118,7 +118,7 @@ void NEHashtableLookupKernel::run(const Window &window, const ThreadInfo &info)
 
   const size_t lookup_dim = _output->info()->num_dimensions() - 1;
   const int const_0 = _output->info()->data_type() == DataType::QASYMM8
-                          ? _output->info()->quantization_info().offset
+                          ? _output->info()->quantization_info().uniform().offset
                           : 0;
 
   std::unordered_map<int32_t, size_t> key_index_map;
diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEPReLUKernel.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEPReLUKernel.cpp
index ad1bb90..5b2ffda 100644
@@ -63,7 +63,8 @@ template <ConditionalOperation op>
 inline uint8_t elementwise_conditional_op_quantized_scalar(const float &a, const float &b,
                                                            QuantizationInfo qinfo)
 {
-  return qinfo.quantize(elementwise_conditional_op_scalar<op>(a, b), RoundingPolicy::TO_NEAREST_UP);
+  return quantize_qasymm8(elementwise_conditional_op_scalar<op>(a, b), qinfo,
+                          RoundingPolicy::TO_NEAREST_UP);
 }
 
 template <ConditionalOperation op, typename VectorType>
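
Note: the quantize()/dequantize() member functions were removed from QuantizationInfo along with the scalar members; v19.11 exposes free helpers in "arm_compute/core/QuantizationInfo.h" instead, as the hunks in this file, NECastKernel.cpp, and NEElementwiseOperationFuncs.cpp show. A short sketch (values are illustrative):

    #include "arm_compute/core/QuantizationInfo.h"
    #include "arm_compute/core/Rounding.h"

    int main()
    {
      const arm_compute::UniformQuantizationInfo uqinfo(0.5f, 10); // scale, offset
      // float -> QASYMM8 (uint8_t), with an explicit rounding policy
      const uint8_t q = arm_compute::quantize_qasymm8(
          3.14f, uqinfo, arm_compute::RoundingPolicy::TO_NEAREST_UP);
      // QASYMM8 -> float
      const float f = arm_compute::dequantize_qasymm8(q, uqinfo);
      return f >= 0.f ? 0 : 1;
    }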
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayerUpsample.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayerUpsample.cpp
index 0ce3e67..b8d60ff 100644
@@ -19,6 +19,7 @@
 #include "arm_compute/core/CL/OpenCL.h"
 #include "arm_compute/core/Utils.h"
 #include "arm_compute/runtime/CL/CLScheduler.h"
+#include "arm_compute/core/CL/ICLTensor.h"
 
 #include <cmath>
 #include <memory>
@@ -54,7 +55,7 @@ void CLTransposeConvLayerUpsample::run()
   _output->map(CLScheduler::get().queue(), true);
   if (is_data_type_quantized_asymmetric(_output->info()->data_type()))
   {
-    const uint8_t quantized_zero = _output->info()->quantization_info().offset;
+    const uint8_t quantized_zero = _output->info()->quantization_info().uniform().offset;
     std::fill_n(_output->buffer(), _output->info()->total_size(), quantized_zero);
   }
   else
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEArgMinMax.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEArgMinMax.cpp
deleted file mode 100644
index 5ba465b..0000000
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/NEON/functions/NEArgMinMax.h"
-
-#include "arm_compute/core/CPP/Validate.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/runtime/NEON/NEScheduler.h"
-
-namespace arm_compute
-{
-
-template <ReductionOperation OP>
-NEArgMinMaxStatic<OP>::NEArgMinMaxStatic(std::shared_ptr<IMemoryManager> memory_manager)
-    : _memory_group(std::move(memory_manager)), _reduction_kernel(), _reduced_out(), _reshape()
-{
-}
-
-template <ReductionOperation OP>
-Status NEArgMinMaxStatic<OP>::validate(const ITensorInfo *input, int axis,
-                                       const ITensorInfo *output)
-{
-  ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input);
-  ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
-  ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16,
-                                                       DataType::F32);
-
-  TensorShape out_shape = input->tensor_shape();
-  const int input_dims = input->num_dimensions();
-  int axis_local = axis;
-
-  // Convert negative axis
-  axis_local = wrap_around(axis_local, input_dims);
-
-  ARM_COMPUTE_RETURN_ERROR_ON(axis_local > 3);
-  ARM_COMPUTE_RETURN_ERROR_ON(static_cast<unsigned int>(axis_local) > input->num_dimensions() - 1);
-  out_shape.remove_dimension(axis_local);
-
-  const TensorInfo out_info = output->clone()->set_tensor_shape(out_shape);
-  ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &out_info);
-
-  return Status{};
-}
-
-template <ReductionOperation OP>
-void NEArgMinMaxStatic<OP>::configure(ITensor *input, int axis, ITensor *output)
-{
-  ARM_COMPUTE_ERROR_ON_NULLPTR(input);
-
-  int axis_local = axis;
-  const int input_dims = input->info()->num_dimensions();
-
-  // Convert negative axis
-  axis_local = wrap_around(axis_local, input_dims);
-
-  // Perform reduction for axis
-  TensorShape intermediate_shape = input->info()->tensor_shape();
-  intermediate_shape.set(axis_local, 1);
-  auto in = input;
-
-  _reduced_out.allocator()->init(TensorInfo(intermediate_shape, output->info()->num_channels(),
-                                            output->info()->data_type(),
-                                            output->info()->quantization_info()));
-  _memory_group.manage(&_reduced_out);
-  _reduction_kernel.configure(in, axis_local, &_reduced_out, OP);
-
-  // Allocate intermediate tensor
-  _reduced_out.allocator()->allocate();
-
-  // Configure reshape layer if we want to drop the dimensions
-  TensorShape out_shape = input->info()->tensor_shape();
-  out_shape.remove_dimension(axis_local);
-  auto_init_if_empty(*output->info(), output->info()->clone()->set_tensor_shape(out_shape));
-  _reshape.configure(&_reduced_out, output);
-}
-
-template <ReductionOperation OP> void NEArgMinMaxStatic<OP>::run()
-{
-  MemoryGroupResourceScope scope_mg(_memory_group);
-
-  _reduction_kernel.run();
-  _reshape.run();
-}
-
-// Supported Specializations
-template class NEArgMinMaxStatic<ReductionOperation::ARG_IDX_MAX>;
-template class NEArgMinMaxStatic<ReductionOperation::ARG_IDX_MIN>;
-} // namespace arm_compute
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEElementwiseUnaryLayerEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEElementwiseUnaryLayerEx.cpp
deleted file mode 100644
index a95018a..0000000
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayerEx.h"
-
-#include "arm_compute/core/NEON/kernels/NEElementwiseUnaryKernelEx.h"
-#include "support/ToolchainSupport.h"
-
-#include <utility>
-
-namespace arm_compute
-{
-void NENegLayer::configure(const ITensor *input, ITensor *output)
-{
-  auto k = arm_compute::support::cpp14::make_unique<NEElementwiseUnaryKernelEx>();
-  k->configure(ElementWiseUnaryEx::NEG, input, output);
-  _kernel = std::move(k);
-}
-Status NENegLayer::validate(const ITensorInfo *input, const ITensorInfo *output)
-{
-  return NEElementwiseUnaryKernelEx::validate(ElementWiseUnaryEx::NEG, input, output);
-}
-} // namespace arm_compute
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedHybridLayer.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedHybridLayer.cpp
index d604fed..1a9a510 100644
@@ -154,7 +154,7 @@ void NEFullyConnectedHybridLayer::configure(const ITensor *input, const ITensor
 
   // Multiply scale
   _multiply_scale_kernel.configure(&_gemmlowp_output, &_scale_factor, output,
-                                   weights->info()->quantization_info().scale);
+                                   weights->info()->quantization_info().uniform().scale);
 
   _are_weights_reshaped = _are_weights_reshaped || fc_info.retain_internal_weights;
 
@@ -220,7 +220,7 @@ Status NEFullyConnectedHybridLayer::validate(const ITensorInfo *input, const ITe
   ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(quantized_input, *weights_to_use, gemmlowp_output));
 
   ARM_COMPUTE_RETURN_ON_ERROR(NEMultiplyScaleFactorKernel::validate(
-      &gemmlowp_output, &scale_factor, output, weights->quantization_info().scale));
+      &gemmlowp_output, &scale_factor, output, weights->quantization_info().uniform().scale));
 
   return Status{};
 }
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedLayerEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedLayerEx.cpp
index a944f69..87062d3 100644
@@ -46,10 +46,10 @@ Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const I
     // Since we need negative offsets for computing convolution, we need to change
     // QuantizationInfo()
     // Extract and negate input and weights offset
-    const QuantizationInfo input_quantization_info(input.quantization_info().scale,
-                                                   -input.quantization_info().offset);
-    const QuantizationInfo weights_quantization_info(weights.quantization_info().scale,
-                                                     -weights.quantization_info().offset);
+    const QuantizationInfo input_quantization_info(input.quantization_info().uniform().scale,
+                                                   -input.quantization_info().uniform().offset);
+    const QuantizationInfo weights_quantization_info(weights.quantization_info().uniform().scale,
+                                                     -weights.quantization_info().uniform().offset);
 
     // Validate gemmlowp function
     ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixMultiplyCore::validate(
@@ -88,10 +88,10 @@ void NEFullyConnectedLayerEx::configure_mm(const ITensor *input, const ITensor *
     const QuantizationInfo input_quantization_info = input->info()->quantization_info();
     const QuantizationInfo weights_quantization_info = weights->info()->quantization_info();
 
-    input->info()->set_quantization_info(
-        QuantizationInfo(input_quantization_info.scale, -input_quantization_info.offset));
-    weights->info()->set_quantization_info(
-        QuantizationInfo(weights_quantization_info.scale, -weights_quantization_info.offset));
+    input->info()->set_quantization_info(QuantizationInfo(
+        input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset));
+    weights->info()->set_quantization_info(QuantizationInfo(
+        weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset));
 
     // Configure gemmlowp function
     _mm_gemmlowp.configure(input, weights, nullptr, output);
@@ -236,15 +236,16 @@ void NEFullyConnectedLayerEx::configure(const ITensor *input, const ITensor *wei
   // Configure output stage for asymmetric quantized types
   if (_is_quantized)
   {
-    float multiplier = input->info()->quantization_info().scale *
-                       weights->info()->quantization_info().scale /
-                       output->info()->quantization_info().scale;
+    float multiplier = input->info()->quantization_info().uniform().scale *
+                       weights->info()->quantization_info().uniform().scale /
+                       output->info()->quantization_info().uniform().scale;
     int output_multiplier;
     int output_shift;
     quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier,
                                                                &output_shift);
     _gemmlowp_output_stage.configure(&_gemmlowp_output, biases, output, output_multiplier,
-                                     output_shift, output->info()->quantization_info().offset);
+                                     output_shift,
+                                     output->info()->quantization_info().uniform().offset);
     _gemmlowp_output.allocator()->allocate();
   }
 
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCoreEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCoreEx.cpp
index 11794a1..d167ca6 100644
@@ -71,8 +71,8 @@ void NEGEMMLowpMatrixMultiplyCoreEx::configure(const ITensor *a, const ITensor *
   _mtx_b_reshape_kernel = nullptr;
 
   // Set internal variables
-  _a_offset = a->info()->quantization_info().offset;
-  _b_offset = b->info()->quantization_info().offset;
+  _a_offset = a->info()->quantization_info().uniform().offset;
+  _b_offset = b->info()->quantization_info().uniform().offset;
   _run_vector_matrix_multiplication = a->info()->dimension(1) < 2;
   _reshape_b_only_on_first_run = info.reshape_b_only_on_first_run();
   _is_prepared = false;
@@ -277,8 +277,8 @@ Status NEGEMMLowpMatrixMultiplyCoreEx::validate(const ITensorInfo *a, const ITen
   TensorInfo tmp_b_info{};
   TensorInfo mm_result_s32_info{};
 
-  int32_t a_offset = a->quantization_info().offset;
-  int32_t b_offset = b->quantization_info().offset;
+  int32_t a_offset = a->quantization_info().uniform().offset;
+  int32_t b_offset = b->quantization_info().uniform().offset;
 
   bool fuse_output_stage = info.gemmlowp_output_stage().type != GEMMLowpOutputStageType::NONE;
   if (fuse_output_stage)
@@ -291,19 +291,16 @@ Status NEGEMMLowpMatrixMultiplyCoreEx::validate(const ITensorInfo *a, const ITen
   // Check if we need to run the optimized assembly kernel
   bool run_optimised = false;
   bool run_optimised_requantized = false;
-  const bool reshape_b_only_on_first_run = info.reshape_b_only_on_first_run();
   if (a_to_use->data_type() == DataType::QASYMM8 &&
       info.gemmlowp_output_stage().type == GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT)
   {
-    run_optimised = bool(NEGEMMAssemblyDispatch::validate(a_to_use, b, output, 1.f, 0.f,
-                                                          reshape_b_only_on_first_run));
+    run_optimised = bool(NEGEMMAssemblyDispatch::validate(a_to_use, b, c, output, gemm_info));
     run_optimised_requantized = run_optimised;
   }
   else
   {
     run_optimised = bool(NEGEMMAssemblyDispatch::validate(
-        a_to_use, b, fuse_output_stage ? &mm_result_s32_info : output, 1.f, 0.f,
-        reshape_b_only_on_first_run));
+        a_to_use, b, c, fuse_output_stage ? &mm_result_s32_info : output, gemm_info));
   }
 
   if (run_optimised)
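
Note: the assembly-dispatch validate() signature changed in v19.11: the alpha/beta scalars and the reshape flag are gone, and the bias tensor plus the full GEMMInfo are passed through instead, as the hunk above shows. Schematically (a fragment):

    // <= v19.05
    NEGEMMAssemblyDispatch::validate(a, b, output, 1.f, 0.f, reshape_b_only_on_first_run);
    // v19.11: bias (c) and the GEMM configuration travel with the call
    NEGEMMAssemblyDispatch::validate(a, b, c, output, gemm_info);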
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceOperation.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceOperation.cpp
index b36f828..e2b2870 100644
@@ -27,6 +27,8 @@
 #include "arm_compute/core/CPP/Validate.h"
 #include "arm_compute/core/Helpers.h"
 #include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/runtime/Tensor.h"
 
 using namespace arm_compute;
 
diff --git a/infra/cmake/packages/ARMComputeConfig.cmake b/infra/cmake/packages/ARMComputeConfig.cmake
index f014f3e..2ebf337 100644
@@ -152,7 +152,7 @@ function(_ARMCompute_Build ARMCompute_INSTALL_PREFIX)
 
   # Copy externals/SConstruct to externals/acl/ for Tizen build support.
   # TODO The change of externals/SConstruct should be upstreamed to ARM Compute Library community layer.
-  execute_process(COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_LIST_DIR}/ARMCompute/SConstruct" "${ARMComputeSource_DIR}")
+  execute_process(COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_LIST_DIR}/ARMCompute/SConstruct" "${ARMComputeSource_DIR}")
 
   # Build ARMCompute libraries with SCONS
   # NOTE ARMCompute SConstruct unconditioanlly appends "arm-linux-gnueabihf-" prefix for linux
diff --git a/infra/cmake/packages/ARMComputeSourceConfig.cmake b/infra/cmake/packages/ARMComputeSourceConfig.cmake
index 2720bf7..45d8727 100644
@@ -3,7 +3,7 @@ function(_ARMComputeSource_import)
   nnas_include(OptionTools)
 
   envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
-  set(ARMCOMPUTE_URL ${EXTERNAL_DOWNLOAD_SERVER}/ARM-software/ComputeLibrary/archive/v19.05.tar.gz)
+  set(ARMCOMPUTE_URL ${EXTERNAL_DOWNLOAD_SERVER}/ARM-software/ComputeLibrary/archive/v19.11.1.tar.gz)
   ExternalSource_Get(ARMCOMPUTE ${DOWNLOAD_ARMCOMPUTE} ${ARMCOMPUTE_URL})
 
   set(ARMComputeSource_DIR ${ARMCOMPUTE_SOURCE_DIR} PARENT_SCOPE)
diff --git a/runtime/neurun/backend/acl_cl/KernelGenerator.cc b/runtime/neurun/backend/acl_cl/KernelGenerator.cc
index bffb60b..5672fd9 100644
@@ -271,25 +271,13 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
   const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
   const auto act_info = acl_common::asActivationLayerInfo(activation);
 
-  if (ker_height == 3 && ker_width == 3)
-  {
-    auto fn = nnfw::cpp14::make_unique<::arm_compute::CLDepthwiseConvolutionLayer3x3>(
-        _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
-    fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(),
-                  ofm_alloc->handle(), conv_info, multiplier, act_info);
-
-    _execution_builder->append(asAclFunction(std::move(fn)));
-  }
-  else
-  {
-    auto fn = nnfw::cpp14::make_unique<::arm_compute::CLDepthwiseConvolutionLayer>();
+  auto fn = nnfw::cpp14::make_unique<::arm_compute::CLDepthwiseConvolutionLayer>(
+      _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
 
-    fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(),
-                  ofm_alloc->handle(), conv_info, multiplier, act_info);
+  fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(), ofm_alloc->handle(),
+                conv_info, multiplier, act_info);
 
-    _execution_builder->append(asAclFunction(std::move(fn)));
-  }
+  _execution_builder->append(asAclFunction(std::move(fn)));
 }
 
 void KernelGenerator::visit(const ir::operation::MaxPool2D &node)
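
Note: v19.11 deprecates the standalone 3x3 depthwise functions, and the generic CLDepthwiseConvolutionLayer selects the optimized kernel (including the 3x3 path) internally, so the generator no longer branches on kernel size. To the best of our reading, the v19.11 header declares, roughly:

    // v19.11 (CL backend; the NEON layer mirrors it):
    // void CLDepthwiseConvolutionLayer::configure(
    //     ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases,
    //     ICLTensor *output, const PadStrideInfo &conv_info,
    //     unsigned int depth_multiplier = 1,
    //     ActivationLayerInfo act_info = ActivationLayerInfo(),
    //     const Size2D &dilation = Size2D(1U, 1U));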
diff --git a/runtime/neurun/backend/acl_cl/TensorManager.h b/runtime/neurun/backend/acl_cl/TensorManager.h
index fd2a905..e798974 100644
@@ -21,7 +21,7 @@
 #include <arm_compute/runtime/PoolManager.h>
 #include <arm_compute/runtime/BlobLifetimeManager.h>
 #include <arm_compute/runtime/MemoryManagerOnDemand.h>
-#include <arm_compute/runtime/CL/CLMemoryGroup.h>
+#include <arm_compute/runtime/MemoryGroup.h>
 
 #include <AclMemoryManager.h>
 #include <AclLinearMemoryManager.h>
@@ -47,7 +47,7 @@ using LinearMemoryManager = acl_common::AclLinearMemoryManager<
     operand::ICLTensor, operand::CLTensor, operand::CLSubTensor,
     ::arm_compute::MemoryManagerOnDemand, ::arm_compute::PoolManager,
     ::arm_compute::BlobLifetimeManager, ::arm_compute::CLBufferAllocator,
-    ::arm_compute::CLMemoryGroup>;
+    ::arm_compute::MemoryGroup>;
 
 using InternalBufferManager = acl_common::AclInternalBufferManager<
     ::arm_compute::MemoryManagerOnDemand, ::arm_compute::PoolManager,
diff --git a/runtime/neurun/backend/acl_common/TemplTensorBuilder.h b/runtime/neurun/backend/acl_common/TemplTensorBuilder.h
index bb43823..d71c9b7 100644
@@ -386,8 +386,8 @@ void TemplTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::buildSubtensors(void)
       assert(parent_tensor != nullptr);
 
       // Child's type should be same with parent
-      assert(info.type().offset() == parent_tensor->info()->quantization_info().offset);
-      assert(info.type().scale() == parent_tensor->info()->quantization_info().scale);
+      assert(info.type().offset() == parent_tensor->info()->quantization_info().uniform().offset);
+      assert(info.type().scale() == parent_tensor->info()->quantization_info().uniform().scale);
       assert(asDataType(info.type().type()) == parent_tensor->info()->data_type());
 
       // NOTE SubTensor's layout must be the same with layout of parent tensor
diff --git a/runtime/neurun/backend/acl_neon/KernelGenerator.cc b/runtime/neurun/backend/acl_neon/KernelGenerator.cc
index 85c6a06..7d0e4bc 100644
@@ -201,17 +201,10 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node)
   const auto fixed_axis =
       acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value();
 
-  // auto fn = nnfw::cpp14::make_unique<::arm_compute::NEArgMinMaxLayer>();
-  auto fn = nnfw::cpp14::make_unique<::arm_compute::NEArgMax>();
-
-  // NOTE
-  // if (ofm_alloc->info()->data_type() == arm_compute::DataType::S32)
-  //{
-  ofm_alloc->info()->set_data_type(arm_compute::DataType::U32);
-  //}
-  fn->configure(ifm_alloc->handle(), fixed_axis, ofm_alloc->handle());
-  // fn->configure(ifm_alloc->handle(), fixed_axis, ofm_alloc->handle(),
-  // arm_compute::ReductionOperation::ARG_IDX_MAX);
+  auto fn = nnfw::cpp14::make_unique<::arm_compute::NEArgMinMaxLayer>();
+
+  fn->configure(ifm_alloc->handle(), fixed_axis, ofm_alloc->handle(),
+                arm_compute::ReductionOperation::ARG_IDX_MAX);
 
   auto acl_fn = asAclFunction(std::move(fn));
 
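Note: with the local NEArgMinMax wrapper deleted, the generator calls the stock NEArgMinMaxLayer, which takes the reduction operation as an argument; this makes the U32 data-type workaround above unnecessary. v19.11 declares it roughly as:

    // arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h (v19.11)
    // void NEArgMinMaxLayer::configure(ITensor *input, int axis, ITensor *output,
    //                                  const ReductionOperation &op);
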
@@ -348,24 +341,12 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
   const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
   const auto act_info = acl_common::asActivationLayerInfo(activation);
 
-  if (ker_height == 3 && ker_width == 3)
-  {
-    auto fn = nnfw::cpp14::make_unique<::arm_compute::NEDepthwiseConvolutionLayer3x3>();
-
-    fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(),
-                  ofm_alloc->handle(), conv_info, multiplier, act_info);
-
-    _execution_builder->append(asAclFunction(std::move(fn)));
-  }
-  else
-  {
-    auto fn = nnfw::cpp14::make_unique<::arm_compute::NEDepthwiseConvolutionLayer>();
+  auto fn = nnfw::cpp14::make_unique<::arm_compute::NEDepthwiseConvolutionLayer>();
 
-    fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(),
-                  ofm_alloc->handle(), conv_info, multiplier, act_info);
+  fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(), ofm_alloc->handle(),
+                conv_info, multiplier, act_info);
 
-    _execution_builder->append(asAclFunction(std::move(fn)));
-  }
+  _execution_builder->append(asAclFunction(std::move(fn)));
 }
 
 void KernelGenerator::visit(const ir::operation::Dequantize &node)