Use Arm Compute Library v19.11.1
author    Chunseok Lee <chunseok.lee@samsung.com>  Wed, 4 Mar 2020 10:14:29 +0000 (19:14 +0900)
committer Chunseok Lee <chunseok.lee@samsung.com>  Wed, 4 Mar 2020 10:14:29 +0000 (19:14 +0900)
- update arm compute library to v19.11.1
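- replace removed CLMemoryGroup with the backend-agnostic MemoryGroup
- read quantization parameters via QuantizationInfo::uniform() and the
  quantize_qasymm8()/dequantize_qasymm8() helpers
- use ARM_COMPUTE_ERROR_VAR for formatted error messages
- drop local NEArgMinMax and NEElementwiseUnaryLayerEx in favor of the
  upstream equivalents
- drop the 3x3 depthwise convolution special case in the ACL backends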

Signed-off-by: Chunseok Lee <chunseok.lee@samsung.com>
30 files changed:
compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLRNNLayerEx.h
compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceOperation.h
compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h
compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayerUpsample.h
compute/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h
compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEArgMinMax.h [deleted file]
compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceOperation.h
compute/ARMComputeEx/arm_compute/runtime/misc/functions/Utils.h
compute/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp
compute/ARMComputeEx/src/core/CL/kernels/CLCastKernel.cpp
compute/ARMComputeEx/src/core/CL/kernels/CLPReLUKernel.cpp
compute/ARMComputeEx/src/core/CL/kernels/CLSpaceToBatchNDKernel.cpp
compute/ARMComputeEx/src/core/CPP/kernels/CPPUpsampleKernelEx.cpp
compute/ARMComputeEx/src/core/NEON/NEElementwiseOperationFuncs.cpp
compute/ARMComputeEx/src/core/NEON/kernels/NECastKernel.cpp
compute/ARMComputeEx/src/core/NEON/kernels/NEHashtableLookupKernel.cpp
compute/ARMComputeEx/src/core/NEON/kernels/NEPReLUKernel.cpp
compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayerUpsample.cpp
compute/ARMComputeEx/src/runtime/NEON/functions/NEArgMinMax.cpp [deleted file]
compute/ARMComputeEx/src/runtime/NEON/functions/NEElementwiseUnaryLayerEx.cpp [deleted file]
compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedHybridLayer.cpp
compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedLayerEx.cpp
compute/ARMComputeEx/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCoreEx.cpp
compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceOperation.cpp
infra/cmake/packages/ARMComputeConfig.cmake
infra/cmake/packages/ARMComputeSourceConfig.cmake
runtime/neurun/backend/acl_cl/KernelGenerator.cc
runtime/neurun/backend/acl_cl/TensorManager.h
runtime/neurun/backend/acl_common/TemplTensorBuilder.h
runtime/neurun/backend/acl_neon/KernelGenerator.cc

diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLRNNLayerEx.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLRNNLayerEx.h
index 7e88cb3..2dab0fb 100644
@@ -89,7 +89,7 @@ public:
   void prepare() override;
 
 private:
-  CLMemoryGroup _memory_group;
+  MemoryGroup _memory_group;
   CLGEMM _gemm_state_f;
   CLSaturatedArithmeticOperationKernel _add_kernel;
   CLActivationLayerKernel _activation_kernel;
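
Note: v19.11 drops the CL-specific CLMemoryGroup; functions on every backend now share the plain MemoryGroup from "arm_compute/runtime/MemoryGroup.h", which is why the member types above change. A minimal sketch of the lifetime pattern these classes follow (class and tensor names are illustrative, not part of the patch):

    #include <memory>
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/IMemoryManager.h"
    #include "arm_compute/runtime/MemoryGroup.h"
    #include "arm_compute/runtime/Tensor.h"

    class ExampleFunction // illustrative, follows the pattern of CLRNNLayerEx above
    {
    public:
      ExampleFunction(std::shared_ptr<arm_compute::IMemoryManager> mm = nullptr)
          : _memory_group(std::move(mm))
      {
      }
      void configure()
      {
        _tmp.allocator()->init(arm_compute::TensorInfo(
            arm_compute::TensorShape(16U), 1, arm_compute::DataType::F32));
        _memory_group.manage(&_tmp);  // register the intermediate with the group
        // ... configure kernels that read/write _tmp ...
        _tmp.allocator()->allocate(); // close the lifetime once consumers are known
      }
      void run()
      {
        // Acquires the group's backing memory for the duration of run()
        arm_compute::MemoryGroupResourceScope scope_mg(_memory_group);
        // ... run kernels ...
      }

    private:
      arm_compute::MemoryGroup _memory_group;
      arm_compute::Tensor _tmp;
    };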
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceOperation.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLReduceOperation.h
index 1d367d5..64f64fc 100644
@@ -26,7 +26,7 @@
 
 #include "arm_compute/core/CL/kernels/CLReduceOperationKernel.h"
 #include "arm_compute/core/TypesEx.h"
-#include "arm_compute/runtime/CL/CLMemoryGroup.h"
+#include "arm_compute/runtime/MemoryGroup.h"
 #include "arm_compute/runtime/CL/CLTensor.h"
 #include "arm_compute/runtime/CL/CLTensorAllocator.h"
 #include "arm_compute/runtime/CL/functions/CLReshapeLayer.h"
@@ -82,7 +82,7 @@ public:
   void run() override;
 
 private:
-  CLMemoryGroup _memory_group;
+  MemoryGroup _memory_group;
   ICLTensor *_input;
   ICLTensor *_output;
   std::set<uint32_t> _axis;
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayer.h
index 340a7bf..2780461 100644
@@ -30,7 +30,6 @@
 
 #include "arm_compute/core/CPP/kernels/CPPFlipWeightsKernel.h"
 
-#include "arm_compute/runtime/CL/CLMemoryGroup.h"
 #include "arm_compute/runtime/CL/CLTensor.h"
 #include "arm_compute/runtime/IFunction.h"
 #include "arm_compute/runtime/IMemoryManager.h"
@@ -144,7 +143,7 @@ public:
   void prepare() override;
 
 private:
-  CLMemoryGroup _memory_group;
+  MemoryGroup _memory_group;
   CLTransposeConvLayerUpsample _scale_f;
   CLConvolutionLayer _conv_f;
   CPPFlipWeightsKernel _flip_weights;
diff --git a/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayerUpsample.h b/compute/ARMComputeEx/arm_compute/runtime/CL/functions/CLTransposeConvLayerUpsample.h
index 4ae0e18..14c3623 100644
@@ -21,7 +21,6 @@
 
 #include "arm_compute/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.h"
 #include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLMemoryGroup.h"
 #include "arm_compute/runtime/IFunction.h"
 #include "arm_compute/runtime/IMemoryManager.h"
 
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/NEFunctionsEx.h
index 37bccc5..34b646b 100644
 #ifndef __ARM_COMPUTE_NEFUNCTIONSEX_H__
 #define __ARM_COMPUTE_NEFUNCTIONSEX_H__
 
-#include <arm_compute/runtime/NEON/functions/NEArgMinMax.h>
 #include <arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h>
 #include <arm_compute/runtime/NEON/functions/NECast.h>
 #include <arm_compute/runtime/NEON/functions/NEDepthToSpaceLayerEx.h>
-#include <arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayerEx.h>
 #include <arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h>
 #include <arm_compute/runtime/NEON/functions/NEFullyConnectedReshapingLayer.h>
 #include <arm_compute/runtime/NEON/functions/NEGatherEx.h>
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEArgMinMax.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEArgMinMax.h
deleted file mode 100644
index 604cd93..0000000
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __ARM_COMPUTE_NEON_ARG_MIN_MAX_H__
-#define __ARM_COMPUTE_NEON_ARG_MIN_MAX_H__
-
-#include "arm_compute/runtime/IFunction.h"
-
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to perform reduce min/max operation */
-template <ReductionOperation op> class NEArgMinMaxStatic : public IFunction
-{
-public:
-  /** Constructor */
-  NEArgMinMaxStatic(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
-  /** Configure kernel
-   *
-   * @note Supported tensor rank: up to 4
-   *
-   * @param[in]  input          Source tensor. Data type supported: QASYMM8/F16/F32
-   * @param[in]  axis           Reduction axis.
-   * @param[out] output         Destination tensor. Data type supported: Same as @p input
-   */
-  void configure(ITensor *input, int axis, ITensor *output);
-
-  /** Static function to check if given info will lead to a valid configuration of @ref NEArgMinMax
-   *
-   * @param[in] input          Source tensor. Data type supported: QASYMM8/F16/F32
-   * @param[in] axis Reduction axis.
-   * @param[in] output         Destination tensor. Data type supported: Same as @p input
-   *
-   * @return A status
-   */
-  static Status validate(const ITensorInfo *input, int axis, const ITensorInfo *output);
-
-  // Inherited methods overridden:
-  void run() override;
-
-private:
-  MemoryGroup _memory_group;
-  NEArgMinMaxLayer _reduction_kernel;
-  Tensor _reduced_out;
-  NEReshapeLayer _reshape;
-};
-
-/** Basic function to run arg max. */
-using NEArgMax = NEArgMinMaxStatic<ReductionOperation::ARG_IDX_MAX>;
-/** Basic function to run arg min. */
-using NEArgMin = NEArgMinMaxStatic<ReductionOperation::ARG_IDX_MIN>;
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_NEON_ARG_MIN_MAX_H__ */
diff --git a/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceOperation.h b/compute/ARMComputeEx/arm_compute/runtime/NEON/functions/NEReduceOperation.h
index 9c558e6..f824fb5 100644
@@ -32,6 +32,7 @@
 #include "arm_compute/runtime/MemoryGroup.h"
 #include "arm_compute/runtime/NEON/functions/NEReductionOperationEx.h"
 #include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h"
+#include "arm_compute/runtime/Tensor.h"
 
 namespace arm_compute
 {
diff --git a/compute/ARMComputeEx/arm_compute/runtime/misc/functions/Utils.h b/compute/ARMComputeEx/arm_compute/runtime/misc/functions/Utils.h
index 53736f5..84ee4ce 100644
@@ -95,8 +95,8 @@ void copyCast(const FromT value, arm_compute::ITensor *to, const arm_compute::Co
       float realValue = static_cast<float>(value);
       // NOTE We haven't known the policy of rounding for quantization.
       //      So this is set to a temporary value.
-      *(to->ptr_to_element(id)) =
-          to->info()->quantization_info().quantize(realValue, arm_compute::RoundingPolicy::TO_ZERO);
+      *(to->ptr_to_element(id)) = quantize_qasymm8(realValue, to->info()->quantization_info(),
+                                                   arm_compute::RoundingPolicy::TO_ZERO);
       break;
     }
     default:
diff --git a/compute/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp b/compute/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp
index 7d47606..832628d 100644
@@ -180,7 +180,7 @@ Kernel CLKernelLibraryEx::create_kernel(const std::string &kernel_name,
 
   if (_kernel_program_map.end() == kernel_program_it)
   {
-    ARM_COMPUTE_ERROR("Kernel %s not found in the CLKernelLibrary", kernel_name.c_str());
+    ARM_COMPUTE_ERROR_VAR("Kernel %s not found in the CLKernelLibrary", kernel_name.c_str());
   }
   std::string concat_str;
 
@@ -261,7 +261,7 @@ const Program &CLKernelLibraryEx::load_program(const std::string &program_name)
 
   if (_program_source_map.end() == program_source_it)
   {
-    ARM_COMPUTE_ERROR("Embedded program for %s does not exist.", program_name.c_str());
+    ARM_COMPUTE_ERROR_VAR("Embedded program for %s does not exist.", program_name.c_str());
   }
 
   program = Program(_context, program_name, program_source_it->second);
@@ -282,7 +282,7 @@ const Program &CLKernelLibraryEx::load_program(const std::string &program_name)
   }
   else
   {
-    ARM_COMPUTE_ERROR("Kernel file %s does not exist.", source_name.c_str());
+    ARM_COMPUTE_ERROR_VAR("Kernel file %s does not exist.", source_name.c_str());
   }
 #endif /* EMBEDDED_KERNELS */
 
@@ -315,7 +315,7 @@ std::string CLKernelLibraryEx::get_program_source(const std::string &program_nam
 
   if (program_source_it == _program_source_map.end())
   {
-    ARM_COMPUTE_ERROR("Embedded program for %s does not exist.", program_name.c_str());
+    ARM_COMPUTE_ERROR_VAR("Embedded program for %s does not exist.", program_name.c_str());
   }
 
   return program_source_it->second;
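
Note: v19.11 split the error macros: ARM_COMPUTE_ERROR now expects a plain message, and the printf-style variant moved to ARM_COMPUTE_ERROR_VAR, hence the renames in this file. Schematically (a fragment mirroring the hunks above):

    // <= v19.05: one macro accepted format arguments
    ARM_COMPUTE_ERROR("Kernel %s not found in the CLKernelLibrary", kernel_name.c_str());
    // v19.11: formatted messages use the _VAR suffix
    ARM_COMPUTE_ERROR_VAR("Kernel %s not found in the CLKernelLibrary", kernel_name.c_str());
    ARM_COMPUTE_ERROR("A plain message keeps the old macro");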
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLCastKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLCastKernel.cpp
index 35f607b..1736bf5 100644
@@ -52,8 +52,9 @@ void CLCastKernel::configure(const ICLTensor *input, ICLTensor *output, SubDataT
   // Create kernel
   if (is_data_type_quantized_asymmetric(input->info()->data_type()))
   {
-    const float scale_in = input->info()->quantization_info().scale;
-    const int offset_in = input->info()->quantization_info().offset;
+    UniformQuantizationInfo qinfo = input->info()->quantization_info().uniform();
+    const float scale_in = qinfo.scale;
+    const int offset_in = qinfo.offset;
     build_opts.add_option("-DSCALE=" + float_to_string_with_full_precision(scale_in));
     build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(offset_in));
 
@@ -62,8 +63,10 @@ void CLCastKernel::configure(const ICLTensor *input, ICLTensor *output, SubDataT
   }
   else if (is_data_type_quantized_asymmetric(output->info()->data_type()))
   {
-    const float scale_in = output->info()->quantization_info().scale;
-    const int offset_in = output->info()->quantization_info().offset;
+    UniformQuantizationInfo qinfo = output->info()->quantization_info().uniform();
+    const float scale_in = qinfo.scale;
+    const float offset_in = qinfo.offset;
+
     build_opts.add_option("-DSCALE=" + float_to_string_with_full_precision(scale_in));
     build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(offset_in));
 
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLPReLUKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLPReLUKernel.cpp
index e7d5870..98377f7 100644
@@ -72,18 +72,18 @@ void CLPReLUKernel::configure(const ICLTensor *input, const ICLTensor *alpha, IC
 
   if (is_data_type_quantized_asymmetric(input->info()->data_type()))
   {
-    build_opts.emplace("-DOFF_IN=" +
-                       support::cpp11::to_string(input->info()->quantization_info().offset));
-    build_opts.emplace("-DOFF_ALPHA=" +
-                       support::cpp11::to_string(alpha->info()->quantization_info().offset));
-    build_opts.emplace("-DOFF_OUT=" +
-                       support::cpp11::to_string(output->info()->quantization_info().offset));
-    build_opts.emplace("-DSCALE_IN=" +
-                       support::cpp11::to_string(input->info()->quantization_info().scale));
-    build_opts.emplace("-DSCALE_ALPHA=" +
-                       support::cpp11::to_string(alpha->info()->quantization_info().scale));
-    build_opts.emplace("-DSCALE_OUT=" +
-                       support::cpp11::to_string(output->info()->quantization_info().scale));
+    build_opts.emplace("-DOFF_IN=" + support::cpp11::to_string(
+                                         input->info()->quantization_info().uniform().offset));
+    build_opts.emplace("-DOFF_ALPHA=" + support::cpp11::to_string(
+                                            alpha->info()->quantization_info().uniform().offset));
+    build_opts.emplace("-DOFF_OUT=" + support::cpp11::to_string(
+                                          output->info()->quantization_info().uniform().offset));
+    build_opts.emplace("-DSCALE_IN=" + support::cpp11::to_string(
+                                           input->info()->quantization_info().uniform().scale));
+    build_opts.emplace("-DSCALE_ALPHA=" + support::cpp11::to_string(
+                                              alpha->info()->quantization_info().uniform().scale));
+    build_opts.emplace("-DSCALE_OUT=" + support::cpp11::to_string(
+                                            output->info()->quantization_info().uniform().scale));
     kernel_name += "_qasymm8";
   }
   _kernel =
diff --git a/compute/ARMComputeEx/src/core/CL/kernels/CLSpaceToBatchNDKernel.cpp b/compute/ARMComputeEx/src/core/CL/kernels/CLSpaceToBatchNDKernel.cpp
index f7836b6..769617d 100644
@@ -147,8 +147,8 @@ void CLSpaceToBatchNDKernel::configure(const ICLTensor *input, const ICLTensor *
   build_opts.emplace("-DBATCH_IN=" + support::cpp11::to_string(input->info()->dimension(3)));
   if (input->info()->data_type() == DataType::QASYMM8)
   {
-    build_opts.emplace("-DZERO_VALUE=" +
-                       support::cpp11::to_string(input->info()->quantization_info().offset));
+    build_opts.emplace("-DZERO_VALUE=" + support::cpp11::to_string(
+                                             input->info()->quantization_info().uniform().offset));
   }
   else
   {
diff --git a/compute/ARMComputeEx/src/core/CPP/kernels/CPPUpsampleKernelEx.cpp b/compute/ARMComputeEx/src/core/CPP/kernels/CPPUpsampleKernelEx.cpp
index 8ac667c..bc2ee0d 100644
@@ -81,7 +81,7 @@ void CPPUpsampleKernelEx::run(const Window &window, const ThreadInfo &info)
   // The fill value is normally 0, but for QASYMM8 the '0' corresponds to the offset
   const uint8_t fill_value =
       _output->info()->data_type() == DataType::QASYMM8
-          ? utility::clamp<uint8_t>(_output->info()->quantization_info().offset)
+          ? utility::clamp<uint8_t>(_output->info()->quantization_info().uniform().offset)
           : 0;
   // Filling a value different than 0 works only for QASYMM8 datatype since we are filling 1byte
   // values in a buffer of uint8_ts
diff --git a/compute/ARMComputeEx/src/core/NEON/NEElementwiseOperationFuncs.cpp b/compute/ARMComputeEx/src/core/NEON/NEElementwiseOperationFuncs.cpp
index 4508f58..164404f 100644
@@ -221,8 +221,9 @@ void elementwise_op_quantized(
   const auto window_end_x = static_cast<int>(window.x().end());
   const bool is_broadcast_across_x = (input1_win.x().step() == 0) || (input2_win.x().step() == 0);
 
-  const float output_scale = out->info()->quantization_info().scale;
-  const int output_offset = out->info()->quantization_info().offset;
+  UniformQuantizationInfo qinfo = out->info()->quantization_info().uniform();
+  const float output_scale = qinfo.scale;
+  const int output_offset = qinfo.offset;
 
   // Output quantization info (add 0.5 to round toward the nearest integer - 0.5 rounds away from
   // zero)
@@ -238,8 +239,10 @@ void elementwise_op_quantized(
     const ITensor *broadcast_tensor = is_broadcast_input_2 ? in2 : in1;
     const ITensor *non_broadcast_tensor = !is_broadcast_input_2 ? in2 : in1;
 
-    const QuantizationInfo broadcast_qinfo = broadcast_tensor->info()->quantization_info();
-    const QuantizationInfo non_broadcast_qinfo = non_broadcast_tensor->info()->quantization_info();
+    const UniformQuantizationInfo broadcast_qinfo =
+        broadcast_tensor->info()->quantization_info().uniform();
+    const UniformQuantizationInfo non_broadcast_qinfo =
+        non_broadcast_tensor->info()->quantization_info().uniform();
 
     const int32x4_t voffset_non_broadcast = vdupq_n_s32(non_broadcast_qinfo.offset);
     const float32x4_t vscale_non_broadcast = vdupq_n_f32(non_broadcast_qinfo.scale);
@@ -269,10 +272,8 @@ void elementwise_op_quantized(
           for (; x < window_end_x; ++x)
           {
             const float afs =
-                scvt_f32_qasymm8(*(non_broadcast_input_ptr + x), non_broadcast_qinfo.scale,
-                                 non_broadcast_qinfo.offset);
-            const float bfs =
-                scvt_f32_qasymm8(broadcast_value, broadcast_qinfo.scale, broadcast_qinfo.offset);
+                dequantize_qasymm8(*(non_broadcast_input_ptr + x), non_broadcast_qinfo);
+            const float bfs = dequantize_qasymm8(broadcast_value, broadcast_qinfo);
             *(output_ptr + x) =
                 (*scalar_func)(!is_broadcast_input_2 ? bfs : afs, !is_broadcast_input_2 ? afs : bfs,
                                out->info()->quantization_info());
@@ -283,12 +284,14 @@ void elementwise_op_quantized(
   else
   {
     // Input1 quantization info
-    const int32x4_t voffset1 = vdupq_n_s32(in1->info()->quantization_info().offset);
-    const float32x4_t vscale1 = vdupq_n_f32(in1->info()->quantization_info().scale);
+    UniformQuantizationInfo qinfo = in1->info()->quantization_info().uniform();
+    const int32x4_t voffset1 = vdupq_n_s32(qinfo.offset);
+    const float32x4_t vscale1 = vdupq_n_f32(qinfo.scale);
 
     // Input2 quantization info
-    const int32x4_t voffset2 = vdupq_n_s32(in2->info()->quantization_info().offset);
-    const float32x4_t vscale2 = vdupq_n_f32(in2->info()->quantization_info().scale);
+    qinfo = in2->info()->quantization_info().uniform();
+    const int32x4_t voffset2 = vdupq_n_s32(qinfo.offset);
+    const float32x4_t vscale2 = vdupq_n_f32(qinfo.scale);
 
     // Clear X Dimension on execution window as we handle manually
     input1_win.set(Window::DimX, Window::Dimension(0, 1, 1));
@@ -301,26 +304,24 @@ void elementwise_op_quantized(
     Iterator input2(in2, input2_win);
     Iterator output(out, win);
 
-    execute_window_loop(
-        win,
-        [&](const Coordinates &) {
-          const auto input1_ptr = reinterpret_cast<const uint8_t *>(input1.ptr());
-          const auto input2_ptr = reinterpret_cast<const uint8_t *>(input2.ptr());
-          const auto output_ptr = reinterpret_cast<uint8_t *>(output.ptr());
+    execute_window_loop(win,
+                        [&](const Coordinates &) {
+                          const auto input1_ptr = reinterpret_cast<const uint8_t *>(input1.ptr());
+                          const auto input2_ptr = reinterpret_cast<const uint8_t *>(input2.ptr());
+                          const auto output_ptr = reinterpret_cast<uint8_t *>(output.ptr());
 
-          int x =
-              (*neon_func)(window_start_x, window_end_x, window_step_x, input1_ptr, input2_ptr,
-                           output_ptr, voffset1, voffset2, vscale1, vscale2, voffseto, invvscaleo);
-          for (; x < window_end_x; ++x)
-          {
-            const float afs =
-                scvt_f32_qasymm8(*(input1_ptr + x), input1_qinfo.scale, input1_qinfo.offset);
-            const float bfs =
-                scvt_f32_qasymm8(*(input2_ptr + x), input2_qinfo.scale, input2_qinfo.offset);
-            *(output_ptr + x) = (*scalar_func)(afs, bfs, out->info()->quantization_info());
-          }
-        },
-        input1, input2, output);
+                          int x = (*neon_func)(window_start_x, window_end_x, window_step_x,
+                                               input1_ptr, input2_ptr, output_ptr, voffset1,
+                                               voffset2, vscale1, vscale2, voffseto, invvscaleo);
+                          for (; x < window_end_x; ++x)
+                          {
+                            const float afs = dequantize_qasymm8(*(input1_ptr + x), input1_qinfo);
+                            const float bfs = dequantize_qasymm8(*(input2_ptr + x), input2_qinfo);
+                            *(output_ptr + x) =
+                                (*scalar_func)(afs, bfs, out->info()->quantization_info());
+                          }
+                        },
+                        input1, input2, output);
   }
 }
 
diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NECastKernel.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NECastKernel.cpp
index 7e4fc12..2d6ed21 100644
@@ -394,7 +394,8 @@ template <typename FromT> void run_cast(const ITensor *input, ITensor *output, c
             case DataType::QASYMM8:
             {
               using to_vector = typename cast_vector<float>::type;
-              const QuantizationInfo &qinfo_out = output->info()->quantization_info();
+              const UniformQuantizationInfo &qinfo_out =
+                  output->info()->quantization_info().uniform();
               const auto vf = vcast<to_vector, from_vector>(vin);
               const auto vout = vquantize(vf, qinfo_out);
               store_result<qasymm8_t>(reinterpret_cast<qasymm8_t *>(out.ptr()) + x, vout);
@@ -440,7 +441,8 @@ template <typename FromT> void run_cast(const ITensor *input, ITensor *output, c
             case DataType::QASYMM8:
             {
               const QuantizationInfo &qinfo_out = output->info()->quantization_info();
-              const auto qval = qinfo_out.quantize(static_cast<float>(val), rounding_policy);
+              const auto qval =
+                  quantize_qasymm8(static_cast<float>(val), qinfo_out, rounding_policy);
               *(reinterpret_cast<qasymm8_t *>(out.ptr()) + x) = qval;
               break;
             }
@@ -486,8 +488,8 @@ void run_cast_qasymm8(const ITensor *input, ITensor *output, const Window &windo
 #else  //__aarch64__
   constexpr RoundingPolicy rounding_policy = RoundingPolicy::TO_ZERO;
 #endif //__aarch64__
-  const auto &qinfo_in = input->info()->quantization_info();
-  const auto &qinfo_out = output->info()->quantization_info();
+  const auto &qinfo_in = input->info()->quantization_info().uniform();
+  const auto &qinfo_out = output->info()->quantization_info().uniform();
 
   execute_window_loop(
       win_collapsed,
@@ -547,7 +549,7 @@ void run_cast_qasymm8(const ITensor *input, ITensor *output, const Window &windo
         for (; x < window_end_x; ++x)
         {
           qasymm8_t qval_in = *(in_ptr + x);
-          const auto val = qinfo_in.dequantize(qval_in);
+          const auto val = dequantize_qasymm8(qval_in, qinfo_in);
 
           switch (output->info()->data_type())
           {
@@ -558,7 +560,7 @@ void run_cast_qasymm8(const ITensor *input, ITensor *output, const Window &windo
             }
             case DataType::QASYMM8:
             {
-              const auto qval_out = qinfo_out.quantize(val, rounding_policy);
+              const auto qval_out = quantize_qasymm8(val, qinfo_out, rounding_policy);
               *(reinterpret_cast<qasymm8_t *>(out.ptr()) + x) = qval_out;
               break;
             }
diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEHashtableLookupKernel.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEHashtableLookupKernel.cpp
index 391337b..504ad39 100644
@@ -118,7 +118,7 @@ void NEHashtableLookupKernel::run(const Window &window, const ThreadInfo &info)
 
   const size_t lookup_dim = _output->info()->num_dimensions() - 1;
   const int const_0 = _output->info()->data_type() == DataType::QASYMM8
-                          ? _output->info()->quantization_info().offset
+                          ? _output->info()->quantization_info().uniform().offset
                           : 0;
 
   std::unordered_map<int32_t, size_t> key_index_map;
diff --git a/compute/ARMComputeEx/src/core/NEON/kernels/NEPReLUKernel.cpp b/compute/ARMComputeEx/src/core/NEON/kernels/NEPReLUKernel.cpp
index ad1bb90..5b2ffda 100644
@@ -63,7 +63,8 @@ template <ConditionalOperation op>
 inline uint8_t elementwise_conditional_op_quantized_scalar(const float &a, const float &b,
                                                            QuantizationInfo qinfo)
 {
-  return qinfo.quantize(elementwise_conditional_op_scalar<op>(a, b), RoundingPolicy::TO_NEAREST_UP);
+  return quantize_qasymm8(elementwise_conditional_op_scalar<op>(a, b), qinfo,
+                          RoundingPolicy::TO_NEAREST_UP);
 }
 
 template <ConditionalOperation op, typename VectorType>
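
Note: the quantize()/dequantize() member functions were removed from QuantizationInfo along with the scalar members; v19.11 exposes free helpers in "arm_compute/core/QuantizationInfo.h" instead, as the hunks in this file, NECastKernel.cpp, and NEElementwiseOperationFuncs.cpp show. A short sketch (values are illustrative):

    #include "arm_compute/core/QuantizationInfo.h"
    #include "arm_compute/core/Rounding.h"

    int main()
    {
      const arm_compute::UniformQuantizationInfo uqinfo(0.5f, 10); // scale, offset
      // float -> QASYMM8 (uint8_t), with an explicit rounding policy
      const uint8_t q = arm_compute::quantize_qasymm8(
          3.14f, uqinfo, arm_compute::RoundingPolicy::TO_NEAREST_UP);
      // QASYMM8 -> float
      const float f = arm_compute::dequantize_qasymm8(q, uqinfo);
      return f >= 0.f ? 0 : 1;
    }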
diff --git a/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayerUpsample.cpp b/compute/ARMComputeEx/src/runtime/CL/functions/CLTransposeConvLayerUpsample.cpp
index 0ce3e67..b8d60ff 100644
@@ -19,6 +19,7 @@
 #include "arm_compute/core/CL/OpenCL.h"
 #include "arm_compute/core/Utils.h"
 #include "arm_compute/runtime/CL/CLScheduler.h"
+#include "arm_compute/core/CL/ICLTensor.h"
 
 #include <cmath>
 #include <memory>
@@ -54,7 +55,7 @@ void CLTransposeConvLayerUpsample::run()
   _output->map(CLScheduler::get().queue(), true);
   if (is_data_type_quantized_asymmetric(_output->info()->data_type()))
   {
-    const uint8_t quantized_zero = _output->info()->quantization_info().offset;
+    const uint8_t quantized_zero = _output->info()->quantization_info().uniform().offset;
     std::fill_n(_output->buffer(), _output->info()->total_size(), quantized_zero);
   }
   else
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEArgMinMax.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEArgMinMax.cpp
deleted file mode 100644
index 5ba465b..0000000
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/NEON/functions/NEArgMinMax.h"
-
-#include "arm_compute/core/CPP/Validate.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/runtime/NEON/NEScheduler.h"
-
-namespace arm_compute
-{
-
-template <ReductionOperation OP>
-NEArgMinMaxStatic<OP>::NEArgMinMaxStatic(std::shared_ptr<IMemoryManager> memory_manager)
-    : _memory_group(std::move(memory_manager)), _reduction_kernel(), _reduced_out(), _reshape()
-{
-}
-
-template <ReductionOperation OP>
-Status NEArgMinMaxStatic<OP>::validate(const ITensorInfo *input, int axis,
-                                       const ITensorInfo *output)
-{
-  ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input);
-  ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
-  ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16,
-                                                       DataType::F32);
-
-  TensorShape out_shape = input->tensor_shape();
-  const int input_dims = input->num_dimensions();
-  int axis_local = axis;
-
-  // Convert negative axis
-  axis_local = wrap_around(axis_local, input_dims);
-
-  ARM_COMPUTE_RETURN_ERROR_ON(axis_local > 3);
-  ARM_COMPUTE_RETURN_ERROR_ON(static_cast<unsigned int>(axis_local) > input->num_dimensions() - 1);
-  out_shape.remove_dimension(axis_local);
-
-  const TensorInfo out_info = output->clone()->set_tensor_shape(out_shape);
-  ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &out_info);
-
-  return Status{};
-}
-
-template <ReductionOperation OP>
-void NEArgMinMaxStatic<OP>::configure(ITensor *input, int axis, ITensor *output)
-{
-  ARM_COMPUTE_ERROR_ON_NULLPTR(input);
-
-  int axis_local = axis;
-  const int input_dims = input->info()->num_dimensions();
-
-  // Convert negative axis
-  axis_local = wrap_around(axis_local, input_dims);
-
-  // Perform reduction for axis
-  TensorShape intermediate_shape = input->info()->tensor_shape();
-  intermediate_shape.set(axis_local, 1);
-  auto in = input;
-
-  _reduced_out.allocator()->init(TensorInfo(intermediate_shape, output->info()->num_channels(),
-                                            output->info()->data_type(),
-                                            output->info()->quantization_info()));
-  _memory_group.manage(&_reduced_out);
-  _reduction_kernel.configure(in, axis_local, &_reduced_out, OP);
-
-  // Allocate intermediate tensor
-  _reduced_out.allocator()->allocate();
-
-  // Configure reshape layer if we want to drop the dimensions
-  TensorShape out_shape = input->info()->tensor_shape();
-  out_shape.remove_dimension(axis_local);
-  auto_init_if_empty(*output->info(), output->info()->clone()->set_tensor_shape(out_shape));
-  _reshape.configure(&_reduced_out, output);
-}
-
-template <ReductionOperation OP> void NEArgMinMaxStatic<OP>::run()
-{
-  MemoryGroupResourceScope scope_mg(_memory_group);
-
-  _reduction_kernel.run();
-  _reshape.run();
-}
-
-// Supported Specializations
-template class NEArgMinMaxStatic<ReductionOperation::ARG_IDX_MAX>;
-template class NEArgMinMaxStatic<ReductionOperation::ARG_IDX_MIN>;
-} // namespace arm_compute
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEElementwiseUnaryLayerEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEElementwiseUnaryLayerEx.cpp
deleted file mode 100644
index a95018a..0000000
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayerEx.h"
-
-#include "arm_compute/core/NEON/kernels/NEElementwiseUnaryKernelEx.h"
-#include "support/ToolchainSupport.h"
-
-#include <utility>
-
-namespace arm_compute
-{
-void NENegLayer::configure(const ITensor *input, ITensor *output)
-{
-  auto k = arm_compute::support::cpp14::make_unique<NEElementwiseUnaryKernelEx>();
-  k->configure(ElementWiseUnaryEx::NEG, input, output);
-  _kernel = std::move(k);
-}
-Status NENegLayer::validate(const ITensorInfo *input, const ITensorInfo *output)
-{
-  return NEElementwiseUnaryKernelEx::validate(ElementWiseUnaryEx::NEG, input, output);
-}
-} // namespace arm_compute
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedHybridLayer.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedHybridLayer.cpp
index d604fed..1a9a510 100644
@@ -154,7 +154,7 @@ void NEFullyConnectedHybridLayer::configure(const ITensor *input, const ITensor
 
   // Multiply scale
   _multiply_scale_kernel.configure(&_gemmlowp_output, &_scale_factor, output,
-                                   weights->info()->quantization_info().scale);
+                                   weights->info()->quantization_info().uniform().scale);
 
   _are_weights_reshaped = _are_weights_reshaped || fc_info.retain_internal_weights;
 
@@ -220,7 +220,7 @@ Status NEFullyConnectedHybridLayer::validate(const ITensorInfo *input, const ITe
   ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(quantized_input, *weights_to_use, gemmlowp_output));
 
   ARM_COMPUTE_RETURN_ON_ERROR(NEMultiplyScaleFactorKernel::validate(
-      &gemmlowp_output, &scale_factor, output, weights->quantization_info().scale));
+      &gemmlowp_output, &scale_factor, output, weights->quantization_info().uniform().scale));
 
   return Status{};
 }
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedLayerEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEFullyConnectedLayerEx.cpp
index a944f69..87062d3 100644
@@ -46,10 +46,10 @@ Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const I
     // Since we need negative offsets for computing convolution, we need to change
     // QuantizationInfo()
     // Extract and negate input and weights offset
-    const QuantizationInfo input_quantization_info(input.quantization_info().scale,
-                                                   -input.quantization_info().offset);
-    const QuantizationInfo weights_quantization_info(weights.quantization_info().scale,
-                                                     -weights.quantization_info().offset);
+    const QuantizationInfo input_quantization_info(input.quantization_info().uniform().scale,
+                                                   -input.quantization_info().uniform().offset);
+    const QuantizationInfo weights_quantization_info(weights.quantization_info().uniform().scale,
+                                                     -weights.quantization_info().uniform().offset);
 
     // Validate gemmlowp function
     ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixMultiplyCore::validate(
@@ -88,10 +88,10 @@ void NEFullyConnectedLayerEx::configure_mm(const ITensor *input, const ITensor *
     const QuantizationInfo input_quantization_info = input->info()->quantization_info();
     const QuantizationInfo weights_quantization_info = weights->info()->quantization_info();
 
-    input->info()->set_quantization_info(
-        QuantizationInfo(input_quantization_info.scale, -input_quantization_info.offset));
-    weights->info()->set_quantization_info(
-        QuantizationInfo(weights_quantization_info.scale, -weights_quantization_info.offset));
+    input->info()->set_quantization_info(QuantizationInfo(
+        input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset));
+    weights->info()->set_quantization_info(QuantizationInfo(
+        weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset));
 
     // Configure gemmlowp function
     _mm_gemmlowp.configure(input, weights, nullptr, output);
@@ -236,15 +236,16 @@ void NEFullyConnectedLayerEx::configure(const ITensor *input, const ITensor *wei
   // Configure output stage for asymmetric quantized types
   if (_is_quantized)
   {
-    float multiplier = input->info()->quantization_info().scale *
-                       weights->info()->quantization_info().scale /
-                       output->info()->quantization_info().scale;
+    float multiplier = input->info()->quantization_info().uniform().scale *
+                       weights->info()->quantization_info().uniform().scale /
+                       output->info()->quantization_info().uniform().scale;
     int output_multiplier;
     int output_shift;
     quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier,
                                                                &output_shift);
     _gemmlowp_output_stage.configure(&_gemmlowp_output, biases, output, output_multiplier,
-                                     output_shift, output->info()->quantization_info().offset);
+                                     output_shift,
+                                     output->info()->quantization_info().uniform().offset);
     _gemmlowp_output.allocator()->allocate();
   }
 
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCoreEx.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCoreEx.cpp
index 11794a1..d167ca6 100644
@@ -71,8 +71,8 @@ void NEGEMMLowpMatrixMultiplyCoreEx::configure(const ITensor *a, const ITensor *
   _mtx_b_reshape_kernel = nullptr;
 
   // Set internal variables
-  _a_offset = a->info()->quantization_info().offset;
-  _b_offset = b->info()->quantization_info().offset;
+  _a_offset = a->info()->quantization_info().uniform().offset;
+  _b_offset = b->info()->quantization_info().uniform().offset;
   _run_vector_matrix_multiplication = a->info()->dimension(1) < 2;
   _reshape_b_only_on_first_run = info.reshape_b_only_on_first_run();
   _is_prepared = false;
@@ -277,8 +277,8 @@ Status NEGEMMLowpMatrixMultiplyCoreEx::validate(const ITensorInfo *a, const ITen
   TensorInfo tmp_b_info{};
   TensorInfo mm_result_s32_info{};
 
-  int32_t a_offset = a->quantization_info().offset;
-  int32_t b_offset = b->quantization_info().offset;
+  int32_t a_offset = a->quantization_info().uniform().offset;
+  int32_t b_offset = b->quantization_info().uniform().offset;
 
   bool fuse_output_stage = info.gemmlowp_output_stage().type != GEMMLowpOutputStageType::NONE;
   if (fuse_output_stage)
@@ -291,19 +291,16 @@ Status NEGEMMLowpMatrixMultiplyCoreEx::validate(const ITensorInfo *a, const ITen
   // Check if we need to run the optimized assembly kernel
   bool run_optimised = false;
   bool run_optimised_requantized = false;
-  const bool reshape_b_only_on_first_run = info.reshape_b_only_on_first_run();
   if (a_to_use->data_type() == DataType::QASYMM8 &&
       info.gemmlowp_output_stage().type == GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT)
   {
-    run_optimised = bool(NEGEMMAssemblyDispatch::validate(a_to_use, b, output, 1.f, 0.f,
-                                                          reshape_b_only_on_first_run));
+    run_optimised = bool(NEGEMMAssemblyDispatch::validate(a_to_use, b, c, output, gemm_info));
     run_optimised_requantized = run_optimised;
   }
   else
   {
     run_optimised = bool(NEGEMMAssemblyDispatch::validate(
-        a_to_use, b, fuse_output_stage ? &mm_result_s32_info : output, 1.f, 0.f,
-        reshape_b_only_on_first_run));
+        a_to_use, b, c, fuse_output_stage ? &mm_result_s32_info : output, gemm_info));
   }
 
   if (run_optimised)
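
Note: the assembly-dispatch validate() signature changed in v19.11: the alpha/beta scalars and the reshape flag are gone, and the bias tensor plus the full GEMMInfo are passed through instead, as the hunk above shows. Schematically (a fragment):

    // <= v19.05
    NEGEMMAssemblyDispatch::validate(a, b, output, 1.f, 0.f, reshape_b_only_on_first_run);
    // v19.11: bias (c) and the GEMM configuration travel with the call
    NEGEMMAssemblyDispatch::validate(a, b, c, output, gemm_info);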
diff --git a/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceOperation.cpp b/compute/ARMComputeEx/src/runtime/NEON/functions/NEReduceOperation.cpp
index b36f828..e2b2870 100644
@@ -27,6 +27,8 @@
 #include "arm_compute/core/CPP/Validate.h"
 #include "arm_compute/core/Helpers.h"
 #include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/runtime/Tensor.h"
 
 using namespace arm_compute;
 
diff --git a/infra/cmake/packages/ARMComputeConfig.cmake b/infra/cmake/packages/ARMComputeConfig.cmake
index f014f3e..2ebf337 100644
@@ -152,7 +152,7 @@ function(_ARMCompute_Build ARMCompute_INSTALL_PREFIX)
 
   # Copy externals/SConstruct to externals/acl/ for Tizen build support.
   # TODO The change of externals/SConstruct should be upstreamed to ARM Compute Library community layer.
-  execute_process(COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_LIST_DIR}/ARMCompute/SConstruct" "${ARMComputeSource_DIR}")
+  execute_process(COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_LIST_DIR}/ARMCompute/SConstruct" "${ARMComputeSource_DIR}")
 
   # Build ARMCompute libraries with SCONS
   # NOTE ARMCompute SConstruct unconditioanlly appends "arm-linux-gnueabihf-" prefix for linux
diff --git a/infra/cmake/packages/ARMComputeSourceConfig.cmake b/infra/cmake/packages/ARMComputeSourceConfig.cmake
index 2720bf7..45d8727 100644
@@ -3,7 +3,7 @@ function(_ARMComputeSource_import)
   nnas_include(OptionTools)
 
   envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
-  set(ARMCOMPUTE_URL ${EXTERNAL_DOWNLOAD_SERVER}/ARM-software/ComputeLibrary/archive/v19.05.tar.gz)
+  set(ARMCOMPUTE_URL ${EXTERNAL_DOWNLOAD_SERVER}/ARM-software/ComputeLibrary/archive/v19.11.1.tar.gz)
   ExternalSource_Get(ARMCOMPUTE ${DOWNLOAD_ARMCOMPUTE} ${ARMCOMPUTE_URL})
 
   set(ARMComputeSource_DIR ${ARMCOMPUTE_SOURCE_DIR} PARENT_SCOPE)
diff --git a/runtime/neurun/backend/acl_cl/KernelGenerator.cc b/runtime/neurun/backend/acl_cl/KernelGenerator.cc
index bffb60b..5672fd9 100644
@@ -271,25 +271,13 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
   const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
   const auto act_info = acl_common::asActivationLayerInfo(activation);
 
-  if (ker_height == 3 && ker_width == 3)
-  {
-    auto fn = nnfw::cpp14::make_unique<::arm_compute::CLDepthwiseConvolutionLayer3x3>(
-        _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
-    fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(),
-                  ofm_alloc->handle(), conv_info, multiplier, act_info);
-
-    _execution_builder->append(asAclFunction(std::move(fn)));
-  }
-  else
-  {
-    auto fn = nnfw::cpp14::make_unique<::arm_compute::CLDepthwiseConvolutionLayer>();
+  auto fn = nnfw::cpp14::make_unique<::arm_compute::CLDepthwiseConvolutionLayer>(
+      _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
 
-    fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(),
-                  ofm_alloc->handle(), conv_info, multiplier, act_info);
+  fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(), ofm_alloc->handle(),
+                conv_info, multiplier, act_info);
 
-    _execution_builder->append(asAclFunction(std::move(fn)));
-  }
+  _execution_builder->append(asAclFunction(std::move(fn)));
 }
 
 void KernelGenerator::visit(const ir::operation::MaxPool2D &node)
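
Note: v19.11 deprecates the standalone 3x3 depthwise functions, and the generic CLDepthwiseConvolutionLayer selects the optimized kernel (including the 3x3 path) internally, so the generator no longer branches on kernel size. To the best of our reading, the v19.11 header declares, roughly:

    // v19.11 (CL backend; the NEON layer mirrors it):
    // void CLDepthwiseConvolutionLayer::configure(
    //     ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases,
    //     ICLTensor *output, const PadStrideInfo &conv_info,
    //     unsigned int depth_multiplier = 1,
    //     ActivationLayerInfo act_info = ActivationLayerInfo(),
    //     const Size2D &dilation = Size2D(1U, 1U));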
diff --git a/runtime/neurun/backend/acl_cl/TensorManager.h b/runtime/neurun/backend/acl_cl/TensorManager.h
index fd2a905..e798974 100644
@@ -21,7 +21,7 @@
 #include <arm_compute/runtime/PoolManager.h>
 #include <arm_compute/runtime/BlobLifetimeManager.h>
 #include <arm_compute/runtime/MemoryManagerOnDemand.h>
-#include <arm_compute/runtime/CL/CLMemoryGroup.h>
+#include <arm_compute/runtime/MemoryGroup.h>
 
 #include <AclMemoryManager.h>
 #include <AclLinearMemoryManager.h>
@@ -47,7 +47,7 @@ using LinearMemoryManager = acl_common::AclLinearMemoryManager<
     operand::ICLTensor, operand::CLTensor, operand::CLSubTensor,
     ::arm_compute::MemoryManagerOnDemand, ::arm_compute::PoolManager,
     ::arm_compute::BlobLifetimeManager, ::arm_compute::CLBufferAllocator,
-    ::arm_compute::CLMemoryGroup>;
+    ::arm_compute::MemoryGroup>;
 
 using InternalBufferManager = acl_common::AclInternalBufferManager<
     ::arm_compute::MemoryManagerOnDemand, ::arm_compute::PoolManager,
diff --git a/runtime/neurun/backend/acl_common/TemplTensorBuilder.h b/runtime/neurun/backend/acl_common/TemplTensorBuilder.h
index bb43823..d71c9b7 100644
@@ -386,8 +386,8 @@ void TemplTensorBuilder<T_ITensor, T_Tensor, T_SubTensor>::buildSubtensors(void)
       assert(parent_tensor != nullptr);
 
       // Child's type should be same with parent
-      assert(info.type().offset() == parent_tensor->info()->quantization_info().offset);
-      assert(info.type().scale() == parent_tensor->info()->quantization_info().scale);
+      assert(info.type().offset() == parent_tensor->info()->quantization_info().uniform().offset);
+      assert(info.type().scale() == parent_tensor->info()->quantization_info().uniform().scale);
       assert(asDataType(info.type().type()) == parent_tensor->info()->data_type());
 
       // NOTE SubTensor's layout must be the same with layout of parent tensor
diff --git a/runtime/neurun/backend/acl_neon/KernelGenerator.cc b/runtime/neurun/backend/acl_neon/KernelGenerator.cc
index 85c6a06..7d0e4bc 100644
@@ -201,17 +201,10 @@ void KernelGenerator::visit(const ir::operation::ArgMax &node)
   const auto fixed_axis =
       acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value();
 
-  // auto fn = nnfw::cpp14::make_unique<::arm_compute::NEArgMinMaxLayer>();
-  auto fn = nnfw::cpp14::make_unique<::arm_compute::NEArgMax>();
-
-  // NOTE
-  // if (ofm_alloc->info()->data_type() == arm_compute::DataType::S32)
-  //{
-  ofm_alloc->info()->set_data_type(arm_compute::DataType::U32);
-  //}
-  fn->configure(ifm_alloc->handle(), fixed_axis, ofm_alloc->handle());
-  // fn->configure(ifm_alloc->handle(), fixed_axis, ofm_alloc->handle(),
-  // arm_compute::ReductionOperation::ARG_IDX_MAX);
+  auto fn = nnfw::cpp14::make_unique<::arm_compute::NEArgMinMaxLayer>();
+
+  fn->configure(ifm_alloc->handle(), fixed_axis, ofm_alloc->handle(),
+                arm_compute::ReductionOperation::ARG_IDX_MAX);
 
   auto acl_fn = asAclFunction(std::move(fn));
 
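Note: with the local NEArgMinMax wrapper deleted, the generator calls the stock NEArgMinMaxLayer, which takes the reduction operation as an argument; this makes the U32 data-type workaround above unnecessary. v19.11 declares it roughly as:

    // arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h (v19.11)
    // void NEArgMinMaxLayer::configure(ITensor *input, int axis, ITensor *output,
    //                                  const ReductionOperation &op);
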
@@ -348,24 +341,12 @@ void KernelGenerator::visit(const ir::operation::DepthwiseConv2D &node)
   const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
   const auto act_info = acl_common::asActivationLayerInfo(activation);
 
-  if (ker_height == 3 && ker_width == 3)
-  {
-    auto fn = nnfw::cpp14::make_unique<::arm_compute::NEDepthwiseConvolutionLayer3x3>();
-
-    fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(),
-                  ofm_alloc->handle(), conv_info, multiplier, act_info);
-
-    _execution_builder->append(asAclFunction(std::move(fn)));
-  }
-  else
-  {
-    auto fn = nnfw::cpp14::make_unique<::arm_compute::NEDepthwiseConvolutionLayer>();
+  auto fn = nnfw::cpp14::make_unique<::arm_compute::NEDepthwiseConvolutionLayer>();
 
-    fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(),
-                  ofm_alloc->handle(), conv_info, multiplier, act_info);
+  fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(), ofm_alloc->handle(),
+                conv_info, multiplier, act_info);
 
-    _execution_builder->append(asAclFunction(std::move(fn)));
-  }
+  _execution_builder->append(asAclFunction(std::move(fn)));
 }
 
 void KernelGenerator::visit(const ir::operation::Dequantize &node)