void prepare() override;
private:
- CLMemoryGroup _memory_group;
+ MemoryGroup _memory_group;
CLGEMM _gemm_state_f;
CLSaturatedArithmeticOperationKernel _add_kernel;
CLActivationLayerKernel _activation_kernel;
#include "arm_compute/core/CL/kernels/CLReduceOperationKernel.h"
#include "arm_compute/core/TypesEx.h"
-#include "arm_compute/runtime/CL/CLMemoryGroup.h"
+#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"
#include "arm_compute/runtime/CL/functions/CLReshapeLayer.h"
void run() override;
private:
- CLMemoryGroup _memory_group;
+ MemoryGroup _memory_group;
ICLTensor *_input;
ICLTensor *_output;
std::set<uint32_t> _axis;
#include "arm_compute/core/CPP/kernels/CPPFlipWeightsKernel.h"
-#include "arm_compute/runtime/CL/CLMemoryGroup.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
void prepare() override;
private:
- CLMemoryGroup _memory_group;
+ MemoryGroup _memory_group;
CLTransposeConvLayerUpsample _scale_f;
CLConvolutionLayer _conv_f;
CPPFlipWeightsKernel _flip_weights;
#include "arm_compute/core/CL/kernels/CLTransposeConvLayerUpsampleKernel.h"
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLMemoryGroup.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
#ifndef __ARM_COMPUTE_NEFUNCTIONSEX_H__
#define __ARM_COMPUTE_NEFUNCTIONSEX_H__
-#include <arm_compute/runtime/NEON/functions/NEArgMinMax.h>
#include <arm_compute/runtime/NEON/functions/NEBinaryLogicalOperation.h>
#include <arm_compute/runtime/NEON/functions/NECast.h>
#include <arm_compute/runtime/NEON/functions/NEDepthToSpaceLayerEx.h>
-#include <arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayerEx.h>
#include <arm_compute/runtime/NEON/functions/NEEmbeddingLookup.h>
#include <arm_compute/runtime/NEON/functions/NEFullyConnectedReshapingLayer.h>
#include <arm_compute/runtime/NEON/functions/NEGatherEx.h>
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __ARM_COMPUTE_NEON_ARG_MIN_MAX_H__
-#define __ARM_COMPUTE_NEON_ARG_MIN_MAX_H__
-
-#include "arm_compute/runtime/IFunction.h"
-
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/MemoryGroup.h"
-#include "arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Basic function to perform reduce min/max operation */
-template <ReductionOperation op> class NEArgMinMaxStatic : public IFunction
-{
-public:
- /** Constructor */
- NEArgMinMaxStatic(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
- /** Configure kernel
- *
- * @note Supported tensor rank: up to 4
- *
- * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32
- * @param[in] axis Reduction axis.
- * @param[out] output Destination tensor. Data type supported: Same as @p input
- */
- void configure(ITensor *input, int axis, ITensor *output);
-
- /** Static function to check if given info will lead to a valid configuration of @ref NEArgMinMax
- *
- * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32
- * @param[in] axis Reduction axis.
- * @param[in] output Destination tensor. Data type supported: Same as @p input
- *
- * @return A status
- */
- static Status validate(const ITensorInfo *input, int axis, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- MemoryGroup _memory_group;
- NEArgMinMaxLayer _reduction_kernel;
- Tensor _reduced_out;
- NEReshapeLayer _reshape;
-};
-
-/** Basic function to run arg max. */
-using NEArgMax = NEArgMinMaxStatic<ReductionOperation::ARG_IDX_MAX>;
-/** Basic function to run arg min. */
-using NEArgMin = NEArgMinMaxStatic<ReductionOperation::ARG_IDX_MIN>;
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_NEON_ARG_MIN_MAX_H__ */
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/NEON/functions/NEReductionOperationEx.h"
#include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h"
+#include "arm_compute/runtime/Tensor.h"
namespace arm_compute
{
float realValue = static_cast<float>(value);
// NOTE The rounding policy for quantization has not been decided yet,
// so this is a temporary choice.
- *(to->ptr_to_element(id)) =
- to->info()->quantization_info().quantize(realValue, arm_compute::RoundingPolicy::TO_ZERO);
+ *(to->ptr_to_element(id)) = quantize_qasymm8(realValue, to->info()->quantization_info(),
+ arm_compute::RoundingPolicy::TO_ZERO);
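+ // Quick sanity check of the new free function (illustrative values, not from
+ // this patch): with scale 0.5 and offset 0, quantize_qasymm8(3.9f, qinfo,
+ // RoundingPolicy::TO_ZERO) truncates 3.9f / 0.5f = 7.8 down to 7.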
break;
}
default:
if (_kernel_program_map.end() == kernel_program_it)
{
- ARM_COMPUTE_ERROR("Kernel %s not found in the CLKernelLibrary", kernel_name.c_str());
+ ARM_COMPUTE_ERROR_VAR("Kernel %s not found in the CLKernelLibrary", kernel_name.c_str());
}
std::string concat_str;
if (_program_source_map.end() == program_source_it)
{
- ARM_COMPUTE_ERROR("Embedded program for %s does not exist.", program_name.c_str());
+ ARM_COMPUTE_ERROR_VAR("Embedded program for %s does not exist.", program_name.c_str());
}
program = Program(_context, program_name, program_source_it->second);
}
else
{
- ARM_COMPUTE_ERROR("Kernel file %s does not exist.", source_name.c_str());
+ ARM_COMPUTE_ERROR_VAR("Kernel file %s does not exist.", source_name.c_str());
}
#endif /* EMBEDDED_KERNELS */
if (program_source_it == _program_source_map.end())
{
- ARM_COMPUTE_ERROR("Embedded program for %s does not exist.", program_name.c_str());
+ ARM_COMPUTE_ERROR_VAR("Embedded program for %s does not exist.", program_name.c_str());
}
return program_source_it->second;
// Create kernel
if (is_data_type_quantized_asymmetric(input->info()->data_type()))
{
- const float scale_in = input->info()->quantization_info().scale;
- const int offset_in = input->info()->quantization_info().offset;
+ UniformQuantizationInfo qinfo = input->info()->quantization_info().uniform();
+ const float scale_in = qinfo.scale;
+ const int offset_in = qinfo.offset;
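+ // NOTE QuantizationInfo in the updated ACL can describe per-channel
+ // quantization; uniform() collapses it to a single (scale, offset) pair.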
build_opts.add_option("-DSCALE=" + float_to_string_with_full_precision(scale_in));
build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(offset_in));
}
else if (is_data_type_quantized_asymmetric(output->info()->data_type()))
{
- const float scale_in = output->info()->quantization_info().scale;
- const int offset_in = output->info()->quantization_info().offset;
+ UniformQuantizationInfo qinfo = output->info()->quantization_info().uniform();
+ const float scale_in = qinfo.scale;
+ const int offset_in = qinfo.offset;
+
build_opts.add_option("-DSCALE=" + float_to_string_with_full_precision(scale_in));
build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(offset_in));
if (is_data_type_quantized_asymmetric(input->info()->data_type()))
{
- build_opts.emplace("-DOFF_IN=" +
- support::cpp11::to_string(input->info()->quantization_info().offset));
- build_opts.emplace("-DOFF_ALPHA=" +
- support::cpp11::to_string(alpha->info()->quantization_info().offset));
- build_opts.emplace("-DOFF_OUT=" +
- support::cpp11::to_string(output->info()->quantization_info().offset));
- build_opts.emplace("-DSCALE_IN=" +
- support::cpp11::to_string(input->info()->quantization_info().scale));
- build_opts.emplace("-DSCALE_ALPHA=" +
- support::cpp11::to_string(alpha->info()->quantization_info().scale));
- build_opts.emplace("-DSCALE_OUT=" +
- support::cpp11::to_string(output->info()->quantization_info().scale));
+ build_opts.emplace("-DOFF_IN=" + support::cpp11::to_string(
+ input->info()->quantization_info().uniform().offset));
+ build_opts.emplace("-DOFF_ALPHA=" + support::cpp11::to_string(
+ alpha->info()->quantization_info().uniform().offset));
+ build_opts.emplace("-DOFF_OUT=" + support::cpp11::to_string(
+ output->info()->quantization_info().uniform().offset));
+ build_opts.emplace("-DSCALE_IN=" + support::cpp11::to_string(
+ input->info()->quantization_info().uniform().scale));
+ build_opts.emplace("-DSCALE_ALPHA=" + support::cpp11::to_string(
+ alpha->info()->quantization_info().uniform().scale));
+ build_opts.emplace("-DSCALE_OUT=" + support::cpp11::to_string(
+ output->info()->quantization_info().uniform().scale));
kernel_name += "_qasymm8";
}
_kernel =
build_opts.emplace("-DBATCH_IN=" + support::cpp11::to_string(input->info()->dimension(3)));
if (input->info()->data_type() == DataType::QASYMM8)
{
- build_opts.emplace("-DZERO_VALUE=" +
- support::cpp11::to_string(input->info()->quantization_info().offset));
+ build_opts.emplace("-DZERO_VALUE=" + support::cpp11::to_string(
+ input->info()->quantization_info().uniform().offset));
}
else
{
// The fill value is normally 0, but for QASYMM8 the '0' corresponds to the offset
const uint8_t fill_value =
_output->info()->data_type() == DataType::QASYMM8
- ? utility::clamp<uint8_t>(_output->info()->quantization_info().offset)
+ ? utility::clamp<uint8_t>(_output->info()->quantization_info().uniform().offset)
: 0;
// Filling a value other than 0 works only for the QASYMM8 data type, since we
// fill 1-byte values into a buffer of uint8_t
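+ // e.g. with a uniform offset of 128, the real value 0.0f is stored as byte 128.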
const auto window_end_x = static_cast<int>(window.x().end());
const bool is_broadcast_across_x = (input1_win.x().step() == 0) || (input2_win.x().step() == 0);
- const float output_scale = out->info()->quantization_info().scale;
- const int output_offset = out->info()->quantization_info().offset;
+ UniformQuantizationInfo qinfo = out->info()->quantization_info().uniform();
+ const float output_scale = qinfo.scale;
+ const int output_offset = qinfo.offset;
// Output quantization info (adding 0.5 rounds to the nearest integer;
// 0.5 itself rounds away from zero)
const ITensor *broadcast_tensor = is_broadcast_input_2 ? in2 : in1;
const ITensor *non_broadcast_tensor = !is_broadcast_input_2 ? in2 : in1;
- const QuantizationInfo broadcast_qinfo = broadcast_tensor->info()->quantization_info();
- const QuantizationInfo non_broadcast_qinfo = non_broadcast_tensor->info()->quantization_info();
+ const UniformQuantizationInfo broadcast_qinfo =
+ broadcast_tensor->info()->quantization_info().uniform();
+ const UniformQuantizationInfo non_broadcast_qinfo =
+ non_broadcast_tensor->info()->quantization_info().uniform();
const int32x4_t voffset_non_broadcast = vdupq_n_s32(non_broadcast_qinfo.offset);
const float32x4_t vscale_non_broadcast = vdupq_n_f32(non_broadcast_qinfo.scale);
for (; x < window_end_x; ++x)
{
const float afs =
- scvt_f32_qasymm8(*(non_broadcast_input_ptr + x), non_broadcast_qinfo.scale,
- non_broadcast_qinfo.offset);
- const float bfs =
- scvt_f32_qasymm8(broadcast_value, broadcast_qinfo.scale, broadcast_qinfo.offset);
+ dequantize_qasymm8(*(non_broadcast_input_ptr + x), non_broadcast_qinfo);
+ const float bfs = dequantize_qasymm8(broadcast_value, broadcast_qinfo);
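+ // dequantize_qasymm8(q, qinfo) evaluates scale * (q - offset); with the
+ // illustrative values scale 0.1, offset 5 and q = 25 it yields 2.0f.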
*(output_ptr + x) =
(*scalar_func)(!is_broadcast_input_2 ? bfs : afs, !is_broadcast_input_2 ? afs : bfs,
out->info()->quantization_info());
else
{
// Input1 quantization info
- const int32x4_t voffset1 = vdupq_n_s32(in1->info()->quantization_info().offset);
- const float32x4_t vscale1 = vdupq_n_f32(in1->info()->quantization_info().scale);
+ UniformQuantizationInfo qinfo = in1->info()->quantization_info().uniform();
+ const int32x4_t voffset1 = vdupq_n_s32(qinfo.offset);
+ const float32x4_t vscale1 = vdupq_n_f32(qinfo.scale);
// Input2 quantization info
- const int32x4_t voffset2 = vdupq_n_s32(in2->info()->quantization_info().offset);
- const float32x4_t vscale2 = vdupq_n_f32(in2->info()->quantization_info().scale);
+ qinfo = in2->info()->quantization_info().uniform();
+ const int32x4_t voffset2 = vdupq_n_s32(qinfo.offset);
+ const float32x4_t vscale2 = vdupq_n_f32(qinfo.scale);
// Clear X Dimension on execution window as we handle manually
input1_win.set(Window::DimX, Window::Dimension(0, 1, 1));
Iterator input2(in2, input2_win);
Iterator output(out, win);
- execute_window_loop(
- win,
- [&](const Coordinates &) {
- const auto input1_ptr = reinterpret_cast<const uint8_t *>(input1.ptr());
- const auto input2_ptr = reinterpret_cast<const uint8_t *>(input2.ptr());
- const auto output_ptr = reinterpret_cast<uint8_t *>(output.ptr());
+ execute_window_loop(win,
+ [&](const Coordinates &) {
+ const auto input1_ptr = reinterpret_cast<const uint8_t *>(input1.ptr());
+ const auto input2_ptr = reinterpret_cast<const uint8_t *>(input2.ptr());
+ const auto output_ptr = reinterpret_cast<uint8_t *>(output.ptr());
- int x =
- (*neon_func)(window_start_x, window_end_x, window_step_x, input1_ptr, input2_ptr,
- output_ptr, voffset1, voffset2, vscale1, vscale2, voffseto, invvscaleo);
- for (; x < window_end_x; ++x)
- {
- const float afs =
- scvt_f32_qasymm8(*(input1_ptr + x), input1_qinfo.scale, input1_qinfo.offset);
- const float bfs =
- scvt_f32_qasymm8(*(input2_ptr + x), input2_qinfo.scale, input2_qinfo.offset);
- *(output_ptr + x) = (*scalar_func)(afs, bfs, out->info()->quantization_info());
- }
- },
- input1, input2, output);
+ int x = (*neon_func)(window_start_x, window_end_x, window_step_x,
+ input1_ptr, input2_ptr, output_ptr, voffset1,
+ voffset2, vscale1, vscale2, voffseto, invvscaleo);
+ for (; x < window_end_x; ++x)
+ {
+ const float afs = dequantize_qasymm8(*(input1_ptr + x), input1_qinfo);
+ const float bfs = dequantize_qasymm8(*(input2_ptr + x), input2_qinfo);
+ *(output_ptr + x) =
+ (*scalar_func)(afs, bfs, out->info()->quantization_info());
+ }
+ },
+ input1, input2, output);
}
}
case DataType::QASYMM8:
{
using to_vector = typename cast_vector<float>::type;
- const QuantizationInfo &qinfo_out = output->info()->quantization_info();
+ const UniformQuantizationInfo &qinfo_out =
+ output->info()->quantization_info().uniform();
const auto vf = vcast<to_vector, from_vector>(vin);
const auto vout = vquantize(vf, qinfo_out);
store_result<qasymm8_t>(reinterpret_cast<qasymm8_t *>(out.ptr()) + x, vout);
case DataType::QASYMM8:
{
const QuantizationInfo &qinfo_out = output->info()->quantization_info();
- const auto qval = qinfo_out.quantize(static_cast<float>(val), rounding_policy);
+ const auto qval =
+ quantize_qasymm8(static_cast<float>(val), qinfo_out, rounding_policy);
*(reinterpret_cast<qasymm8_t *>(out.ptr()) + x) = qval;
break;
}
#else //__aarch64__
constexpr RoundingPolicy rounding_policy = RoundingPolicy::TO_ZERO;
#endif //__aarch64__
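+ // Presumably TO_ZERO is used here because armv7 float-to-int conversion (vcvt)
+ // truncates toward zero, matching the vectorized path; aarch64 can round to
+ // nearest instead.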
- const auto &qinfo_in = input->info()->quantization_info();
- const auto &qinfo_out = output->info()->quantization_info();
+ const auto &qinfo_in = input->info()->quantization_info().uniform();
+ const auto &qinfo_out = output->info()->quantization_info().uniform();
execute_window_loop(
win_collapsed,
for (; x < window_end_x; ++x)
{
qasymm8_t qval_in = *(in_ptr + x);
- const auto val = qinfo_in.dequantize(qval_in);
+ const auto val = dequantize_qasymm8(qval_in, qinfo_in);
switch (output->info()->data_type())
{
}
case DataType::QASYMM8:
{
- const auto qval_out = qinfo_out.quantize(val, rounding_policy);
+ const auto qval_out = quantize_qasymm8(val, qinfo_out, rounding_policy);
*(reinterpret_cast<qasymm8_t *>(out.ptr()) + x) = qval_out;
break;
}
const size_t lookup_dim = _output->info()->num_dimensions() - 1;
const int const_0 = _output->info()->data_type() == DataType::QASYMM8
- ? _output->info()->quantization_info().offset
+ ? _output->info()->quantization_info().uniform().offset
: 0;
std::unordered_map<int32_t, size_t> key_index_map;
inline uint8_t elementwise_conditional_op_quantized_scalar(const float &a, const float &b,
QuantizationInfo qinfo)
{
- return qinfo.quantize(elementwise_conditional_op_scalar<op>(a, b), RoundingPolicy::TO_NEAREST_UP);
+ return quantize_qasymm8(elementwise_conditional_op_scalar<op>(a, b), qinfo,
+ RoundingPolicy::TO_NEAREST_UP);
}
template <ConditionalOperation op, typename VectorType>
#include "arm_compute/core/CL/OpenCL.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "arm_compute/core/CL/ICLTensor.h"
#include <cmath>
#include <memory>
_output->map(CLScheduler::get().queue(), true);
if (is_data_type_quantized_asymmetric(_output->info()->data_type()))
{
- const uint8_t quantized_zero = _output->info()->quantization_info().offset;
+ const uint8_t quantized_zero = _output->info()->quantization_info().uniform().offset;
std::fill_n(_output->buffer(), _output->info()->total_size(), quantized_zero);
}
else
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/NEON/functions/NEArgMinMax.h"
-
-#include "arm_compute/core/CPP/Validate.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/runtime/NEON/NEScheduler.h"
-
-namespace arm_compute
-{
-
-template <ReductionOperation OP>
-NEArgMinMaxStatic<OP>::NEArgMinMaxStatic(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _reduction_kernel(), _reduced_out(), _reshape()
-{
-}
-
-template <ReductionOperation OP>
-Status NEArgMinMaxStatic<OP>::validate(const ITensorInfo *input, int axis,
- const ITensorInfo *output)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input);
- ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16,
- DataType::F32);
-
- TensorShape out_shape = input->tensor_shape();
- const int input_dims = input->num_dimensions();
- int axis_local = axis;
-
- // Convert negative axis
- axis_local = wrap_around(axis_local, input_dims);
-
- ARM_COMPUTE_RETURN_ERROR_ON(axis_local > 3);
- ARM_COMPUTE_RETURN_ERROR_ON(static_cast<unsigned int>(axis_local) > input->num_dimensions() - 1);
- out_shape.remove_dimension(axis_local);
-
- const TensorInfo out_info = output->clone()->set_tensor_shape(out_shape);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &out_info);
-
- return Status{};
-}
-
-template <ReductionOperation OP>
-void NEArgMinMaxStatic<OP>::configure(ITensor *input, int axis, ITensor *output)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input);
-
- int axis_local = axis;
- const int input_dims = input->info()->num_dimensions();
-
- // Convert negative axis
- axis_local = wrap_around(axis_local, input_dims);
-
- // Perform reduction for axis
- TensorShape intermediate_shape = input->info()->tensor_shape();
- intermediate_shape.set(axis_local, 1);
- auto in = input;
-
- _reduced_out.allocator()->init(TensorInfo(intermediate_shape, output->info()->num_channels(),
- output->info()->data_type(),
- output->info()->quantization_info()));
- _memory_group.manage(&_reduced_out);
- _reduction_kernel.configure(in, axis_local, &_reduced_out, OP);
-
- // Allocate intermediate tensor
- _reduced_out.allocator()->allocate();
-
- // Configure reshape layer if we want to drop the dimensions
- TensorShape out_shape = input->info()->tensor_shape();
- out_shape.remove_dimension(axis_local);
- auto_init_if_empty(*output->info(), output->info()->clone()->set_tensor_shape(out_shape));
- _reshape.configure(&_reduced_out, output);
-}
-
-template <ReductionOperation OP> void NEArgMinMaxStatic<OP>::run()
-{
- MemoryGroupResourceScope scope_mg(_memory_group);
-
- _reduction_kernel.run();
- _reshape.run();
-}
-
-// Supported Specializations
-template class NEArgMinMaxStatic<ReductionOperation::ARG_IDX_MAX>;
-template class NEArgMinMaxStatic<ReductionOperation::ARG_IDX_MIN>;
-} // namespace arm_compute
+++ /dev/null
-/*
- * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayerEx.h"
-
-#include "arm_compute/core/NEON/kernels/NEElementwiseUnaryKernelEx.h"
-#include "support/ToolchainSupport.h"
-
-#include <utility>
-
-namespace arm_compute
-{
-void NENegLayer::configure(const ITensor *input, ITensor *output)
-{
- auto k = arm_compute::support::cpp14::make_unique<NEElementwiseUnaryKernelEx>();
- k->configure(ElementWiseUnaryEx::NEG, input, output);
- _kernel = std::move(k);
-}
-Status NENegLayer::validate(const ITensorInfo *input, const ITensorInfo *output)
-{
- return NEElementwiseUnaryKernelEx::validate(ElementWiseUnaryEx::NEG, input, output);
-}
-} // namespace arm_compute
// Multiply scale
_multiply_scale_kernel.configure(&_gemmlowp_output, &_scale_factor, output,
- weights->info()->quantization_info().scale);
+ weights->info()->quantization_info().uniform().scale);
_are_weights_reshaped = _are_weights_reshaped || fc_info.retain_internal_weights;
ARM_COMPUTE_RETURN_ON_ERROR(validate_mm(quantized_input, *weights_to_use, gemmlowp_output));
ARM_COMPUTE_RETURN_ON_ERROR(NEMultiplyScaleFactorKernel::validate(
- &gemmlowp_output, &scale_factor, output, weights->quantization_info().scale));
+ &gemmlowp_output, &scale_factor, output, weights->quantization_info().uniform().scale));
return Status{};
}
// Since we need negative offsets for computing convolution, we create modified
// QuantizationInfo objects.
// Extract and negate the input and weights offsets
- const QuantizationInfo input_quantization_info(input.quantization_info().scale,
- -input.quantization_info().offset);
- const QuantizationInfo weights_quantization_info(weights.quantization_info().scale,
- -weights.quantization_info().offset);
+ const QuantizationInfo input_quantization_info(input.quantization_info().uniform().scale,
+ -input.quantization_info().uniform().offset);
+ const QuantizationInfo weights_quantization_info(weights.quantization_info().uniform().scale,
+ -weights.quantization_info().uniform().offset);
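+ // (real_value = scale * (q - offset); the gemmlowp offset contribution
+ // presumably adds the stored offsets, so they are negated here to get the
+ // required subtraction.)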
// Validate gemmlowp function
ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixMultiplyCore::validate(
const QuantizationInfo input_quantization_info = input->info()->quantization_info();
const QuantizationInfo weights_quantization_info = weights->info()->quantization_info();
- input->info()->set_quantization_info(
- QuantizationInfo(input_quantization_info.scale, -input_quantization_info.offset));
- weights->info()->set_quantization_info(
- QuantizationInfo(weights_quantization_info.scale, -weights_quantization_info.offset));
+ input->info()->set_quantization_info(QuantizationInfo(
+ input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset));
+ weights->info()->set_quantization_info(QuantizationInfo(
+ weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset));
// Configure gemmlowp function
_mm_gemmlowp.configure(input, weights, nullptr, output);
// Configure output stage for asymmetric quantized types
if (_is_quantized)
{
- float multiplier = input->info()->quantization_info().scale *
- weights->info()->quantization_info().scale /
- output->info()->quantization_info().scale;
+ float multiplier = input->info()->quantization_info().uniform().scale *
+ weights->info()->quantization_info().uniform().scale /
+ output->info()->quantization_info().uniform().scale;
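+ // Illustrative numbers: scale_in 0.02f, scale_w 0.005f and scale_out 0.1f give
+ // multiplier = 0.001; the helper below re-expresses it as a 32-bit fixed-point
+ // multiplier plus a right shift.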
int output_multiplier;
int output_shift;
quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier,
&output_shift);
_gemmlowp_output_stage.configure(&_gemmlowp_output, biases, output, output_multiplier,
- output_shift, output->info()->quantization_info().offset);
+ output_shift,
+ output->info()->quantization_info().uniform().offset);
_gemmlowp_output.allocator()->allocate();
}
_mtx_b_reshape_kernel = nullptr;
// Set internal variables
- _a_offset = a->info()->quantization_info().offset;
- _b_offset = b->info()->quantization_info().offset;
+ _a_offset = a->info()->quantization_info().uniform().offset;
+ _b_offset = b->info()->quantization_info().uniform().offset;
_run_vector_matrix_multiplication = a->info()->dimension(1) < 2;
_reshape_b_only_on_first_run = info.reshape_b_only_on_first_run();
_is_prepared = false;
TensorInfo tmp_b_info{};
TensorInfo mm_result_s32_info{};
- int32_t a_offset = a->quantization_info().offset;
- int32_t b_offset = b->quantization_info().offset;
+ int32_t a_offset = a->quantization_info().uniform().offset;
+ int32_t b_offset = b->quantization_info().uniform().offset;
bool fuse_output_stage = info.gemmlowp_output_stage().type != GEMMLowpOutputStageType::NONE;
if (fuse_output_stage)
// Check if we need to run the optimized assembly kernel
bool run_optimised = false;
bool run_optimised_requantized = false;
- const bool reshape_b_only_on_first_run = info.reshape_b_only_on_first_run();
if (a_to_use->data_type() == DataType::QASYMM8 &&
info.gemmlowp_output_stage().type == GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT)
{
- run_optimised = bool(NEGEMMAssemblyDispatch::validate(a_to_use, b, output, 1.f, 0.f,
- reshape_b_only_on_first_run));
+ run_optimised = bool(NEGEMMAssemblyDispatch::validate(a_to_use, b, c, output, gemm_info));
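+ // NOTE The updated validate() takes the bias tensor and a GEMMInfo in place of
+ // the loose alpha/beta/reshape-flag arguments.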
run_optimised_requantized = run_optimised;
}
else
{
run_optimised = bool(NEGEMMAssemblyDispatch::validate(
- a_to_use, b, fuse_output_stage ? &mm_result_s32_info : output, 1.f, 0.f,
- reshape_b_only_on_first_run));
+ a_to_use, b, c, fuse_output_stage ? &mm_result_s32_info : output, gemm_info));
}
if (run_optimised)
#include "arm_compute/core/CPP/Validate.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/runtime/Tensor.h"
using namespace arm_compute;
# Copy externals/SConstruct to externals/acl/ for Tizen build support.
# TODO The change of externals/SConstruct should be upstreamed to ARM Compute Library community layer.
- execute_process(COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_LIST_DIR}/ARMCompute/SConstruct" "${ARMComputeSource_DIR}")
+ # execute_process(COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_LIST_DIR}/ARMCompute/SConstruct" "${ARMComputeSource_DIR}")
# Build ARMCompute libraries with SCONS
# NOTE ARMCompute SConstruct unconditionally appends the "arm-linux-gnueabihf-" prefix for linux
nnas_include(OptionTools)
envoption(EXTERNAL_DOWNLOAD_SERVER "https://github.com")
- set(ARMCOMPUTE_URL ${EXTERNAL_DOWNLOAD_SERVER}/ARM-software/ComputeLibrary/archive/v19.05.tar.gz)
+ set(ARMCOMPUTE_URL ${EXTERNAL_DOWNLOAD_SERVER}/ARM-software/ComputeLibrary/archive/v19.11.1.tar.gz)
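+ # NOTE The v19.11 line drops CLMemoryGroup and reworks QuantizationInfo, which
+ # is what drives the MemoryGroup and uniform() changes elsewhere in this patch.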
ExternalSource_Get(ARMCOMPUTE ${DOWNLOAD_ARMCOMPUTE} ${ARMCOMPUTE_URL})
set(ARMComputeSource_DIR ${ARMCOMPUTE_SOURCE_DIR} PARENT_SCOPE)
const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
const auto act_info = acl_common::asActivationLayerInfo(activation);
- if (ker_height == 3 && ker_width == 3)
- {
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLDepthwiseConvolutionLayer3x3>(
- _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
-
- fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(),
- ofm_alloc->handle(), conv_info, multiplier, act_info);
-
- _execution_builder->append(asAclFunction(std::move(fn)));
- }
- else
- {
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLDepthwiseConvolutionLayer>();
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLDepthwiseConvolutionLayer>(
+ _tensor_builder->acl_tensor_manager()->internal_buffer_manager());
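+ // NOTE The 3x3-specialized branch is dropped; after the ACL update the generic
+ // function is expected to select the optimized 3x3 kernel internally.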
- fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(),
- ofm_alloc->handle(), conv_info, multiplier, act_info);
+ fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(), ofm_alloc->handle(),
+ conv_info, multiplier, act_info);
- _execution_builder->append(asAclFunction(std::move(fn)));
- }
+ _execution_builder->append(asAclFunction(std::move(fn)));
}
void KernelGenerator::visit(const ir::operation::MaxPool2D &node)
#include <arm_compute/runtime/PoolManager.h>
#include <arm_compute/runtime/BlobLifetimeManager.h>
#include <arm_compute/runtime/MemoryManagerOnDemand.h>
-#include <arm_compute/runtime/CL/CLMemoryGroup.h>
+#include <arm_compute/runtime/MemoryGroup.h>
#include <AclMemoryManager.h>
#include <AclLinearMemoryManager.h>
operand::ICLTensor, operand::CLTensor, operand::CLSubTensor,
::arm_compute::MemoryManagerOnDemand, ::arm_compute::PoolManager,
::arm_compute::BlobLifetimeManager, ::arm_compute::CLBufferAllocator,
- ::arm_compute::CLMemoryGroup>;
+ ::arm_compute::MemoryGroup>;
using InternalBufferManager = acl_common::AclInternalBufferManager<
::arm_compute::MemoryManagerOnDemand, ::arm_compute::PoolManager,
assert(parent_tensor != nullptr);
// Child's type should be the same as the parent's
- assert(info.type().offset() == parent_tensor->info()->quantization_info().offset);
- assert(info.type().scale() == parent_tensor->info()->quantization_info().scale);
+ assert(info.type().offset() == parent_tensor->info()->quantization_info().uniform().offset);
+ assert(info.type().scale() == parent_tensor->info()->quantization_info().uniform().scale);
assert(asDataType(info.type().type()) == parent_tensor->info()->data_type());
// NOTE SubTensor's layout must be the same with layout of parent tensor
const auto fixed_axis =
acl_common::ToARMComputeAxis(ifm_rank, axis_value, frontend_layout, backend_layout).value();
- // auto fn = nnfw::cpp14::make_unique<::arm_compute::NEArgMinMaxLayer>();
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEArgMax>();
-
- // NOTE
- // if (ofm_alloc->info()->data_type() == arm_compute::DataType::S32)
- //{
- ofm_alloc->info()->set_data_type(arm_compute::DataType::U32);
- //}
- fn->configure(ifm_alloc->handle(), fixed_axis, ofm_alloc->handle());
- // fn->configure(ifm_alloc->handle(), fixed_axis, ofm_alloc->handle(),
- // arm_compute::ReductionOperation::ARG_IDX_MAX);
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::NEArgMinMaxLayer>();
+
+ fn->configure(ifm_alloc->handle(), fixed_axis, ofm_alloc->handle(),
+ arm_compute::ReductionOperation::ARG_IDX_MAX);
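+ // NOTE Upstream NEArgMinMaxLayer takes the reduction op as an argument, so the
+ // local NEArgMax extension and the manual U32 data-type override are no longer
+ // needed.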
auto acl_fn = asAclFunction(std::move(fn));
const auto conv_info = acl_common::asPadStrideInfo(padding, stride);
const auto act_info = acl_common::asActivationLayerInfo(activation);
- if (ker_height == 3 && ker_width == 3)
- {
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEDepthwiseConvolutionLayer3x3>();
-
- fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(),
- ofm_alloc->handle(), conv_info, multiplier, act_info);
-
- _execution_builder->append(asAclFunction(std::move(fn)));
- }
- else
- {
- auto fn = nnfw::cpp14::make_unique<::arm_compute::NEDepthwiseConvolutionLayer>();
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::NEDepthwiseConvolutionLayer>();
- fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(),
- ofm_alloc->handle(), conv_info, multiplier, act_info);
+ fn->configure(ifm_alloc->handle(), ker_alloc->handle(), bias_alloc->handle(), ofm_alloc->handle(),
+ conv_info, multiplier, act_info);
- _execution_builder->append(asAclFunction(std::move(fn)));
- }
+ _execution_builder->append(asAclFunction(std::move(fn)));
}
void KernelGenerator::visit(const ir::operation::Dequantize &node)