"src/core/CL/kernels/CLAccumulateKernel.cpp",
"src/core/CL/kernels/CLActivationLayerKernel.cpp",
"src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp",
- "src/core/CL/kernels/CLBatchConcatenateLayerKernel.cpp",
"src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp",
"src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp",
"src/core/CL/kernels/CLBitwiseKernel.cpp",
"src/core/CL/kernels/CLCropKernel.cpp",
"src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp",
"src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.cpp",
- "src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp",
"src/core/CL/kernels/CLDepthConvertLayerKernel.cpp",
"src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp",
"src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp",
"src/core/CL/kernels/CLHOGDescriptorKernel.cpp",
"src/core/CL/kernels/CLHOGDetectorKernel.cpp",
"src/core/CL/kernels/CLHarrisCornersKernel.cpp",
- "src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp",
"src/core/CL/kernels/CLHistogramKernel.cpp",
"src/core/CL/kernels/CLIm2ColKernel.cpp",
"src/core/CL/kernels/CLInstanceNormalizationLayerKernel.cpp",
"src/core/CL/kernels/CLWarpAffineKernel.cpp",
"src/core/CL/kernels/CLWarpPerspectiveKernel.cpp",
"src/core/CL/kernels/CLWeightsReshapeKernel.cpp",
- "src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp",
- "src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp",
- "src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp",
"src/core/CL/kernels/CLWinogradFilterTransformKernel.cpp",
"src/core/CL/kernels/CLWinogradInputTransformKernel.cpp",
"src/core/CL/kernels/CLWinogradOutputTransformKernel.cpp",
"src/core/cpu/kernels/add/sve/qsymm16.cpp",
"src/core/cpu/kernels/floor/NEON/fp16.cpp",
"src/core/cpu/kernels/floor/NEON/fp32.cpp",
+ "src/core/gpu/cl/kernels/ClBatchConcatenateKernel.cpp",
+ "src/core/gpu/cl/kernels/ClDepthConcatenateKernel.cpp",
+ "src/core/gpu/cl/kernels/ClHeightConcatenateKernel.cpp",
+ "src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.cpp",
+ "src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.cpp",
+ "src/core/gpu/cl/kernels/ClWidthConcatenateKernel.cpp",
"src/core/helpers/SoftmaxHelpers.cpp",
"src/core/helpers/WindowHelpers.cpp",
"src/core/utils/ScaleUtils.cpp",
"src/runtime/cpu/operators/CpuFloor.cpp",
"src/runtime/cpu/operators/CpuPermute.cpp",
"src/runtime/cpu/operators/CpuReshape.cpp",
+ "src/runtime/gpu/cl/operators/ClConcatenate.cpp",
"utils/CommonGraphOptions.cpp",
"utils/GraphUtils.cpp",
"utils/Utils.cpp",
core_files += Glob('src/core/CL/gemm/native/*.cpp')
core_files += Glob('src/core/CL/gemm/reshaped/*.cpp')
core_files += Glob('src/core/CL/gemm/reshaped_only_rhs/*.cpp')
+ core_files += Glob('src/core/gpu/cl/*.cpp')
+ core_files += Glob('src/core/gpu/cl/kernels/*.cpp')
runtime_files += Glob('src/runtime/CL/*.cpp')
runtime_files += Glob('src/runtime/CL/functions/*.cpp')
runtime_files += Glob('src/runtime/CL/gemm/*.cpp')
runtime_files += Glob('src/runtime/CL/tuners/*.cpp')
+ runtime_files += Glob('src/runtime/gpu/cl/*.cpp')
+ runtime_files += Glob('src/runtime/gpu/cl/operators/*.cpp')
graph_files += Glob('src/graph/backends/CL/*.cpp')
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#ifndef ARM_COMPUTE_CLCONCATENATELAYER_H
#define ARM_COMPUTE_CLCONCATENATELAYER_H
-#include "arm_compute/runtime/CL/ICLOperator.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/core/Types.h"
/** Basic function to execute concatenate tensors along a given axis. This function calls the following kernels:
*
- * -# @ref CLWidthConcatenateLayerKernel (if underlying concatenation axis is 0).
- * -# @ref CLHeightConcatenateLayerKernel (if underlying concatenation axis is 1).
- * -# @ref CLDepthConcatenateLayerKernel (if underlying concatenation axis is 2).
- * -# @ref CLBatchConcatenateLayerKernel (if underlying concatenation axis is 3).
+ * -# @ref opencl::kernels::ClWidthConcatenateKernel (if underlying concatenation axis is 0).
+ * -# @ref opencl::kernels::ClHeightConcatenateKernel (if underlying concatenation axis is 1).
+ * -# @ref opencl::kernels::ClDepthConcatenateKernel (if underlying concatenation axis is 2).
+ * -# @ref opencl::kernels::ClBatchConcatenateKernel (if underlying concatenation axis is 3).
*/
class CLConcatenateLayer : public IFunction
{
/** Initialise the kernel's inputs vector and output.
*
 * @note Input and output tensor dimensions preconditions differ depending on the concatenation axis.
- * @note Preconditions can be found respectively at @ref CLWidthConcatenateLayerKernel, @ref CLHeightConcatenateLayerKernel and @ref CLDepthConcatenateLayerKernel.
+ * @note Preconditions can be found respectively at @ref opencl::kernels::ClWidthConcatenateKernel,
+ * @ref opencl::kernels::ClHeightConcatenateKernel and @ref opencl::kernels::ClDepthConcatenateKernel.
*
* @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: All
* @param[out] output Output tensor. Data types supported: Same as @p input.
/** Initialise the kernel's inputs vector and output.
*
 * @note Input and output tensor dimensions preconditions differ depending on the concatenation axis.
- * @note Preconditions can be found respectively at @ref CLWidthConcatenateLayerKernel, @ref CLHeightConcatenateLayerKernel and @ref CLDepthConcatenateLayerKernel.
+ * @note Preconditions can be found respectively at @ref opencl::kernels::ClWidthConcatenateKernel,
+ * @ref opencl::kernels::ClHeightConcatenateKernel and @ref opencl::kernels::ClDepthConcatenateKernel.
*
* @param[in] compile_context The compile context to be used.
* @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: All
/** Static function to check if given info will lead to a valid configuration of @ref CLConcatenateLayer
*
 * @note Input and output tensor dimensions preconditions differ depending on the concatenation axis.
- * @note Preconditions can be found respectively at @ref CLWidthConcatenateLayerKernel, @ref CLHeightConcatenateLayerKernel and @ref CLDepthConcatenateLayerKernel.
+ * @note Preconditions can be found respectively at @ref opencl::kernels::ClWidthConcatenateKernel,
+ * @ref opencl::kernels::ClHeightConcatenateKernel and @ref opencl::kernels::ClDepthConcatenateKernel.
*
* @param[in] inputs_vector The vectors containing all the tensors info to concatenate. Data types supported: All.
* @param[in] output Output tensor info. Data types supported: Same as @p input.
struct Impl;
std::unique_ptr<Impl> _impl;
};
-
-namespace experimental
-{
-/** Basic function to execute concatenate tensors along a given axis. This function calls the following kernels:
- *
- * -# @ref CLWidthConcatenateLayerKernel (if underlying concatenation axis is 0).
- * -# @ref CLHeightConcatenateLayerKernel (if underlying concatenation axis is 1).
- * -# @ref CLDepthConcatenateLayerKernel (if underlying concatenation axis is 2).
- * -# @ref CLBatchConcatenateLayerKernel (if underlying concatenation axis is 3).
- */
-class CLConcatenation : public ICLOperator
-{
-public:
- /** Default constructor */
- CLConcatenation();
- /** Initialise the kernel's inputs vector and output.
- *
- * @note Input and output tensor dimensions preconditions defer depending on the concatenation axis.
- * @note Preconditions can be found respectively at @ref CLWidthConcatenateLayerKernel, @ref CLHeightConcatenateLayerKernel and @ref CLDepthConcatenateLayerKernel.
- *
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: All
- * @param[out] output Output tensor. Data types supported: Same as @p input.
- * @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3.
- */
- void configure(const CLCompileContext &compile_context, const std::vector<ITensorInfo *> &inputs_vector, ITensorInfo *output, size_t axis);
- /** Static function to check if given info will lead to a valid configuration of @ref NEConcatenateLayer
- *
- * @note Input and output tensor dimensions preconditions defer depending on the concatenation axis.
- * @note Preconditions can be found respectively at @ref CLWidthConcatenateLayerKernel, @ref CLHeightConcatenateLayerKernel and @ref CLDepthConcatenateLayerKernel.
- *
- * @param[in] inputs_vector The vectors containing all the tensors info to concatenate. Data types supported: All
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
- * @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3.
- *
- * @return a status
- */
- static Status validate(const std::vector<const ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis);
-
- // Inherited methods overridden:
- void run(ITensorPack &tensors) override;
-
-private:
- std::vector<std::unique_ptr<ICLKernel>> _concat_kernels;
- unsigned int _num_inputs;
- unsigned int _axis;
-};
-} // namespace experimental
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLCONCATENATELAYER_H */
- @ref NEGEMMLowpMatrixAReductionKernel
- @ref NEGEMMLowpMatrixBReductionKernel
- Removed padding from OpenCL kernels:
- - @ref CLBatchConcatenateLayerKernel
+ - CLBatchConcatenateLayerKernel
- @ref CLElementwiseOperationKernel
- @ref CLBatchNormalizationLayerKernel
- @ref CLPoolingLayerKernel
- @ref CLDepthwiseConvolutionLayer3x3NHWCKernel
- @ref CLActivationLayerKernel
- @ref CLWinogradFilterTransformKernel
- - @ref CLWidthConcatenateLayerKernel
- - @ref CLWidthConcatenate4TensorsKernel
- - @ref CLWidthConcatenate2TensorsKernel
+ - CLWidthConcatenateLayerKernel
+ - CLWidthConcatenate4TensorsKernel
+ - CLWidthConcatenate2TensorsKernel
- @ref CLLogits1DMaxShiftExpSumKernel
- @ref CLLogits1DNormKernel
- - @ref CLHeightConcatenateLayerKernel
+ - CLHeightConcatenateLayerKernel
- @ref CLGEMMMatrixMultiplyKernel
- @ref CLGEMMLowpQuantizeDownInt32ScaleKernel
- @ref CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel
- @ref CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel
- - @ref CLDepthConcatenateLayerKernel
+ - CLDepthConcatenateLayerKernel
- @ref CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel
- Removed OpenCL kernels / functions:
- CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel
- @ref CLNegLayer
- @ref CLPReluLayer
- @ref CLSinLayer
- - @ref CLBatchConcatenateLayerKernel
+ - CLBatchConcatenateLayerKernel
- @ref CLDepthToSpaceLayerKernel / @ref CLDepthToSpaceLayer
- @ref CLGEMMLowpMatrixMultiplyNativeKernel
- CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel
- @ref CLFFTScaleKernel
- @ref CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel
- @ref CLGEMMMatrixMultiplyReshapedOnlyRHSKernel
- - @ref CLHeightConcatenateLayerKernel
+ - CLHeightConcatenateLayerKernel
- @ref CLDirectDeconvolutionLayer
- @ref CLFFT1D
- @ref CLFFT2D
- @ref CLCopy / @ref CLCopyKernel
- @ref CLLSTMLayer
- @ref CLRNNLayer
- - CLWidthConcatenateLayer / @ref CLWidthConcatenateLayerKernel
+ - CLWidthConcatenateLayer / CLWidthConcatenateLayerKernel
- @ref CLWinogradFilterTransformKernel / @ref CLWinogradInputTransformKernel / @ref CLWinogradConvolutionLayer
- @ref CLWinogradInputTransformKernel / @ref CLWinogradInputTransform
- New Neon kernels / functions:
- User can specify his own scheduler by implementing the @ref IScheduler interface.
- New OpenCL kernels / functions:
- @ref CLBatchNormalizationLayerKernel / @ref CLBatchNormalizationLayer
- - @ref CLDepthConcatenateLayerKernel / CLDepthConcatenateLayer
+ - CLDepthConcatenateLayerKernel / CLDepthConcatenateLayer
- @ref CLHOGOrientationBinningKernel @ref CLHOGBlockNormalizationKernel, @ref CLHOGDetectorKernel / @ref CLHOGDescriptor @ref CLHOGDetector @ref CLHOGGradient @ref CLHOGMultiDetection
- CLLocallyConnectedMatrixMultiplyKernel / CLLocallyConnectedLayer
- @ref CLWeightsReshapeKernel / @ref CLConvolutionLayerReshapeWeights
/*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#include "src/core/CL/kernels/CLAccumulateKernel.h"
#include "src/core/CL/kernels/CLActivationLayerKernel.h"
#include "src/core/CL/kernels/CLArgMinMaxLayerKernel.h"
-#include "src/core/CL/kernels/CLBatchConcatenateLayerKernel.h"
#include "src/core/CL/kernels/CLBatchNormalizationLayerKernel.h"
#include "src/core/CL/kernels/CLBatchToSpaceLayerKernel.h"
#include "src/core/CL/kernels/CLBitwiseKernel.h"
#include "src/core/CL/kernels/CLCropKernel.h"
#include "src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h"
#include "src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h"
-#include "src/core/CL/kernels/CLDepthConcatenateLayerKernel.h"
#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
#include "src/core/CL/kernels/CLDepthToSpaceLayerKernel.h"
#include "src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h"
#include "src/core/CL/kernels/CLHOGDescriptorKernel.h"
#include "src/core/CL/kernels/CLHOGDetectorKernel.h"
#include "src/core/CL/kernels/CLHarrisCornersKernel.h"
-#include "src/core/CL/kernels/CLHeightConcatenateLayerKernel.h"
#include "src/core/CL/kernels/CLHistogramKernel.h"
#include "src/core/CL/kernels/CLIm2ColKernel.h"
#include "src/core/CL/kernels/CLInstanceNormalizationLayerKernel.h"
#include "src/core/CL/kernels/CLWarpAffineKernel.h"
#include "src/core/CL/kernels/CLWarpPerspectiveKernel.h"
#include "src/core/CL/kernels/CLWeightsReshapeKernel.h"
-#include "src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h"
-#include "src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h"
-#include "src/core/CL/kernels/CLWidthConcatenateLayerKernel.h"
#include "src/core/CL/kernels/CLWinogradFilterTransformKernel.h"
#include "src/core/CL/kernels/CLWinogradInputTransformKernel.h"
#include "src/core/CL/kernels/CLWinogradOutputTransformKernel.h"
+++ /dev/null
-/*
- * Copyright (c) 2019-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/core/CL/kernels/CLBatchConcatenateLayerKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Utils.h"
-#include "src/core/CL/CLValidate.h"
-#include "src/core/helpers/WindowHelpers.h"
-#include "support/Cast.h"
-
-#include "support/StringSupport.h"
-
-namespace arm_compute
-{
-namespace
-{
-Status validate_arguments(const ITensorInfo *input, unsigned int batch_offset, const ITensorInfo *output)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
-
- ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(Window::DimX) != output->dimension(Window::DimX));
- ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(Window::DimY) != output->dimension(Window::DimY));
- ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(Window::DimZ) != output->dimension(Window::DimZ));
- ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(3) + batch_offset > output->dimension(3));
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(4, input, output);
-
- return Status{};
-}
-} // namespace
-
-CLBatchConcatenateLayerKernel::CLBatchConcatenateLayerKernel()
- : _batch_offset(0)
-{
-}
-
-void CLBatchConcatenateLayerKernel::configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int batch_offset, ITensorInfo *output)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input, batch_offset, output));
-
- auto padding_info = get_padding_info({ input, output });
-
- _batch_offset = batch_offset;
-
- const unsigned int num_elems_processed_per_iteration = adjust_vec_size(16 / input->element_size(), input->dimension(0));
-
- // Add build options
- CLBuildOptions build_opts;
- build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->data_type()));
- build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration));
- build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(input->dimension(0) % num_elems_processed_per_iteration));
- if(is_data_type_quantized_asymmetric(input->data_type()) && input->quantization_info() != output->quantization_info())
- {
- const UniformQuantizationInfo iq_info = input->quantization_info().uniform();
- const UniformQuantizationInfo oq_info = output->quantization_info().uniform();
-
- build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq_info.offset));
- build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset));
- build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq_info.scale));
- build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oq_info.scale));
- }
-
- // Create kernel
- _kernel = create_kernel(compile_context, "concatenate", build_opts.options());
-
- // Configure kernel window
- auto win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration));
- win.set(3, Window::Dimension(0, input->tensor_shape()[3], 1));
- ICLKernel::configure_internal(win);
-
- // Set output valid region
- output->set_valid_region(ValidRegion(Coordinates(), output->tensor_shape()));
-
- // Set config_id for enabling LWS tuning
- _config_id = "concatenate_";
- _config_id += support::cpp11::to_string(3);
- _config_id += "_";
- _config_id += support::cpp11::to_string(batch_offset);
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->dimension(0));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->dimension(1));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->dimension(2));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->dimension(3));
-
- ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
-}
-
-Status CLBatchConcatenateLayerKernel::validate(const arm_compute::ITensorInfo *input,
- unsigned int batch_offset,
- const arm_compute::ITensorInfo *output)
-{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, batch_offset, output));
- return Status{};
-}
-
-void CLBatchConcatenateLayerKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
-
- const auto src = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC));
- auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST));
-
- Window slice = window.first_slice_window_3D();
-
- const int offset_to_first_elements_in_bytes = _batch_offset * dst->info()->strides_in_bytes()[3];
-
- unsigned int idx = 2 * num_arguments_per_3D_tensor(); // Skip the input and output parameters
- _kernel.setArg<cl_int>(idx, offset_to_first_elements_in_bytes);
-
- do
- {
- unsigned int idx = 0;
- add_3D_tensor_argument(idx, src, slice);
- add_3D_tensor_argument(idx, dst, slice);
- enqueue(queue, *this, slice, lws_hint());
- }
- while(window.slide_window_slice_3D(slice));
-}
-} // namespace arm_compute
+++ /dev/null
-/*
- * Copyright (c) 2019-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef ARM_COMPUTE_CLBATCHCONCATENATEKERNEL_H
-#define ARM_COMPUTE_CLBATCHCONCATENATEKERNEL_H
-
-#include "arm_compute/core/Types.h"
-#include "src/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the batch concatenate kernel.
- * The input tensor will be concatenated into the output tensor.
- */
-class CLBatchConcatenateLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLBatchConcatenateLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLBatchConcatenateLayerKernel(const CLBatchConcatenateLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLBatchConcatenateLayerKernel &operator=(const CLBatchConcatenateLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLBatchConcatenateLayerKernel(CLBatchConcatenateLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLBatchConcatenateLayerKernel &operator=(CLBatchConcatenateLayerKernel &&) = default;
- /** Default destructor */
- ~CLBatchConcatenateLayerKernel() = default;
- /** Initialise the kernel's inputs and output
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor. Data types supported: All.
- * @param[in] batch_offset The offset on axis # 3.
- * @param[in,out] output Output tensor. Data types supported: Same as @p input.
- *
- * @note: The output tensor's low two dimensions can't be smaller than the input one's.
- * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2.
- *
- */
- void configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int batch_offset, ITensorInfo *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLBatchConcatenateLayerKernel
- *
- * @param[in] input Input tensor info. Data types supported: All.
- * @param[in] batch_offset The offset on axis # 3.
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, unsigned int batch_offset, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
-
-private:
- unsigned int _batch_offset;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLBATCHCONCATENATEKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/core/CL/kernels/CLDepthConcatenateLayerKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Utils.h"
-#include "src/core/CL/CLValidate.h"
-#include "src/core/helpers/WindowHelpers.h"
-#include "support/Cast.h"
-
-#include "support/StringSupport.h"
-
-namespace arm_compute
-{
-namespace
-{
-Status validate_arguments(const ITensorInfo *input, unsigned int depth_offset, const ITensorInfo *output)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
-
- ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(Window::DimX) != output->dimension(Window::DimX));
- ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(Window::DimY) != output->dimension(Window::DimY));
- ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(2) + depth_offset > output->dimension(2));
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(3, input, output);
-
- return Status{};
-}
-} // namespace
-
-CLDepthConcatenateLayerKernel::CLDepthConcatenateLayerKernel()
- : _depth_offset(0)
-{
-}
-
-void CLDepthConcatenateLayerKernel::configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int depth_offset, ITensorInfo *output)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input, depth_offset, output));
-
- auto padding_info = get_padding_info({ input, output });
-
- _depth_offset = depth_offset;
-
- const unsigned int num_elems_processed_per_iteration = adjust_vec_size(16 / input->element_size(), input->dimension(0));
-
- // Add build options
- CLBuildOptions build_opts;
- build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->data_type()));
- build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration));
- build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(input->dimension(0) % num_elems_processed_per_iteration));
- if(is_data_type_quantized_asymmetric(input->data_type()) && input->quantization_info() != output->quantization_info())
- {
- const UniformQuantizationInfo iq_info = input->quantization_info().uniform();
- const UniformQuantizationInfo oq_info = output->quantization_info().uniform();
-
- build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq_info.offset));
- build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset));
- build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq_info.scale));
- build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oq_info.scale));
- }
-
- // Create kernel
- _kernel = create_kernel(compile_context, "concatenate", build_opts.options());
-
- // Configure kernel window
- auto win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration));
- win.set(Window::DimZ, Window::Dimension(0, input->tensor_shape().z(), 1));
- ICLKernel::configure_internal(win);
-
- // Set output valid region
- output->set_valid_region(ValidRegion(Coordinates(), output->tensor_shape()));
-
- ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
-}
-
-Status CLDepthConcatenateLayerKernel::validate(const arm_compute::ITensorInfo *input,
- unsigned int depth_offset,
- const arm_compute::ITensorInfo *output)
-{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, depth_offset, output));
- return Status{};
-}
-
-void CLDepthConcatenateLayerKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
-
- const auto src = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC));
- auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST));
-
- Window slice = window.first_slice_window_3D();
-
- const int offset_to_first_elements_in_bytes = _depth_offset * dst->info()->strides_in_bytes()[2];
-
- unsigned int idx = 2 * num_arguments_per_3D_tensor(); // Skip the input and output parameters
- _kernel.setArg<cl_int>(idx, offset_to_first_elements_in_bytes);
-
- do
- {
- unsigned int idx = 0;
- add_3D_tensor_argument(idx, src, slice);
- add_3D_tensor_argument(idx, dst, slice);
- enqueue(queue, *this, slice, lws_hint());
- }
- while(window.slide_window_slice_3D(slice));
-}
-} // namespace arm_compute
+++ /dev/null
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef ARM_COMPUTE_CLDEPTHCONCATENATEKERNEL_H
-#define ARM_COMPUTE_CLDEPTHCONCATENATEKERNEL_H
-
-#include "arm_compute/core/Types.h"
-#include "src/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-/** Interface for the depth concatenate kernel.
- * The input tensor will be concatenated into the output tensor.
- */
-class CLDepthConcatenateLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLDepthConcatenateLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDepthConcatenateLayerKernel(const CLDepthConcatenateLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDepthConcatenateLayerKernel &operator=(const CLDepthConcatenateLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLDepthConcatenateLayerKernel(CLDepthConcatenateLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLDepthConcatenateLayerKernel &operator=(CLDepthConcatenateLayerKernel &&) = default;
- /** Default destructor */
- ~CLDepthConcatenateLayerKernel() = default;
- /** Initialise the kernel's inputs and output
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] depth_offset The offset on the Z axis.
- * @param[in,out] output Output tensor. Data types supported: Same as @p input.
- *
- * @note: The output tensor's low two dimensions can't be smaller than the input one's.
- * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2.
- *
- */
- void configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int depth_offset, ITensorInfo *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLDepthConcatenateLayerKernel
- *
- * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
- * @param[in] depth_offset The offset on the Z axis.
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, unsigned int depth_offset, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
-
-private:
- unsigned int _depth_offset;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLDEPTHCONCATENATEKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2019-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/core/CL/kernels/CLHeightConcatenateLayerKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "src/core/CL/CLValidate.h"
-#include "src/core/helpers/WindowHelpers.h"
-#include "support/Cast.h"
-
-#include "support/StringSupport.h"
-
-namespace arm_compute
-{
-namespace
-{
-Status validate_arguments(const ITensorInfo *input, unsigned int height_offset, const ITensorInfo *output)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(Window::DimY) + height_offset > output->dimension(Window::DimY));
-
- ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) != output->dimension(0));
- for(size_t i = 2; i < Coordinates::num_max_dimensions; ++i)
- {
- ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(i) != output->dimension(i));
- }
- ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4);
-
- return Status{};
-}
-} // namespace
-
-CLHeightConcatenateLayerKernel::CLHeightConcatenateLayerKernel()
- : _height_offset(0)
-{
-}
-
-Status CLHeightConcatenateLayerKernel::validate(const ITensorInfo *input, unsigned int height_offset, const ITensorInfo *output)
-{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, height_offset, output));
- return Status{};
-}
-
-void CLHeightConcatenateLayerKernel::configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int height_offset, ITensorInfo *output)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input, height_offset, output));
-
- auto padding_info = get_padding_info({ input, output });
-
- _height_offset = height_offset;
-
- // Add build options
- const unsigned int num_elems_processed_per_iteration = adjust_vec_size(4, input->dimension(0));
-
- CLBuildOptions build_opts;
- build_opts.add_option("-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size(input->element_size()));
- build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration));
- build_opts.add_option("-DHEIGHT_OFFSET=" + support::cpp11::to_string(_height_offset));
- build_opts.add_option("-DDEPTH=" + support::cpp11::to_string(input->dimension(2)));
- build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(input->dimension(0) % num_elems_processed_per_iteration));
-
- if(is_data_type_quantized_asymmetric(input->data_type()) && input->quantization_info() != output->quantization_info())
- {
- const UniformQuantizationInfo iq_info = input->quantization_info().uniform();
- const UniformQuantizationInfo oq_info = output->quantization_info().uniform();
-
- build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq_info.offset));
- build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset));
- build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq_info.scale));
- build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oq_info.scale));
- }
-
- // Create kernel
- _kernel = create_kernel(compile_context, "concatenate_height", build_opts.options());
- // Configure kernel window
-
- // The window needs to be based on input as we copy all the heights of input
- Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration));
- ICLKernel::configure_internal(win.collapse(win, Window::DimZ));
-
- // Set output valid region
- output->set_valid_region(ValidRegion(Coordinates(), output->tensor_shape()));
-
- ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
-}
-
-void CLHeightConcatenateLayerKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
-
- const auto src = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC));
- auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST));
-
- unsigned int idx = 0;
- add_4D_tensor_argument(idx, src, window);
- add_4D_tensor_argument(idx, dst, window);
- enqueue(queue, *this, window, lws_hint());
-}
-} // namespace arm_compute
+++ /dev/null
-/*
- * Copyright (c) 2019-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef ARM_COMPUTE_CLHEIGHTCONCATENATELAYERKERNEL_H
-#define ARM_COMPUTE_CLHEIGHTCONCATENATELAYERKERNEL_H
-
-#include "arm_compute/core/Types.h"
-#include "src/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-/** Interface for the height concatenate kernel.
- * The input tensor will be concatenated into the output tensor.
- */
-class CLHeightConcatenateLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLHeightConcatenateLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLHeightConcatenateLayerKernel(const CLHeightConcatenateLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLHeightConcatenateLayerKernel &operator=(const CLHeightConcatenateLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLHeightConcatenateLayerKernel(CLHeightConcatenateLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLHeightConcatenateLayerKernel &operator=(CLHeightConcatenateLayerKernel &&) = default;
- /** Default destructor */
- ~CLHeightConcatenateLayerKernel() = default;
- /** Initialise the kernel's inputs and output
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor. Data types supported: All.
- * @param[in] height_offset The starting offset on the Y axis for the output tensor.
- * @param[out] output Output tensor. Data types supported: Same as @p input.
- *
- */
- void configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int height_offset, ITensorInfo *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLHeightConcatenateLayerKernel
- *
- * @param[in] input Input tensor info. Data types supported: All.
- * @param[in] height_offset The starting offset on the Y axis for the output tensor.
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, unsigned int height_offset, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
-
-private:
- unsigned int _height_offset;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLHEIGHTCONCATENATELAYERKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "src/core/CL/CLValidate.h"
-#include "src/core/helpers/WindowHelpers.h"
-#include "src/core/utils/helpers/tensor_info.h"
-#include "support/Cast.h"
-
-#include "support/StringSupport.h"
-
-namespace arm_compute
-{
-namespace
-{
-Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input1, input2, output);
- ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input1);
- ARM_COMPUTE_RETURN_ERROR_ON(input1->data_type() == DataType::UNKNOWN);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input1, input2, output);
- ARM_COMPUTE_RETURN_ERROR_ON(input1->dimension(0) + input2->dimension(0) > output->dimension(0));
-
- for(size_t i = 1; i < Coordinates::num_max_dimensions; ++i)
- {
- ARM_COMPUTE_RETURN_ERROR_ON(input1->dimension(i) != output->dimension(i));
- ARM_COMPUTE_RETURN_ERROR_ON(input2->dimension(i) != output->dimension(i));
- }
- ARM_COMPUTE_RETURN_ERROR_ON(input1->num_dimensions() > 4);
-
- return Status{};
-}
-} // namespace
-
-Status CLWidthConcatenate2TensorsKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output)
-{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input1, input2, output));
- return Status{};
-}
-
-void CLWidthConcatenate2TensorsKernel::configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input1, input2, output));
-
- auto padding_info = get_padding_info({ input1, input2, output });
-
- const unsigned int min_dimension = std::min(input1->dimension(0), input2->dimension(0));
- const unsigned int num_elems_processed_per_iteration = adjust_vec_size(8, min_dimension);
- const unsigned int vec_size_leftover = output->dimension(0) % num_elems_processed_per_iteration;
-
- // Add build options
- CLBuildOptions build_opts;
- build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input1->data_type()));
- build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration));
- build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(vec_size_leftover));
- build_opts.add_option("-DDEPTH=" + support::cpp11::to_string(input1->dimension(2)));
- build_opts.add_option("-DINPUT1_WIDTH=" + support::cpp11::to_string(input1->dimension(0)));
- build_opts.add_option("-DINPUT2_WIDTH=" + support::cpp11::to_string(input2->dimension(0)));
- build_opts.add_option("-DELEMENT_SIZE=" + support::cpp11::to_string(input1->element_size()));
- build_opts.add_option("-DINPUT1_ROTATE_N=" + support::cpp11::to_string((input1->dimension(0) - vec_size_leftover) % num_elems_processed_per_iteration));
-
- // If input have different quantization info set quantization parameters needed for the re-quantization process
- const bool have_different_qinfo = helpers::tensor_info::tensors_have_different_quantization_info(output, input1, input2);
- if(is_data_type_quantized_asymmetric(input1->data_type()) && have_different_qinfo)
- {
- const UniformQuantizationInfo iq1_info = input1->quantization_info().uniform();
- const UniformQuantizationInfo iq2_info = input2->quantization_info().uniform();
- const UniformQuantizationInfo oq_info = output->quantization_info().uniform();
-
- build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq1_info.offset));
- build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq1_info.scale));
- build_opts.add_option("-DOFFSET_IN2=" + float_to_string_with_full_precision(iq2_info.offset));
- build_opts.add_option("-DSCALE_IN2=" + float_to_string_with_full_precision(iq2_info.scale));
- build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset));
- build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oq_info.scale));
- }
-
- // Create kernel
- _kernel = create_kernel(compile_context, "concatenate_width_x2", build_opts.options());
-
- // Configure kernel window
- Window win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration));
- ICLKernel::configure_internal(win.collapse(win, Window::DimZ));
-
- // Set output valid region
- output->set_valid_region(ValidRegion(Coordinates(), output->tensor_shape()));
- ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
-
- // Set config_id for enabling LWS tuning
- _config_id = "concatenate_width_x2_";
- _config_id += lower_string(string_from_data_type(input1->data_type()));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input1->dimension(0));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input1->dimension(1));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input2->dimension(0));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input2->dimension(1));
-}
-
-void CLWidthConcatenate2TensorsKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
-
- Window slice = window.first_slice_window_4D();
-
- const auto src0 = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_VEC));
- const auto src1 = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_VEC + 1));
- auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST));
-
- do
- {
- unsigned int idx = 0;
- add_4D_tensor_argument(idx, src0, slice);
- add_4D_tensor_argument(idx, src1, slice);
- add_4D_tensor_argument(idx, dst, slice);
- enqueue(queue, *this, window, lws_hint());
- }
- while(window.slide_window_slice_4D(slice));
-}
-} // namespace arm_compute
+++ /dev/null
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef ARM_COMPUTE_CLWIDTHCONCATENATE_2TENSORS_KERNEL_H
-#define ARM_COMPUTE_CLWIDTHCONCATENATE_2TENSORS_KERNEL_H
-
-#include "arm_compute/core/Types.h"
-#include "src/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-/** Interface for the width concatenate kernel of 2 tensors.
- * The input1 and input2 tensors will be concatenated into the output tensor.
- */
-class CLWidthConcatenate2TensorsKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLWidthConcatenate2TensorsKernel() = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLWidthConcatenate2TensorsKernel(const CLWidthConcatenate2TensorsKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLWidthConcatenate2TensorsKernel &operator=(const CLWidthConcatenate2TensorsKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLWidthConcatenate2TensorsKernel(CLWidthConcatenate2TensorsKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLWidthConcatenate2TensorsKernel &operator=(CLWidthConcatenate2TensorsKernel &&) = default;
- /** Default destructor */
- ~CLWidthConcatenate2TensorsKernel() = default;
- /** Initialise the kernel's input1s and output
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input1 First input tensor. Data types supported: All.
- * @param[in] input2 Second input tensor. Data types supported: same as @p input1
- * @param[out] output Output tensor. Data types supported: Same as @p input1.
- */
- void configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLWidthConcatenate2TensorsKernel
- *
- * @param[in] input1 First tensor info. Data types supported: All.
- * @param[in] input2 Second tensor info. Data types supported: same as @p input1
- * @param[in] output Output tensor info. Data types supported: Same as @p input1.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLWIDTHCONCATENATE_2TENSORS_KERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Utils.h"
-#include "src/core/CL/CLValidate.h"
-#include "src/core/helpers/WindowHelpers.h"
-#include "src/core/utils/helpers/tensor_info.h"
-#include "support/Cast.h"
-
-#include "support/StringSupport.h"
-
-namespace arm_compute
-{
-namespace
-{
-Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *input3, const ITensorInfo *input4, const ITensorInfo *output)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input1, input2, input3, input4, output);
- ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input1);
- ARM_COMPUTE_RETURN_ERROR_ON(input1->data_type() == DataType::UNKNOWN);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input1, input2, input3, input4, output);
- ARM_COMPUTE_RETURN_ERROR_ON(input1->dimension(0) + input2->dimension(0) + input3->dimension(0) + input4->dimension(0) > output->dimension(0));
-
- for(size_t i = 1; i < Coordinates::num_max_dimensions; ++i)
- {
- ARM_COMPUTE_RETURN_ERROR_ON(input1->dimension(i) != output->dimension(i));
- ARM_COMPUTE_RETURN_ERROR_ON(input2->dimension(i) != output->dimension(i));
- ARM_COMPUTE_RETURN_ERROR_ON(input3->dimension(i) != output->dimension(i));
- ARM_COMPUTE_RETURN_ERROR_ON(input4->dimension(i) != output->dimension(i));
- }
- ARM_COMPUTE_RETURN_ERROR_ON(input1->num_dimensions() > 4);
-
- return Status{};
-}
-} // namespace
-
-CLWidthConcatenate4TensorsKernel::CLWidthConcatenate4TensorsKernel()
-{
-}
-
-Status CLWidthConcatenate4TensorsKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *input3, const ITensorInfo *input4, const ITensorInfo *output)
-{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input1, input2, input3, input4, output));
- return Status{};
-}
-
-void CLWidthConcatenate4TensorsKernel::configure(const CLCompileContext &compile_context,
- ITensorInfo *input1, ITensorInfo *input2,
- ITensorInfo *input3, ITensorInfo *input4,
- ITensorInfo *output)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, input3, input4, output);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input1, input2, input3, input4, output));
-
- auto padding_info = get_padding_info({ input1, input2, input3, input4, output });
- const unsigned int min_dimension = std::min(std::min(input1->dimension(0), input2->dimension(0)), std::min(input3->dimension(0), input4->dimension(0)));
- const unsigned int num_elems_processed_per_iteration = adjust_vec_size(8, min_dimension);
- const unsigned int vec_size_leftover = output->dimension(0) % num_elems_processed_per_iteration;
-
- // Add build options
- CLBuildOptions build_opts;
- build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input1->data_type()));
- build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration));
- build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(vec_size_leftover));
- build_opts.add_option("-DDEPTH=" + support::cpp11::to_string(input1->dimension(2)));
- build_opts.add_option("-DINPUT1_WIDTH=" + support::cpp11::to_string(input1->dimension(0)));
- build_opts.add_option("-DINPUT2_WIDTH=" + support::cpp11::to_string(input2->dimension(0)));
- build_opts.add_option("-DINPUT3_WIDTH=" + support::cpp11::to_string(input3->dimension(0)));
- build_opts.add_option("-DINPUT4_WIDTH=" + support::cpp11::to_string(input4->dimension(0)));
- build_opts.add_option("-DELEMENT_SIZE=" + support::cpp11::to_string(input1->element_size()));
- build_opts.add_option("-DINPUT1_ROTATE_N=" + support::cpp11::to_string((input1->dimension(0) - vec_size_leftover) % num_elems_processed_per_iteration));
- build_opts.add_option("-DINPUT2_ROTATE_N=" + support::cpp11::to_string((input1->dimension(0) + input2->dimension(0) - vec_size_leftover) % num_elems_processed_per_iteration));
- build_opts.add_option("-DINPUT3_ROTATE_N=" + support::cpp11::to_string((input1->dimension(0) + input2->dimension(0) + input3->dimension(0) - vec_size_leftover) % num_elems_processed_per_iteration));
-
- // If input have different quantization info set quantization parameters needed for the re-quantization process
- const bool have_different_qinfo = helpers::tensor_info::tensors_have_different_quantization_info(output, input1, input2, input3, input4);
- if(is_data_type_quantized_asymmetric(input1->data_type()) && have_different_qinfo)
- {
- const UniformQuantizationInfo iq1_info = input1->quantization_info().uniform();
- const UniformQuantizationInfo iq2_info = input2->quantization_info().uniform();
- const UniformQuantizationInfo iq3_info = input3->quantization_info().uniform();
- const UniformQuantizationInfo iq4_info = input4->quantization_info().uniform();
- const UniformQuantizationInfo oq_info = output->quantization_info().uniform();
-
- build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq1_info.offset));
- build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq1_info.scale));
- build_opts.add_option("-DOFFSET_IN2=" + float_to_string_with_full_precision(iq2_info.offset));
- build_opts.add_option("-DSCALE_IN2=" + float_to_string_with_full_precision(iq2_info.scale));
- build_opts.add_option("-DOFFSET_IN3=" + float_to_string_with_full_precision(iq3_info.offset));
- build_opts.add_option("-DSCALE_IN3=" + float_to_string_with_full_precision(iq3_info.scale));
- build_opts.add_option("-DOFFSET_IN4=" + float_to_string_with_full_precision(iq4_info.offset));
- build_opts.add_option("-DSCALE_IN4=" + float_to_string_with_full_precision(iq4_info.scale));
- build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset));
- build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oq_info.scale));
- }
-
- // Create kernel
- _kernel = create_kernel(compile_context, "concatenate_width_x4", build_opts.options());
-
- // Configure kernel window
- Window win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration));
- ICLKernel::configure_internal(win.collapse(win, Window::DimZ));
-
- // Set output valid region
- output->set_valid_region(ValidRegion(Coordinates(), output->tensor_shape()));
- ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
-
- // Set config_id for enabling LWS tuning
- _config_id = "concatenate_width_x4_";
- _config_id += lower_string(string_from_data_type(input1->data_type()));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input1->dimension(0));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input1->dimension(1));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input2->dimension(0));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input2->dimension(1));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input3->dimension(0));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input3->dimension(1));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input4->dimension(0));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input4->dimension(1));
-}
-
-void CLWidthConcatenate4TensorsKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
-
- const auto src0 = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_VEC));
- const auto src1 = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_VEC + 1));
- const auto src2 = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_VEC + 2));
- const auto src3 = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_VEC + 3));
- auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST));
-
- Window slice = window.first_slice_window_4D();
-
- do
- {
- unsigned int idx = 0;
- add_4D_tensor_argument(idx, src0, slice);
- add_4D_tensor_argument(idx, src1, slice);
- add_4D_tensor_argument(idx, src2, slice);
- add_4D_tensor_argument(idx, src3, slice);
- add_4D_tensor_argument(idx, dst, slice);
- enqueue(queue, *this, window, lws_hint());
- }
- while(window.slide_window_slice_4D(slice));
-}
-} // namespace arm_compute
+++ /dev/null
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef ARM_COMPUTE_CLWIDTHCONCATENATE_4TENSORS_KERNEL_H
-#define ARM_COMPUTE_CLWIDTHCONCATENATE_4TENSORS_KERNEL_H
-
-#include "arm_compute/core/Types.h"
-#include "src/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-/** Interface for the width concatenate kernel of 4 tensors.
- * All input tensors will be concatenated into the output tensor.
- */
-class CLWidthConcatenate4TensorsKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLWidthConcatenate4TensorsKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLWidthConcatenate4TensorsKernel(const CLWidthConcatenate4TensorsKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLWidthConcatenate4TensorsKernel &operator=(const CLWidthConcatenate4TensorsKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLWidthConcatenate4TensorsKernel(CLWidthConcatenate4TensorsKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLWidthConcatenate4TensorsKernel &operator=(CLWidthConcatenate4TensorsKernel &&) = default;
- /** Default destructor */
- ~CLWidthConcatenate4TensorsKernel() = default;
- /** Initialise the kernel's input1s and output
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input1 First input tensor. Data types supported: All.
- * @param[in] input2 Second input tensor. Data types supported: same as @p input1
- * @param[in] input3 Third input tensor. Data types supported: same as @p input1
- * @param[in] input4 Fourth input tensor. Data types supported: same as @p input1
- * @param[out] output Output tensor. Data types supported: Same as @p input1.
- */
- void configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *input3, ITensorInfo *input4, ITensorInfo *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLWidthConcatenate4TensorsKernel
- *
- * @param[in] input1 First tensor info. Data types supported: All.
- * @param[in] input2 Second tensor info. Data types supported: same as @p input1
- * @param[in] input3 Third tensor info. Data types supported: same as @p input1
- * @param[in] input4 Fourth tensor info. Data types supported: same as @p input1
- * @param[in] output Output tensor info. Data types supported: Same as @p input1.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *input3, const ITensorInfo *input4, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLWIDTHCONCATENATE_4TENSORS_KERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/core/CL/kernels/CLWidthConcatenateLayerKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Utils.h"
-#include "src/core/CL/CLValidate.h"
-#include "src/core/helpers/WindowHelpers.h"
-#include "support/Cast.h"
-
-#include "support/StringSupport.h"
-
-namespace arm_compute
-{
-namespace
-{
-Status validate_arguments(const ITensorInfo *input, unsigned int width_offset, const ITensorInfo *output)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
- ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
-
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(0) + width_offset > output->dimension(0));
-
- for(size_t i = 1; i < Coordinates::num_max_dimensions; ++i)
- {
- ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(i) != output->dimension(i));
- }
- ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 4);
-
- return Status{};
-}
-} // namespace
-
-CLWidthConcatenateLayerKernel::CLWidthConcatenateLayerKernel()
-{
-}
-
-Status CLWidthConcatenateLayerKernel::validate(const ITensorInfo *input, unsigned int width_offset, const ITensorInfo *output)
-{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, width_offset, output));
- return Status{};
-}
-
-void CLWidthConcatenateLayerKernel::configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int width_offset, ITensorInfo *output)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input, width_offset, output));
-
- auto padding_info = get_padding_info({ input, output });
-
- const unsigned int num_elems_processed_per_iteration = adjust_vec_size(16, input->dimension(0));
-
- // Add build options
- CLBuildOptions build_opts;
- build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->data_type()));
- build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration));
- build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(input->dimension(0) % num_elems_processed_per_iteration));
- build_opts.add_option("-DWIDTH_OFFSET=" + support::cpp11::to_string(width_offset));
- build_opts.add_option("-DDEPTH=" + support::cpp11::to_string(input->dimension(2)));
-
- if(is_data_type_quantized_asymmetric(input->data_type()) && input->quantization_info() != output->quantization_info())
- {
- const UniformQuantizationInfo iqinfo = input->quantization_info().uniform();
- const UniformQuantizationInfo oqinfo = output->quantization_info().uniform();
-
- build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iqinfo.offset));
- build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oqinfo.offset));
- build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iqinfo.scale));
- build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oqinfo.scale));
- }
-
- // Create kernel
- _kernel = create_kernel(compile_context, "concatenate_width", build_opts.options());
- // Configure kernel window
- Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration));
- ICLKernel::configure_internal(win.collapse(win, Window::DimZ));
-
- // Set output valid region
- output->set_valid_region(ValidRegion(Coordinates(), output->tensor_shape()));
-
- ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
-}
-
-void CLWidthConcatenateLayerKernel::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
-
- const auto src = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC));
- auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST));
-
- unsigned int idx = 0;
- add_4D_tensor_argument(idx, src, window);
- add_4D_tensor_argument(idx, dst, window);
- enqueue(queue, *this, window, lws_hint());
-}
-} // namespace arm_compute
+++ /dev/null
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef ARM_COMPUTE_CLWIDTHCONCATENATELAYERKERNEL_H
-#define ARM_COMPUTE_CLWIDTHCONCATENATELAYERKERNEL_H
-
-#include "arm_compute/core/Types.h"
-#include "src/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-/** Interface for the width concatenate kernel.
- * The input tensor will be concatenated into the output tensor.
- */
-class CLWidthConcatenateLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLWidthConcatenateLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLWidthConcatenateLayerKernel(const CLWidthConcatenateLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLWidthConcatenateLayerKernel &operator=(const CLWidthConcatenateLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLWidthConcatenateLayerKernel(CLWidthConcatenateLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLWidthConcatenateLayerKernel &operator=(CLWidthConcatenateLayerKernel &&) = default;
- /** Default destructor */
- ~CLWidthConcatenateLayerKernel() = default;
- /** Initialise the kernel's inputs and output
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor. Data types supported: All.
- * @param[in] width_offset The offset on the X axis.
- * @param[in,out] output Output tensor. Data types supported: Same as @p input.
- *
- */
- void configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int width_offset, ITensorInfo *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLWidthConcatenateLayerKernel
- *
- * @param[in] input Input tensor info. Data types supported: All.
- * @param[in] width_offset The offset on the X axis.
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, unsigned int width_offset, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLWIDTHCONCATENATELAYERKERNEL_H */
--- /dev/null
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CL_COMPILE_CONTEXT_H
+#define ARM_COMPUTE_CL_COMPILE_CONTEXT_H
+
+#include "arm_compute/core/CL/CLCompileContext.h"
+
+namespace arm_compute
+{
+namespace opencl
+{
+using ClCompileContext = arm_compute::CLCompileContext;
+} // namespace opencl
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CL_COMPILE_CONTEXT_H */
--- /dev/null
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_ICL_KERNEL_H
+#define ARM_COMPUTE_ICL_KERNEL_H
+
+#include "arm_compute/core/ITensorInfo.h"
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+namespace opencl
+{
+using IClKernel = arm_compute::ICLKernel;
+} // namespace opencl
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_ICL_KERNEL_H */
--- /dev/null
+/*
+ * Copyright (c) 2019-2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/core/gpu/cl/kernels/ClBatchConcatenateKernel.h"
+
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Utils.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "support/Cast.h"
+
+#include "support/StringSupport.h"
+
+namespace arm_compute
+{
+namespace opencl
+{
+namespace kernels
+{
+namespace
+{
+Status validate_arguments(const ITensorInfo *src, unsigned int batch_offset, const ITensorInfo *dst)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
+ ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src);
+ ARM_COMPUTE_RETURN_ERROR_ON(src->data_type() == DataType::UNKNOWN);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst);
+
+ ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(Window::DimX) != dst->dimension(Window::DimX));
+ ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(Window::DimY) != dst->dimension(Window::DimY));
+ ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(Window::DimZ) != dst->dimension(Window::DimZ));
+ ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(3) + batch_offset > dst->dimension(3));
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(4, src, dst);
+
+ return Status{};
+}
+} // namespace
+
+ClBatchConcatenateKernel::ClBatchConcatenateKernel()
+ : _batch_offset(0)
+{
+}
+
+void ClBatchConcatenateKernel::configure(const CLCompileContext &compile_context, ITensorInfo *src, unsigned int batch_offset, ITensorInfo *dst)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, batch_offset, dst));
+
+ auto padding_info = get_padding_info({ src, dst });
+
+ _batch_offset = batch_offset;
+
+ const unsigned int num_elems_processed_per_iteration = adjust_vec_size(16 / src->element_size(), src->dimension(0));
+
+ // Add build options
+ CLBuildOptions build_opts;
+ build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(src->data_type()));
+ build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration));
+ build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(src->dimension(0) % num_elems_processed_per_iteration));
+ if(is_data_type_quantized_asymmetric(src->data_type()) && src->quantization_info() != dst->quantization_info())
+ {
+ const UniformQuantizationInfo iq_info = src->quantization_info().uniform();
+ const UniformQuantizationInfo oq_info = dst->quantization_info().uniform();
+
+ build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq_info.offset));
+ build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset));
+ build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq_info.scale));
+ build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oq_info.scale));
+ }
+
+ // Create kernel
+ _kernel = create_kernel(compile_context, "concatenate", build_opts.options());
+
+ // Configure kernel window
+ auto win = calculate_max_window(*dst, Steps(num_elems_processed_per_iteration));
+ win.set(3, Window::Dimension(0, src->tensor_shape()[3], 1));
+ ICLKernel::configure_internal(win);
+
+ // Set dst valid region
+ dst->set_valid_region(ValidRegion(Coordinates(), dst->tensor_shape()));
+
+ // Set config_id for enabling LWS tuning
+ _config_id = "concatenate_";
+ _config_id += support::cpp11::to_string(3);
+ _config_id += "_";
+ _config_id += support::cpp11::to_string(batch_offset);
+ _config_id += "_";
+ _config_id += support::cpp11::to_string(src->dimension(0));
+ _config_id += "_";
+ _config_id += support::cpp11::to_string(src->dimension(1));
+ _config_id += "_";
+ _config_id += support::cpp11::to_string(src->dimension(2));
+ _config_id += "_";
+ _config_id += support::cpp11::to_string(src->dimension(3));
+
+ ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
+}
+
+Status ClBatchConcatenateKernel::validate(const arm_compute::ITensorInfo *src,
+ unsigned int batch_offset,
+ const arm_compute::ITensorInfo *dst)
+{
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, batch_offset, dst));
+ return Status{};
+}
+
+void ClBatchConcatenateKernel::run_op(ITensorPack &tensors, const Window &window, ::cl::CommandQueue &queue)
+{
+ ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
+
+ const auto src = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC));
+ auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST));
+
+ Window slice = window.first_slice_window_3D();
+
+ const int offset_to_first_elements_in_bytes = _batch_offset * dst->info()->strides_in_bytes()[3];
+
+ unsigned int idx = 2 * num_arguments_per_3D_tensor(); // Skip the src and dst parameters
+ _kernel.setArg<cl_int>(idx, offset_to_first_elements_in_bytes);
+
+ do
+ {
+ unsigned int idx = 0;
+ add_3D_tensor_argument(idx, src, slice);
+ add_3D_tensor_argument(idx, dst, slice);
+ enqueue(queue, *this, slice, lws_hint());
+ }
+ while(window.slide_window_slice_3D(slice));
+}
+} // namespace kernels
+} // namespace opencl
+} // namespace arm_compute
--- /dev/null
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CL_BATCH_CONCATENATE_KERNEL_H
+#define ARM_COMPUTE_CL_BATCH_CONCATENATE_KERNEL_H
+
+#include "src/core/common/Macros.h"
+#include "src/core/gpu/cl/ClCompileContext.h"
+#include "src/core/gpu/cl/IClKernel.h"
+
+namespace arm_compute
+{
+namespace opencl
+{
+namespace kernels
+{
+/** Interface for the batch concatenate kernel.
+ * The src tensor will be concatenated into the destination tensor.
+ */
+class ClBatchConcatenateKernel : public IClKernel
+{
+public:
+ /** Default constructor */
+ ClBatchConcatenateKernel();
+ ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClBatchConcatenateKernel);
+ /** Initialise the kernel's source and destination
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] src Source tensor. Data types supported: All.
+ * @param[in] batch_offset The offset on axis # 3.
+ * @param[in,out] dst Destination tensor. Data types supported: Same as @p src.
+ *
+ * @note: The dst tensor's low two dimensions can't be smaller than the src one's.
+ * @note: The gaps between the two lowest dimensions of src and dst need to be divisible by 2.
+ *
+ */
+ void configure(const CLCompileContext &compile_context, ITensorInfo *src, unsigned int batch_offset, ITensorInfo *dst);
+ /** Static function to check if given info will lead to a valid configuration of @ref ClBatchConcatenateKernel
+ *
+ * @param[in] src Input tensor info. Data types supported: All.
+ * @param[in] batch_offset The offset on axis # 3.
+ * @param[in] dst Destination tensor info. Data types supported: Same as @p src.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *src, unsigned int batch_offset, const ITensorInfo *dst);
+
+ // Inherited methods overridden:
+ void run_op(ITensorPack &tensors, const Window &window, ::cl::CommandQueue &queue) override;
+
+private:
+ unsigned int _batch_offset;
+};
+} // namespace kernels
+} // namespace opencl
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CL_BATCH_CONCATENATE_KERNEL_H */
--- /dev/null
+/*
+ * Copyright (c) 2017-2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/core/gpu/cl/kernels/ClDepthConcatenateKernel.h"
+
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Utils.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "support/Cast.h"
+
+#include "support/StringSupport.h"
+
+namespace arm_compute
+{
+namespace opencl
+{
+namespace kernels
+{
+namespace
+{
+Status validate_arguments(const ITensorInfo *src, unsigned int depth_offset, const ITensorInfo *dst)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
+ ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst);
+
+ ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(Window::DimX) != dst->dimension(Window::DimX));
+ ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(Window::DimY) != dst->dimension(Window::DimY));
+ ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(2) + depth_offset > dst->dimension(2));
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(3, src, dst);
+
+ return Status{};
+}
+} // namespace
+
+ClDepthConcatenateKernel::ClDepthConcatenateKernel()
+ : _depth_offset(0)
+{
+}
+
+void ClDepthConcatenateKernel::configure(const CLCompileContext &compile_context, ITensorInfo *src, unsigned int depth_offset, ITensorInfo *dst)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, depth_offset, dst));
+
+ auto padding_info = get_padding_info({ src, dst });
+
+ _depth_offset = depth_offset;
+
+ const unsigned int num_elems_processed_per_iteration = adjust_vec_size(16 / src->element_size(), src->dimension(0));
+
+ // Add build options
+ CLBuildOptions build_opts;
+ build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(src->data_type()));
+ build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration));
+ build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(src->dimension(0) % num_elems_processed_per_iteration));
+ if(is_data_type_quantized_asymmetric(src->data_type()) && src->quantization_info() != dst->quantization_info())
+ {
+ const UniformQuantizationInfo iq_info = src->quantization_info().uniform();
+ const UniformQuantizationInfo oq_info = dst->quantization_info().uniform();
+
+ build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq_info.offset));
+ build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset));
+ build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq_info.scale));
+ build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oq_info.scale));
+ }
+
+ // Create kernel
+ _kernel = create_kernel(compile_context, "concatenate", build_opts.options());
+
+ // Configure kernel window
+ auto win = calculate_max_window(*dst, Steps(num_elems_processed_per_iteration));
+ win.set(Window::DimZ, Window::Dimension(0, src->tensor_shape().z(), 1));
+ ICLKernel::configure_internal(win);
+
+ // Set dst valid region
+ dst->set_valid_region(ValidRegion(Coordinates(), dst->tensor_shape()));
+
+ ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
+}
+
+Status ClDepthConcatenateKernel::validate(const arm_compute::ITensorInfo *src,
+ unsigned int depth_offset,
+ const arm_compute::ITensorInfo *dst)
+{
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, depth_offset, dst));
+ return Status{};
+}
+
+void ClDepthConcatenateKernel::run_op(ITensorPack &tensors, const Window &window, ::cl::CommandQueue &queue)
+{
+ ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
+
+ const auto src = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC));
+ auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST));
+
+ Window slice = window.first_slice_window_3D();
+
+ const int offset_to_first_elements_in_bytes = _depth_offset * dst->info()->strides_in_bytes()[2];
+
+ unsigned int idx = 2 * num_arguments_per_3D_tensor(); // Skip the src and dst parameters
+ _kernel.setArg<cl_int>(idx, offset_to_first_elements_in_bytes);
+
+ do
+ {
+ unsigned int idx = 0;
+ add_3D_tensor_argument(idx, src, slice);
+ add_3D_tensor_argument(idx, dst, slice);
+ enqueue(queue, *this, slice, lws_hint());
+ }
+ while(window.slide_window_slice_3D(slice));
+}
+} // namespace kernels
+} // namespace opencl
+} // namespace arm_compute
--- /dev/null
+/*
+ * Copyright (c) 2017-2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CL_DEPTH_CONCATENATE_KERNEL_H
+#define ARM_COMPUTE_CL_DEPTH_CONCATENATE_KERNEL_H
+
+#include "src/core/common/Macros.h"
+#include "src/core/gpu/cl/ClCompileContext.h"
+#include "src/core/gpu/cl/IClKernel.h"
+
+namespace arm_compute
+{
+namespace opencl
+{
+namespace kernels
+{
+/** Interface for the depth concatenate kernel.
+ * The src tensor will be concatenated into the dst tensor.
+ */
+class ClDepthConcatenateKernel : public IClKernel
+{
+public:
+    /** Default constructor */
+    ClDepthConcatenateKernel();
+    ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClDepthConcatenateKernel);
+    /** Initialise the kernel's source and destination
+     *
+     * @param[in]     compile_context The compile context to be used.
+     * @param[in]     src             Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+     * @param[in]     depth_offset    The offset on the Z axis.
+     * @param[in,out] dst             Destination tensor. Data types supported: Same as @p src.
+     *
+     * @note: The dst tensor's low two dimensions can't be smaller than the src one's.
+     * @note: The gaps between the two lowest dimensions of src and dst need to be divisible by 2.
+     *
+     */
+    void configure(const CLCompileContext &compile_context, ITensorInfo *src, unsigned int depth_offset, ITensorInfo *dst);
+    /** Static function to check if given info will lead to a valid configuration of @ref ClDepthConcatenateKernel
+     *
+     * @param[in] src          Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
+     * @param[in] depth_offset The offset on the Z axis.
+     * @param[in] dst          Destination tensor info. Data types supported: Same as @p src.
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *src, unsigned int depth_offset, const ITensorInfo *dst);
+
+    // Inherited methods overridden:
+    void run_op(ITensorPack &tensors, const Window &window, ::cl::CommandQueue &queue) override;
+
+private:
+    unsigned int _depth_offset; /**< Z-axis position (in elements) where src is written into dst */
+};
+} // namespace kernels
+} // namespace opencl
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CL_DEPTH_CONCATENATE_KERNEL_H */
--- /dev/null
+/*
+ * Copyright (c) 2019-2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/core/gpu/cl/kernels/ClHeightConcatenateKernel.h"
+
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Utils.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "support/Cast.h"
+
+#include "support/StringSupport.h"
+
+namespace arm_compute
+{
+namespace opencl
+{
+namespace kernels
+{
+namespace
+{
+// Shared argument checks for configure() and the static validate().
+Status validate_arguments(const ITensorInfo *src, unsigned int height_offset, const ITensorInfo *dst)
+{
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
+    ARM_COMPUTE_RETURN_ERROR_ON(src->data_type() == DataType::UNKNOWN);
+    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst);
+    // The src rows written starting at height_offset must fit inside dst along Y.
+    ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(Window::DimY) + height_offset > dst->dimension(Window::DimY));
+
+    // Every dimension except Y (the concatenation axis) must match exactly.
+    ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(0) != dst->dimension(0));
+    for(size_t i = 2; i < Coordinates::num_max_dimensions; ++i)
+    {
+        ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(i) != dst->dimension(i));
+    }
+    ARM_COMPUTE_RETURN_ERROR_ON(src->num_dimensions() > 4);
+
+    return Status{};
+}
+} // namespace
+
+// The offset only becomes meaningful once configure() runs; start from zero.
+ClHeightConcatenateKernel::ClHeightConcatenateKernel()
+    : _height_offset(0)
+{
+}
+
+Status ClHeightConcatenateKernel::validate(const ITensorInfo *src, unsigned int height_offset, const ITensorInfo *dst)
+{
+    // Delegate to the shared checks and forward their status unchanged.
+    return validate_arguments(src, height_offset, dst);
+}
+
+void ClHeightConcatenateKernel::configure(const CLCompileContext &compile_context, ITensorInfo *src, unsigned int height_offset, ITensorInfo *dst)
+{
+    ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
+    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, height_offset, dst));
+
+    // Snapshot padding so the assert below can verify configuration left it untouched.
+    auto padding_info = get_padding_info({ src, dst });
+
+    _height_offset = height_offset;
+
+    // Process up to 4 elements per work-item, clamped to the actual row width.
+    const unsigned int num_elems_processed_per_iteration = adjust_vec_size(4, src->dimension(0));
+
+    // Add build options
+    CLBuildOptions build_opts;
+    build_opts.add_option("-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size(src->element_size()));
+    build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration));
+    build_opts.add_option("-DHEIGHT_OFFSET=" + support::cpp11::to_string(_height_offset));
+    build_opts.add_option("-DDEPTH=" + support::cpp11::to_string(src->dimension(2)));
+    build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(src->dimension(0) % num_elems_processed_per_iteration));
+
+    // If the quantization info differs, pass the parameters needed for re-quantization.
+    if(is_data_type_quantized_asymmetric(src->data_type()) && src->quantization_info() != dst->quantization_info())
+    {
+        const UniformQuantizationInfo iq_info = src->quantization_info().uniform();
+        const UniformQuantizationInfo oq_info = dst->quantization_info().uniform();
+
+        build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq_info.offset));
+        build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset));
+        build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq_info.scale));
+        build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oq_info.scale));
+    }
+
+    // Create kernel
+    _kernel = create_kernel(compile_context, "concatenate_height", build_opts.options());
+    // Configure kernel window
+
+    // The window needs to be based on src as we copy all the heights of src
+    Window win = calculate_max_window(*src, Steps(num_elems_processed_per_iteration));
+    ICLKernel::configure_internal(win.collapse(win, Window::DimZ));
+
+    // Set dst valid region
+    dst->set_valid_region(ValidRegion(Coordinates(), dst->tensor_shape()));
+
+    ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
+}
+
+void ClHeightConcatenateKernel::run_op(ITensorPack &tensors, const Window &window, ::cl::CommandQueue &queue)
+{
+    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
+
+    // Tensors are packed by the operator as ACL_SRC / ACL_DST.
+    const auto src = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC));
+    auto       dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST));
+
+    // Single dispatch over the whole window; no per-slice looping is performed here.
+    unsigned int idx = 0;
+    add_4D_tensor_argument(idx, src, window);
+    add_4D_tensor_argument(idx, dst, window);
+    enqueue(queue, *this, window, lws_hint());
+}
+} // namespace kernels
+} // namespace opencl
+} // namespace arm_compute
--- /dev/null
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CL_HEIGHT_CONCATENATE_LAYER_KERNEL_H
+#define ARM_COMPUTE_CL_HEIGHT_CONCATENATE_LAYER_KERNEL_H
+
+#include "src/core/common/Macros.h"
+#include "src/core/gpu/cl/ClCompileContext.h"
+#include "src/core/gpu/cl/IClKernel.h"
+
+namespace arm_compute
+{
+namespace opencl
+{
+namespace kernels
+{
+/** Interface for the height concatenate kernel.
+ * The source tensor will be concatenated into the destination tensor.
+ */
+class ClHeightConcatenateKernel : public IClKernel
+{
+public:
+    /** Default constructor */
+    ClHeightConcatenateKernel();
+    ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClHeightConcatenateKernel);
+    /** Initialise the kernel's source and destination
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  src             Source tensor. Data types supported: All.
+     * @param[in]  height_offset   The starting offset on the Y axis for the dst tensor.
+     * @param[out] dst             Destination tensor. Data types supported: same as @p src.
+     *
+     */
+    void configure(const CLCompileContext &compile_context, ITensorInfo *src, unsigned int height_offset, ITensorInfo *dst);
+    /** Static function to check if given info will lead to a valid configuration of @ref ClHeightConcatenateKernel
+     *
+     * @param[in] src           Source tensor info. Data types supported: All.
+     * @param[in] height_offset The starting offset on the Y axis for the dst tensor.
+     * @param[in] dst           Destination tensor info. Data types supported: same as @p src.
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *src, unsigned int height_offset, const ITensorInfo *dst);
+
+    // Inherited methods overridden:
+    // Expects the pack to hold the source at ACL_SRC and the destination at ACL_DST.
+    void run_op(ITensorPack &tensors, const Window &window, ::cl::CommandQueue &queue) override;
+
+private:
+    unsigned int _height_offset; /**< Y-axis position (in elements) where src is written into dst */
+};
+} // namespace kernels
+} // namespace opencl
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CL_HEIGHT_CONCATENATE_LAYER_KERNEL_H */
--- /dev/null
+/*
+ * Copyright (c) 2018-2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.h"
+
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/Utils.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/utils/helpers/tensor_info.h"
+#include "support/Cast.h"
+
+#include "support/StringSupport.h"
+
+namespace arm_compute
+{
+namespace opencl
+{
+namespace kernels
+{
+namespace
+{
+// Shared argument checks for configure() and the static validate().
+Status validate_arguments(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst)
+{
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src1, src2, dst);
+    ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src1);
+    ARM_COMPUTE_RETURN_ERROR_ON(src1->data_type() == DataType::UNKNOWN);
+    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src1, src2, dst);
+    // The combined width of both sources must fit inside dst along X.
+    ARM_COMPUTE_RETURN_ERROR_ON(src1->dimension(0) + src2->dimension(0) > dst->dimension(0));
+
+    // Every dimension except X (the concatenation axis) must match dst exactly.
+    for(size_t i = 1; i < Coordinates::num_max_dimensions; ++i)
+    {
+        ARM_COMPUTE_RETURN_ERROR_ON(src1->dimension(i) != dst->dimension(i));
+        ARM_COMPUTE_RETURN_ERROR_ON(src2->dimension(i) != dst->dimension(i));
+    }
+    ARM_COMPUTE_RETURN_ERROR_ON(src1->num_dimensions() > 4);
+
+    return Status{};
+}
+} // namespace
+
+Status ClWidthConcatenate2TensorsKernel::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst)
+{
+    // Forward the shared argument checks verbatim.
+    return validate_arguments(src1, src2, dst);
+}
+
+void ClWidthConcatenate2TensorsKernel::configure(const CLCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst)
+{
+    ARM_COMPUTE_ERROR_ON_NULLPTR(src1, src2, dst);
+    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src1, src2, dst));
+
+    // Snapshot padding so the assert below can verify configuration left it untouched.
+    auto padding_info = get_padding_info({ src1, src2, dst });
+
+    // Process up to 8 elements per work-item, clamped to the narrower source width.
+    const unsigned int min_dimension                     = std::min(src1->dimension(0), src2->dimension(0));
+    const unsigned int num_elems_processed_per_iteration = adjust_vec_size(8, min_dimension);
+    const unsigned int vec_size_leftover                 = dst->dimension(0) % num_elems_processed_per_iteration;
+
+    // Add build options
+    CLBuildOptions build_opts;
+    build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(src1->data_type()));
+    build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration));
+    build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(vec_size_leftover));
+    build_opts.add_option("-DDEPTH=" + support::cpp11::to_string(src1->dimension(2)));
+    build_opts.add_option("-DINPUT1_WIDTH=" + support::cpp11::to_string(src1->dimension(0)));
+    build_opts.add_option("-DINPUT2_WIDTH=" + support::cpp11::to_string(src2->dimension(0)));
+    build_opts.add_option("-DELEMENT_SIZE=" + support::cpp11::to_string(src1->element_size()));
+    // Rotation amount used by the kernel where src1 ends mid-vector and src2 begins.
+    build_opts.add_option("-DINPUT1_ROTATE_N=" + support::cpp11::to_string((src1->dimension(0) - vec_size_leftover) % num_elems_processed_per_iteration));
+
+    // If the inputs have different quantization info, set the parameters needed for the re-quantization process
+    const bool have_different_qinfo = helpers::tensor_info::tensors_have_different_quantization_info(dst, src1, src2);
+    if(is_data_type_quantized_asymmetric(src1->data_type()) && have_different_qinfo)
+    {
+        const UniformQuantizationInfo iq1_info = src1->quantization_info().uniform();
+        const UniformQuantizationInfo iq2_info = src2->quantization_info().uniform();
+        const UniformQuantizationInfo oq_info  = dst->quantization_info().uniform();
+
+        build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq1_info.offset));
+        build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq1_info.scale));
+        build_opts.add_option("-DOFFSET_IN2=" + float_to_string_with_full_precision(iq2_info.offset));
+        build_opts.add_option("-DSCALE_IN2=" + float_to_string_with_full_precision(iq2_info.scale));
+        build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset));
+        build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oq_info.scale));
+    }
+
+    // Create kernel
+    _kernel = create_kernel(compile_context, "concatenate_width_x2", build_opts.options());
+
+    // Configure kernel window
+    Window win = calculate_max_window(*dst, Steps(num_elems_processed_per_iteration));
+    ICLKernel::configure_internal(win.collapse(win, Window::DimZ));
+
+    // Set dst valid region
+    dst->set_valid_region(ValidRegion(Coordinates(), dst->tensor_shape()));
+    ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
+
+    // Set config_id for enabling LWS tuning
+    _config_id = "concatenate_width_x2_";
+    _config_id += lower_string(string_from_data_type(src1->data_type()));
+    _config_id += "_";
+    _config_id += support::cpp11::to_string(src1->dimension(0));
+    _config_id += "_";
+    _config_id += support::cpp11::to_string(src1->dimension(1));
+    _config_id += "_";
+    _config_id += support::cpp11::to_string(src2->dimension(0));
+    _config_id += "_";
+    _config_id += support::cpp11::to_string(src2->dimension(1));
+}
+
+void ClWidthConcatenate2TensorsKernel::run_op(ITensorPack &tensors, const Window &window, ::cl::CommandQueue &queue)
+{
+    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
+
+    Window slice = window.first_slice_window_4D();
+
+    // The two sources are packed consecutively starting at ACL_SRC_VEC.
+    const auto src0 = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_VEC));
+    const auto src1 = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_VEC + 1));
+    auto       dst  = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST));
+
+    do
+    {
+        unsigned int idx = 0;
+        add_4D_tensor_argument(idx, src0, slice);
+        add_4D_tensor_argument(idx, src1, slice);
+        add_4D_tensor_argument(idx, dst, slice);
+        // NOTE(review): `window` (not `slice`) is enqueued here. The window was
+        // collapsed at configure time, so slicing presumably yields a single
+        // iteration and the two are equivalent -- confirm if that ever changes.
+        enqueue(queue, *this, window, lws_hint());
+    }
+    while(window.slide_window_slice_4D(slice));
+}
+} // namespace kernels
+} // namespace opencl
+} // namespace arm_compute
--- /dev/null
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CL_WIDTHCONCATENATE_2TENSORS_KERNEL_H
+#define ARM_COMPUTE_CL_WIDTHCONCATENATE_2TENSORS_KERNEL_H
+
+#include "src/core/common/Macros.h"
+#include "src/core/gpu/cl/ClCompileContext.h"
+#include "src/core/gpu/cl/IClKernel.h"
+
+namespace arm_compute
+{
+namespace opencl
+{
+namespace kernels
+{
+/** Interface for the width concatenate kernel of 2 tensors.
+ * The src1 and src2 tensors will be concatenated into the dst tensor.
+ */
+class ClWidthConcatenate2TensorsKernel : public IClKernel
+{
+public:
+    /** Default constructor */
+    ClWidthConcatenate2TensorsKernel() = default;
+    ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClWidthConcatenate2TensorsKernel);
+    /** Initialise the kernel's sources and destination
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  src1            First source tensor. Data types supported: All.
+     * @param[in]  src2            Second source tensor. Data types supported: same as @p src1
+     * @param[out] dst             Destination tensor. Data types supported: Same as @p src1.
+     */
+    void configure(const CLCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst);
+    /** Static function to check if given info will lead to a valid configuration of @ref ClWidthConcatenate2TensorsKernel
+     *
+     * @param[in] src1 First tensor info. Data types supported: All.
+     * @param[in] src2 Second tensor info. Data types supported: same as @p src1
+     * @param[in] dst  Destination tensor info. Data types supported: Same as @p src1.
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst);
+
+    // Inherited methods overridden:
+    // Expects the pack to hold the sources at ACL_SRC_VEC and ACL_SRC_VEC + 1 and the destination at ACL_DST.
+    void run_op(ITensorPack &tensors, const Window &window, ::cl::CommandQueue &queue) override;
+};
+} // namespace kernels
+} // namespace opencl
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CL_WIDTHCONCATENATE_2TENSORS_KERNEL_H */
--- /dev/null
+/*
+ * Copyright (c) 2018-2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.h"
+
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Utils.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/utils/helpers/tensor_info.h"
+#include "support/Cast.h"
+
+#include "support/StringSupport.h"
+
+namespace arm_compute
+{
+namespace opencl
+{
+namespace kernels
+{
+namespace
+{
+// Shared argument checks for configure() and the static validate().
+Status validate_arguments(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *src3, const ITensorInfo *src4, const ITensorInfo *dst)
+{
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src1, src2, src3, src4, dst);
+    ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src1);
+    ARM_COMPUTE_RETURN_ERROR_ON(src1->data_type() == DataType::UNKNOWN);
+    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src1, src2, src3, src4, dst);
+    // The combined width of all four sources must fit inside dst along X.
+    ARM_COMPUTE_RETURN_ERROR_ON(src1->dimension(0) + src2->dimension(0) + src3->dimension(0) + src4->dimension(0) > dst->dimension(0));
+
+    // Every dimension except X (the concatenation axis) must match dst exactly.
+    for(size_t i = 1; i < Coordinates::num_max_dimensions; ++i)
+    {
+        ARM_COMPUTE_RETURN_ERROR_ON(src1->dimension(i) != dst->dimension(i));
+        ARM_COMPUTE_RETURN_ERROR_ON(src2->dimension(i) != dst->dimension(i));
+        ARM_COMPUTE_RETURN_ERROR_ON(src3->dimension(i) != dst->dimension(i));
+        ARM_COMPUTE_RETURN_ERROR_ON(src4->dimension(i) != dst->dimension(i));
+    }
+    ARM_COMPUTE_RETURN_ERROR_ON(src1->num_dimensions() > 4);
+
+    return Status{};
+}
+} // namespace
+
+// No state beyond the base class needs initialising, so the declared default
+// constructor is explicitly defaulted -- consistent with
+// ClWidthConcatenate2TensorsKernel, whose default constructor is also defaulted.
+ClWidthConcatenate4TensorsKernel::ClWidthConcatenate4TensorsKernel() = default;
+
+Status ClWidthConcatenate4TensorsKernel::validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *src3, const ITensorInfo *src4, const ITensorInfo *dst)
+{
+    // Delegate to the shared checks and forward their status unchanged.
+    return validate_arguments(src1, src2, src3, src4, dst);
+}
+
+void ClWidthConcatenate4TensorsKernel::configure(const CLCompileContext &compile_context,
+                                                 ITensorInfo *src1, ITensorInfo *src2,
+                                                 ITensorInfo *src3, ITensorInfo *src4,
+                                                 ITensorInfo *dst)
+{
+    ARM_COMPUTE_ERROR_ON_NULLPTR(src1, src2, src3, src4, dst);
+    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src1, src2, src3, src4, dst));
+
+    // Snapshot padding so the assert below can verify configuration left it untouched.
+    auto padding_info = get_padding_info({ src1, src2, src3, src4, dst });
+    // Process up to 8 elements per work-item, clamped to the narrowest source width.
+    const unsigned int min_dimension                     = std::min(std::min(src1->dimension(0), src2->dimension(0)), std::min(src3->dimension(0), src4->dimension(0)));
+    const unsigned int num_elems_processed_per_iteration = adjust_vec_size(8, min_dimension);
+    const unsigned int vec_size_leftover                 = dst->dimension(0) % num_elems_processed_per_iteration;
+
+    // Add build options
+    CLBuildOptions build_opts;
+    build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(src1->data_type()));
+    build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration));
+    build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(vec_size_leftover));
+    build_opts.add_option("-DDEPTH=" + support::cpp11::to_string(src1->dimension(2)));
+    build_opts.add_option("-DINPUT1_WIDTH=" + support::cpp11::to_string(src1->dimension(0)));
+    build_opts.add_option("-DINPUT2_WIDTH=" + support::cpp11::to_string(src2->dimension(0)));
+    build_opts.add_option("-DINPUT3_WIDTH=" + support::cpp11::to_string(src3->dimension(0)));
+    build_opts.add_option("-DINPUT4_WIDTH=" + support::cpp11::to_string(src4->dimension(0)));
+    build_opts.add_option("-DELEMENT_SIZE=" + support::cpp11::to_string(src1->element_size()));
+    // Rotation amounts used by the kernel at each boundary where one source ends mid-vector.
+    build_opts.add_option("-DINPUT1_ROTATE_N=" + support::cpp11::to_string((src1->dimension(0) - vec_size_leftover) % num_elems_processed_per_iteration));
+    build_opts.add_option("-DINPUT2_ROTATE_N=" + support::cpp11::to_string((src1->dimension(0) + src2->dimension(0) - vec_size_leftover) % num_elems_processed_per_iteration));
+    build_opts.add_option("-DINPUT3_ROTATE_N=" + support::cpp11::to_string((src1->dimension(0) + src2->dimension(0) + src3->dimension(0) - vec_size_leftover) % num_elems_processed_per_iteration));
+
+    // If the sources have different quantization info, set the parameters needed for the re-quantization process
+    const bool have_different_qinfo = helpers::tensor_info::tensors_have_different_quantization_info(dst, src1, src2, src3, src4);
+    if(is_data_type_quantized_asymmetric(src1->data_type()) && have_different_qinfo)
+    {
+        const UniformQuantizationInfo iq1_info = src1->quantization_info().uniform();
+        const UniformQuantizationInfo iq2_info = src2->quantization_info().uniform();
+        const UniformQuantizationInfo iq3_info = src3->quantization_info().uniform();
+        const UniformQuantizationInfo iq4_info = src4->quantization_info().uniform();
+        const UniformQuantizationInfo oq_info  = dst->quantization_info().uniform();
+
+        build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq1_info.offset));
+        build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq1_info.scale));
+        build_opts.add_option("-DOFFSET_IN2=" + float_to_string_with_full_precision(iq2_info.offset));
+        build_opts.add_option("-DSCALE_IN2=" + float_to_string_with_full_precision(iq2_info.scale));
+        build_opts.add_option("-DOFFSET_IN3=" + float_to_string_with_full_precision(iq3_info.offset));
+        build_opts.add_option("-DSCALE_IN3=" + float_to_string_with_full_precision(iq3_info.scale));
+        build_opts.add_option("-DOFFSET_IN4=" + float_to_string_with_full_precision(iq4_info.offset));
+        build_opts.add_option("-DSCALE_IN4=" + float_to_string_with_full_precision(iq4_info.scale));
+        build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset));
+        build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oq_info.scale));
+    }
+
+    // Create kernel
+    _kernel = create_kernel(compile_context, "concatenate_width_x4", build_opts.options());
+
+    // Configure kernel window
+    Window win = calculate_max_window(*dst, Steps(num_elems_processed_per_iteration));
+    ICLKernel::configure_internal(win.collapse(win, Window::DimZ));
+
+    // Set dst valid region
+    dst->set_valid_region(ValidRegion(Coordinates(), dst->tensor_shape()));
+    ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
+
+    // Set config_id for enabling LWS tuning
+    _config_id = "concatenate_width_x4_";
+    _config_id += lower_string(string_from_data_type(src1->data_type()));
+    _config_id += "_";
+    _config_id += support::cpp11::to_string(src1->dimension(0));
+    _config_id += "_";
+    _config_id += support::cpp11::to_string(src1->dimension(1));
+    _config_id += "_";
+    _config_id += support::cpp11::to_string(src2->dimension(0));
+    _config_id += "_";
+    _config_id += support::cpp11::to_string(src2->dimension(1));
+    _config_id += "_";
+    _config_id += support::cpp11::to_string(src3->dimension(0));
+    _config_id += "_";
+    _config_id += support::cpp11::to_string(src3->dimension(1));
+    _config_id += "_";
+    _config_id += support::cpp11::to_string(src4->dimension(0));
+    _config_id += "_";
+    _config_id += support::cpp11::to_string(src4->dimension(1));
+}
+
+void ClWidthConcatenate4TensorsKernel::run_op(ITensorPack &tensors, const Window &window, ::cl::CommandQueue &queue)
+{
+    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
+
+    // The four sources are packed consecutively starting at ACL_SRC_VEC.
+    const auto src0 = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_VEC));
+    const auto src1 = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_VEC + 1));
+    const auto src2 = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_VEC + 2));
+    const auto src3 = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_VEC + 3));
+    auto       dst  = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST));
+
+    Window slice = window.first_slice_window_4D();
+
+    do
+    {
+        unsigned int idx = 0;
+        add_4D_tensor_argument(idx, src0, slice);
+        add_4D_tensor_argument(idx, src1, slice);
+        add_4D_tensor_argument(idx, src2, slice);
+        add_4D_tensor_argument(idx, src3, slice);
+        add_4D_tensor_argument(idx, dst, slice);
+        // NOTE(review): `window` (not `slice`) is enqueued here. The window was
+        // collapsed at configure time, so slicing presumably yields a single
+        // iteration and the two are equivalent -- confirm if that ever changes.
+        enqueue(queue, *this, window, lws_hint());
+    }
+    while(window.slide_window_slice_4D(slice));
+}
+} // namespace kernels
+} // namespace opencl
+} // namespace arm_compute
--- /dev/null
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ARM_COMPUTE_CL_WIDTH_CONCATENATE_4TENSORS_KERNEL_H
+#define ARM_COMPUTE_CL_WIDTH_CONCATENATE_4TENSORS_KERNEL_H
+
+#include "src/core/common/Macros.h"
+#include "src/core/gpu/cl/ClCompileContext.h"
+#include "src/core/gpu/cl/IClKernel.h"
+
+namespace arm_compute
+{
+namespace opencl
+{
+namespace kernels
+{
+/** Interface for the width concatenate kernel of 4 tensors.
+ * All source tensors will be concatenated into the destination tensor.
+ */
+class ClWidthConcatenate4TensorsKernel : public IClKernel
+{
+public:
+ /** Default constructor */
+ ClWidthConcatenate4TensorsKernel();
+ ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClWidthConcatenate4TensorsKernel);
+ /** Initialise the kernel's sources and destination
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] src1 First source tensor. Data types supported: All.
+ * @param[in] src2 Second source tensor. Data types supported: same as @p src1
+ * @param[in] src3 Third source tensor. Data types supported: same as @p src1
+ * @param[in] src4 Fourth source tensor. Data types supported: same as @p src1
+ * @param[out] dst Destination tensor. Data types supported: same as @p src1.
+ */
+ void configure(const CLCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *src3, ITensorInfo *src4, ITensorInfo *dst);
+ /** Static function to check if given info will lead to a valid configuration of @ref ClWidthConcatenate4TensorsKernel
+ *
+ * @param[in] src1 First tensor info. Data types supported: All.
+ * @param[in] src2 Second tensor info. Data types supported: same as @p src1
+ * @param[in] src3 Third tensor info. Data types supported: same as @p src1
+ * @param[in] src4 Fourth tensor info. Data types supported: same as @p src1
+ * @param[in] dst Destination tensor info. Data types supported: same as @p src1.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *src3, const ITensorInfo *src4, const ITensorInfo *dst);
+
+ // Inherited methods overridden:
+ void run_op(ITensorPack &tensors, const Window &window, ::cl::CommandQueue &queue) override;
+};
+} // namespace kernels
+} // namespace opencl
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CL_WIDTH_CONCATENATE_4TENSORS_KERNEL_H */
--- /dev/null
+/*
+ * Copyright (c) 2018-2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/core/gpu/cl/kernels/ClWidthConcatenateKernel.h"
+
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Utils.h"
+#include "src/core/CL/CLValidate.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "support/Cast.h"
+
+#include "support/StringSupport.h"
+
+namespace arm_compute
+{
+namespace opencl
+{
+namespace kernels
+{
+namespace
+{
+Status validate_arguments(const ITensorInfo *src, unsigned int width_offset, const ITensorInfo *dst)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
+ ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src);
+ ARM_COMPUTE_RETURN_ERROR_ON(src->data_type() == DataType::UNKNOWN);
+
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst);
+ ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(0) + width_offset > dst->dimension(0));
+
+ for(size_t i = 1; i < Coordinates::num_max_dimensions; ++i)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON(src->dimension(i) != dst->dimension(i));
+ }
+ ARM_COMPUTE_RETURN_ERROR_ON(src->num_dimensions() > 4);
+
+ return Status{};
+}
+} // namespace
+
+ClWidthConcatenateKernel::ClWidthConcatenateKernel()
+{
+}
+
+Status ClWidthConcatenateKernel::validate(const ITensorInfo *src, unsigned int width_offset, const ITensorInfo *dst)
+{
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, width_offset, dst));
+ return Status{};
+}
+
+void ClWidthConcatenateKernel::configure(const CLCompileContext &compile_context, ITensorInfo *src, unsigned int width_offset, ITensorInfo *dst)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, width_offset, dst));
+
+ auto padding_info = get_padding_info({ src, dst });
+
+ const unsigned int num_elems_processed_per_iteration = adjust_vec_size(16, src->dimension(0));
+
+ // Add build options
+ CLBuildOptions build_opts;
+ build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(src->data_type()));
+ build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration));
+ build_opts.add_option("-DVEC_SIZE_LEFTOVER=" + support::cpp11::to_string(src->dimension(0) % num_elems_processed_per_iteration));
+ build_opts.add_option("-DWIDTH_OFFSET=" + support::cpp11::to_string(width_offset));
+ build_opts.add_option("-DDEPTH=" + support::cpp11::to_string(src->dimension(2)));
+
+ if(is_data_type_quantized_asymmetric(src->data_type()) && src->quantization_info() != dst->quantization_info())
+ {
+ const UniformQuantizationInfo iqinfo = src->quantization_info().uniform();
+ const UniformQuantizationInfo oqinfo = dst->quantization_info().uniform();
+
+ build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iqinfo.offset));
+ build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oqinfo.offset));
+ build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iqinfo.scale));
+ build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oqinfo.scale));
+ }
+
+ // Create kernel
+ _kernel = create_kernel(compile_context, "concatenate_width", build_opts.options());
+ // Configure kernel window
+ Window win = calculate_max_window(*src, Steps(num_elems_processed_per_iteration));
+ ICLKernel::configure_internal(win.collapse(win, Window::DimZ));
+
+ // Set dst valid region
+ dst->set_valid_region(ValidRegion(Coordinates(), dst->tensor_shape()));
+
+ ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info));
+}
+
+void ClWidthConcatenateKernel::run_op(ITensorPack &tensors, const Window &window, ::cl::CommandQueue &queue)
+{
+ ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+ ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
+
+ const auto src = utils::cast::polymorphic_downcast<const ICLTensor *>(tensors.get_const_tensor(TensorType::ACL_SRC));
+ auto dst = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(TensorType::ACL_DST));
+
+ unsigned int idx = 0;
+ add_4D_tensor_argument(idx, src, window);
+ add_4D_tensor_argument(idx, dst, window);
+ enqueue(queue, *this, window, lws_hint());
+}
+} // namespace kernels
+} // namespace opencl
+} // namespace arm_compute
--- /dev/null
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CL_WIDTH_CONCATENATE_LAYER_KERNEL_H
+#define ARM_COMPUTE_CL_WIDTH_CONCATENATE_LAYER_KERNEL_H
+
+#include "src/core/common/Macros.h"
+#include "src/core/gpu/cl/ClCompileContext.h"
+#include "src/core/gpu/cl/IClKernel.h"
+
+namespace arm_compute
+{
+namespace opencl
+{
+namespace kernels
+{
+/** Interface for the width concatenate kernel.
+ * The source tensor will be concatenated into the destination tensor.
+ */
+class ClWidthConcatenateKernel : public IClKernel
+{
+public:
+ /** Default constructor */
+ ClWidthConcatenateKernel();
+ ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(ClWidthConcatenateKernel);
+ /** Initialise the kernel's source and destination
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] src Source tensor. Data types supported: All.
+ * @param[in] width_offset The offset on the X axis.
+ * @param[in,out] dst Destination tensor. Data types supported: same as @p src.
+ *
+ */
+ void configure(const CLCompileContext &compile_context, ITensorInfo *src, unsigned int width_offset, ITensorInfo *dst);
+ /** Static function to check if given info will lead to a valid configuration of @ref ClWidthConcatenateKernel
+ *
+ * @param[in] src Source tensor info. Data types supported: All.
+ * @param[in] width_offset The offset on the X axis.
+ * @param[in] dst Destination tensor info. Data types supported: same as @p src.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *src, unsigned int width_offset, const ITensorInfo *dst);
+
+ // Inherited methods overridden:
+ void run_op(ITensorPack &tensors, const Window &window, ::cl::CommandQueue &queue) override;
+};
+} // namespace kernels
+} // namespace opencl
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CL_WIDTH_CONCATENATE_LAYER_KERNEL_H */
/*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
*/
#include "arm_compute/runtime/CL/functions/CLConcatenateLayer.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "src/core/CL/kernels/CLDepthConcatenateLayerKernel.h"
-#include "src/core/CL/kernels/CLHeightConcatenateLayerKernel.h"
-#include "src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h"
-#include "src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h"
-#include "src/core/CL/kernels/CLWidthConcatenateLayerKernel.h"
-
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
-#include "src/core/CL/kernels/CLBatchConcatenateLayerKernel.h"
-#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/CL/ICLKernel.h"
+#include "src/runtime/gpu/cl/operators/ClConcatenate.h"
namespace arm_compute
{
-namespace experimental
-{
-CLConcatenation::CLConcatenation()
- : _concat_kernels(),
- _num_inputs(0),
- _axis(Window::DimX)
-{
-}
-
-void CLConcatenation::configure(const CLCompileContext &compile_context, const std::vector<ITensorInfo *> &inputs_vector, ITensorInfo *output, size_t axis)
-{
- ARM_COMPUTE_ERROR_ON(output == nullptr);
- _axis = axis;
- _num_inputs = inputs_vector.size();
-
- TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector, _axis);
- std::vector<const ITensorInfo *> const_inputs_vector(inputs_vector.size());
- std::transform(inputs_vector.begin(), inputs_vector.end(), const_inputs_vector.begin(), [](ITensorInfo * t)
- {
- ARM_COMPUTE_ERROR_ON_NULLPTR(t);
- return t;
- });
-
- // Output auto inizialitation if not yet initialized
- auto_init_if_empty(*output, output_shape, 1, inputs_vector[0]->data_type());
- ARM_COMPUTE_ERROR_THROW_ON(CLConcatenateLayer::validate(const_inputs_vector, output, axis));
-
- unsigned int offset = 0;
- switch(_axis)
- {
- case Window::DimX:
- {
- switch(_num_inputs)
- {
- case 2:
- {
- // Configure WidthConcatenate2Tensors kernel
- auto kernel = std::make_unique<CLWidthConcatenate2TensorsKernel>();
- kernel->configure(compile_context, inputs_vector.at(0), inputs_vector.at(1), output);
- _concat_kernels.emplace_back(std::move(kernel));
- break;
- }
- case 4:
- {
- // Configure WidthConcatenate4Tensors kernel
- auto kernel = std::make_unique<CLWidthConcatenate4TensorsKernel>();
- kernel->configure(compile_context, inputs_vector.at(0), inputs_vector.at(1), inputs_vector.at(2), inputs_vector.at(3), output);
- _concat_kernels.emplace_back(std::move(kernel));
- break;
- }
- default:
- {
- // Configure generic case WidthConcatenate kernels
- for(unsigned int i = 0; i < _num_inputs; ++i)
- {
- auto kernel = std::make_unique<CLWidthConcatenateLayerKernel>();
- kernel->configure(compile_context, inputs_vector.at(i), offset, output);
- offset += inputs_vector.at(i)->dimension(_axis);
- _concat_kernels.emplace_back(std::move(kernel));
- }
- break;
- }
- }
- break;
- }
- case Window::DimY:
- {
- for(unsigned int i = 0; i < _num_inputs; ++i)
- {
- auto kernel = std::make_unique<CLHeightConcatenateLayerKernel>();
- kernel->configure(compile_context, inputs_vector.at(i), offset, output);
- offset += inputs_vector.at(i)->dimension(_axis);
- _concat_kernels.emplace_back(std::move(kernel));
- }
- break;
- }
- case Window::DimZ:
- {
- for(unsigned int i = 0; i < _num_inputs; ++i)
- {
- auto kernel = std::make_unique<CLDepthConcatenateLayerKernel>();
- kernel->configure(compile_context, inputs_vector.at(i), offset, output);
- offset += inputs_vector.at(i)->dimension(_axis);
- _concat_kernels.emplace_back(std::move(kernel));
- }
- break;
- }
- case 3:
- {
- for(unsigned int i = 0; i < _num_inputs; ++i)
- {
- auto kernel = std::make_unique<CLBatchConcatenateLayerKernel>();
- kernel->configure(compile_context, inputs_vector.at(i), offset, output);
- offset += inputs_vector.at(i)->dimension(_axis);
- _concat_kernels.emplace_back(std::move(kernel));
- }
- break;
- }
- default:
- ARM_COMPUTE_ERROR("Axis not supported");
- }
-}
-
-Status CLConcatenation::validate(const std::vector<const ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis)
-{
- ARM_COMPUTE_RETURN_ERROR_ON(output == nullptr);
- const unsigned int num_inputs = inputs_vector.size();
-
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output);
- ARM_COMPUTE_RETURN_ERROR_ON(num_inputs < 2);
-
- unsigned int offset = 0;
- switch(axis)
- {
- case Window::DimX:
- {
- switch(num_inputs)
- {
- case 2:
- // Validate WidthConcatenate2Tensors kernels if there are 2 inputs
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(inputs_vector[0], inputs_vector[1]);
- ARM_COMPUTE_RETURN_ON_ERROR(CLWidthConcatenate2TensorsKernel::validate(inputs_vector[0], inputs_vector[1], output));
- break;
- case 4:
- // Validate WidthConcatenate4Tensors kernels if there are 4 inputs
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(inputs_vector[0], inputs_vector[1], inputs_vector[2], inputs_vector[3]);
- ARM_COMPUTE_RETURN_ON_ERROR(CLWidthConcatenate4TensorsKernel::validate(inputs_vector[0], inputs_vector[1], inputs_vector[2], inputs_vector[3], output));
- break;
- default:
- // Validate generic case of WidthConcatenate kernel
- for(const auto &input : inputs_vector)
- {
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input);
- ARM_COMPUTE_RETURN_ON_ERROR(CLWidthConcatenateLayerKernel::validate(input, offset, output));
- offset += input->dimension(axis);
- }
- break;
- }
- break;
- }
- case Window::DimY:
- {
- for(const auto &input : inputs_vector)
- {
- ARM_COMPUTE_RETURN_ON_ERROR(CLHeightConcatenateLayerKernel::validate(input, offset, output));
- offset += input->dimension(axis);
- }
- break;
- }
- case Window::DimZ:
- {
- for(const auto &input : inputs_vector)
- {
- ARM_COMPUTE_RETURN_ON_ERROR(CLDepthConcatenateLayerKernel::validate(input, offset, output));
- offset += input->dimension(axis);
- }
- break;
- }
- case 3:
- {
- for(const auto &input : inputs_vector)
- {
- ARM_COMPUTE_RETURN_ON_ERROR(CLBatchConcatenateLayerKernel::validate(input, offset, output));
- offset += input->dimension(axis);
- }
- break;
- }
- default:
- ARM_COMPUTE_ERROR("Axis not supported");
- }
-
- if(output->total_size() != 0)
- {
- TensorShape output_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(inputs_vector, axis);
- ARM_COMPUTE_RETURN_ERROR_ON(output_shape.total_size() != output->tensor_shape().total_size());
- }
-
- return Status{};
-}
-
-void CLConcatenation::run(ITensorPack &tensors)
-{
- if(tensors.empty())
- {
- ARM_COMPUTE_ERROR("No inputs provided");
- }
-
- if(static_cast<int>(tensors.size()) - 1 != static_cast<int>(_num_inputs))
- {
- ARM_COMPUTE_ERROR("Configured with different number of inputs");
- }
-
- if(_axis == Window::DimX && (_num_inputs == 2 || _num_inputs == 4))
- {
- ARM_COMPUTE_ERROR_ON(_concat_kernels.empty());
- CLScheduler::get().enqueue_op(*_concat_kernels.at(0), tensors, true);
- }
- else
- {
- int i = 0;
- for(auto &k : _concat_kernels)
- {
- ITensorPack pack;
- pack.add_tensor(TensorType::ACL_SRC, tensors.get_const_tensor(ACL_SRC_VEC + i));
- pack.add_tensor(TensorType::ACL_DST, tensors.get_tensor(ACL_DST));
- CLScheduler::get().enqueue_op(*k, pack, true);
- ++i;
- }
- }
-}
-} // namespace experimental
-
struct CLConcatenateLayer::Impl
{
- std::vector<const ICLTensor *> srcs{};
- ICLTensor *dst{ nullptr };
- unsigned int num_inputs{ 0 };
- unsigned int axis{ 0 };
- std::unique_ptr<experimental::CLConcatenation> op{ nullptr };
+ std::vector<const ICLTensor *> srcs{};
+ ICLTensor *dst{ nullptr };
+ unsigned int num_inputs{ 0 };
+ unsigned int axis{ 0 };
+ std::unique_ptr<opencl::ClConcatenate> op{ nullptr };
};
CLConcatenateLayer::CLConcatenateLayer()
_impl->dst = output;
_impl->axis = axis;
_impl->num_inputs = inputs_vector.size();
- _impl->op = std::make_unique<experimental::CLConcatenation>();
+ _impl->op = std::make_unique<opencl::ClConcatenate>();
std::vector<ITensorInfo *> inputs_vector_info;
for(unsigned int i = 0; i < inputs_vector.size(); ++i)
Status CLConcatenateLayer::validate(const std::vector<const ITensorInfo *> &inputs_vector, const ITensorInfo *output, size_t axis)
{
- return experimental::CLConcatenation::validate(inputs_vector, output, axis);
+ return opencl::ClConcatenate::validate(inputs_vector, output, axis);
}
void CLConcatenateLayer::run()
--- /dev/null
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_ICL_OPERATOR_H
+#define ARM_COMPUTE_ICL_OPERATOR_H
+
+#include "arm_compute/core/ITensorInfo.h"
+#include "arm_compute/runtime/CL/ICLOperator.h"
+
+namespace arm_compute
+{
+namespace opencl
+{
+using IClOperator = experimental::ICLOperator;
+} // namespace opencl
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_ICL_OPERATOR_H */
--- /dev/null
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/runtime/gpu/cl/operators/ClConcatenate.h"
+
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
+
+#include "src/core/gpu/cl/kernels/ClBatchConcatenateKernel.h"
+#include "src/core/gpu/cl/kernels/ClDepthConcatenateKernel.h"
+#include "src/core/gpu/cl/kernels/ClHeightConcatenateKernel.h"
+#include "src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.h"
+#include "src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.h"
+#include "src/core/gpu/cl/kernels/ClWidthConcatenateKernel.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
+#include "src/core/helpers/AutoConfiguration.h"
+
+namespace arm_compute
+{
+namespace opencl
+{
+ClConcatenate::ClConcatenate()
+ : _concat_kernels(),
+ _num_inputs(0),
+ _axis(Window::DimX)
+{
+}
+
+void ClConcatenate::configure(const CLCompileContext &compile_context, const std::vector<ITensorInfo *> &src_vector, ITensorInfo *dst, size_t axis)
+{
+ ARM_COMPUTE_ERROR_ON(dst == nullptr);
+ _axis = axis;
+ _num_inputs = src_vector.size();
+
+ TensorShape dst_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(src_vector, _axis);
+ std::vector<const ITensorInfo *> const_src_vector(src_vector.size());
+ std::transform(src_vector.begin(), src_vector.end(), const_src_vector.begin(), [](ITensorInfo * t)
+ {
+ ARM_COMPUTE_ERROR_ON_NULLPTR(t);
+ return t;
+ });
+
+    // dst auto initialization if not yet initialized
+ auto_init_if_empty(*dst, dst_shape, 1, src_vector[0]->data_type());
+ ARM_COMPUTE_ERROR_THROW_ON(ClConcatenate::validate(const_src_vector, dst, axis));
+
+ unsigned int offset = 0;
+ switch(_axis)
+ {
+ case Window::DimX:
+ {
+ switch(_num_inputs)
+ {
+ case 2:
+ {
+ // Configure WidthConcatenate2Tensors kernel
+ auto kernel = std::make_unique<kernels::ClWidthConcatenate2TensorsKernel>();
+ kernel->configure(compile_context, src_vector.at(0), src_vector.at(1), dst);
+ _concat_kernels.emplace_back(std::move(kernel));
+ break;
+ }
+ case 4:
+ {
+ // Configure WidthConcatenate4Tensors kernel
+ auto kernel = std::make_unique<kernels::ClWidthConcatenate4TensorsKernel>();
+ kernel->configure(compile_context, src_vector.at(0), src_vector.at(1), src_vector.at(2), src_vector.at(3), dst);
+ _concat_kernels.emplace_back(std::move(kernel));
+ break;
+ }
+ default:
+ {
+ // Configure generic case WidthConcatenate kernels
+ for(unsigned int i = 0; i < _num_inputs; ++i)
+ {
+ auto kernel = std::make_unique<kernels::ClWidthConcatenateKernel>();
+ kernel->configure(compile_context, src_vector.at(i), offset, dst);
+ offset += src_vector.at(i)->dimension(_axis);
+ _concat_kernels.emplace_back(std::move(kernel));
+ }
+ break;
+ }
+ }
+ break;
+ }
+ case Window::DimY:
+ {
+ for(unsigned int i = 0; i < _num_inputs; ++i)
+ {
+ auto kernel = std::make_unique<kernels::ClHeightConcatenateKernel>();
+ kernel->configure(compile_context, src_vector.at(i), offset, dst);
+ offset += src_vector.at(i)->dimension(_axis);
+ _concat_kernels.emplace_back(std::move(kernel));
+ }
+ break;
+ }
+ case Window::DimZ:
+ {
+ for(unsigned int i = 0; i < _num_inputs; ++i)
+ {
+ auto kernel = std::make_unique<kernels::ClDepthConcatenateKernel>();
+ kernel->configure(compile_context, src_vector.at(i), offset, dst);
+ offset += src_vector.at(i)->dimension(_axis);
+ _concat_kernels.emplace_back(std::move(kernel));
+ }
+ break;
+ }
+ case 3:
+ {
+ for(unsigned int i = 0; i < _num_inputs; ++i)
+ {
+ auto kernel = std::make_unique<kernels::ClBatchConcatenateKernel>();
+ kernel->configure(compile_context, src_vector.at(i), offset, dst);
+ offset += src_vector.at(i)->dimension(_axis);
+ _concat_kernels.emplace_back(std::move(kernel));
+ }
+ break;
+ }
+ default:
+ ARM_COMPUTE_ERROR("Axis not supported");
+ }
+}
+
+Status ClConcatenate::validate(const std::vector<const ITensorInfo *> &src_vector, const ITensorInfo *dst, size_t axis)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON(dst == nullptr);
+ const unsigned int num_inputs = src_vector.size();
+
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(dst);
+ ARM_COMPUTE_RETURN_ERROR_ON(num_inputs < 2);
+
+ unsigned int offset = 0;
+ switch(axis)
+ {
+ case Window::DimX:
+ {
+ switch(num_inputs)
+ {
+ case 2:
+ // Validate WidthConcatenate2Tensors kernels if there are 2 inputs
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src_vector[0], src_vector[1]);
+ ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClWidthConcatenate2TensorsKernel::validate(src_vector[0], src_vector[1], dst));
+ break;
+ case 4:
+ // Validate WidthConcatenate4Tensors kernels if there are 4 inputs
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src_vector[0], src_vector[1], src_vector[2], src_vector[3]);
+ ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClWidthConcatenate4TensorsKernel::validate(src_vector[0], src_vector[1], src_vector[2], src_vector[3], dst));
+ break;
+ default:
+ // Validate generic case of WidthConcatenate kernel
+ for(const auto &src : src_vector)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src);
+ ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClWidthConcatenateKernel::validate(src, offset, dst));
+ offset += src->dimension(axis);
+ }
+ break;
+ }
+ break;
+ }
+ case Window::DimY:
+ {
+ for(const auto &src : src_vector)
+ {
+ ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClHeightConcatenateKernel::validate(src, offset, dst));
+ offset += src->dimension(axis);
+ }
+ break;
+ }
+ case Window::DimZ:
+ {
+ for(const auto &src : src_vector)
+ {
+ ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClDepthConcatenateKernel::validate(src, offset, dst));
+ offset += src->dimension(axis);
+ }
+ break;
+ }
+ case 3:
+ {
+ for(const auto &src : src_vector)
+ {
+ ARM_COMPUTE_RETURN_ON_ERROR(kernels::ClBatchConcatenateKernel::validate(src, offset, dst));
+ offset += src->dimension(axis);
+ }
+ break;
+ }
+ default:
+ ARM_COMPUTE_ERROR("Axis not supported");
+ }
+
+ if(dst->total_size() != 0)
+ {
+ TensorShape dst_shape = arm_compute::misc::shape_calculator::calculate_concatenate_shape(src_vector, axis);
+ ARM_COMPUTE_RETURN_ERROR_ON(dst_shape.total_size() != dst->tensor_shape().total_size());
+ }
+
+ return Status{};
+}
+
+void ClConcatenate::run(ITensorPack &tensors)
+{
+ if(tensors.empty())
+ {
+ ARM_COMPUTE_ERROR("No inputs provided");
+ }
+
+ if(static_cast<int>(tensors.size()) - 1 != static_cast<int>(_num_inputs))
+ {
+ ARM_COMPUTE_ERROR("Configured with different number of inputs");
+ }
+
+ if(_axis == Window::DimX && (_num_inputs == 2 || _num_inputs == 4))
+ {
+ ARM_COMPUTE_ERROR_ON(_concat_kernels.empty());
+ CLScheduler::get().enqueue_op(*_concat_kernels.at(0), tensors, true);
+ }
+ else
+ {
+ int i = 0;
+ for(auto &k : _concat_kernels)
+ {
+ ITensorPack pack;
+ pack.add_tensor(TensorType::ACL_SRC, tensors.get_const_tensor(ACL_SRC_VEC + i));
+ pack.add_tensor(TensorType::ACL_DST, tensors.get_tensor(ACL_DST));
+ CLScheduler::get().enqueue_op(*k, pack, true);
+ ++i;
+ }
+ }
+}
+} // namespace opencl
+} // namespace arm_compute
--- /dev/null
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLCONCATENATE_H
+#define ARM_COMPUTE_CLCONCATENATE_H
+
+#include "src/core/gpu/cl/ClCompileContext.h"
+#include "src/core/gpu/cl/IClKernel.h"
+#include "src/runtime/gpu/cl/IClOperator.h"
+
+#include <vector>
+
+namespace arm_compute
+{
+namespace opencl
+{
+/** Basic function to execute concatenate tensors along a given axis. This function calls the following kernels:
+ *
+ * -# @ref kernels::ClWidthConcatenateKernel (if underlying concatenation axis is 0).
+ * -# @ref kernels::ClHeightConcatenateKernel (if underlying concatenation axis is 1).
+ * -# @ref kernels::ClDepthConcatenateKernel (if underlying concatenation axis is 2).
+ * -# @ref kernels::ClBatchConcatenateKernel (if underlying concatenation axis is 3).
+ */
+class ClConcatenate : public IClOperator
+{
+public:
+ /** Default constructor */
+ ClConcatenate();
+ /** Initialise the kernel's inputs vector and dst.
+ *
+     * @note Input and dst tensor dimensions preconditions differ depending on the concatenation axis.
+ * @note Preconditions can be found respectively at @ref kernels::ClWidthConcatenateKernel,
+ * @ref kernels::ClHeightConcatenateKernel and @ref kernels::ClDepthConcatenateKernel.
+ *
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in,out] src_vector The vectors containing all the tensors to concatenate. Data types supported: All
+ * @param[out] dst Destination tensor. Data types supported: same as @p src_vector.
+ * @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3.
+ */
+ void configure(const ClCompileContext &compile_context, const std::vector<ITensorInfo *> &src_vector, ITensorInfo *dst, size_t axis);
+ /** Static function to check if given info will lead to a valid configuration of @ref ClConcatenate
+ *
+     * @note Input and dst tensor dimensions preconditions differ depending on the concatenation axis.
+ * @note Preconditions can be found respectively at @ref kernels::ClWidthConcatenateKernel,
+ * @ref kernels::ClHeightConcatenateKernel and @ref kernels::ClDepthConcatenateKernel.
+ *
+ * @param[in] src_vector The vectors containing all the tensors info to concatenate. Data types supported: All
+ * @param[in] dst Destination tensor info. Data types supported: same as @p src_vector.
+ * @param[in] axis Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3.
+ *
+ * @return a status
+ */
+ static Status validate(const std::vector<const ITensorInfo *> &src_vector, const ITensorInfo *dst, size_t axis);
+
+ // Inherited methods overridden:
+ void run(ITensorPack &tensors) override;
+
+private:
+ std::vector<std::unique_ptr<IClKernel>> _concat_kernels;
+ unsigned int _num_inputs;
+ unsigned int _axis;
+};
+} // namespace opencl
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLCONCATENATE_H */