-Subproject commit ff15a53098d2545808e2c840ad0d5bace6ae87a2
+Subproject commit ba65985c4a47effae4620b95b158ecae8764d2e2
+++ /dev/null
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLKERNELS_H
-#define ARM_COMPUTE_CLKERNELS_H
-
-/* Header regrouping all the CL kernels */
-#include "arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h"
-#include "arm_compute/core/CL/kernels/CLAccumulateKernel.h"
-#include "arm_compute/core/CL/kernels/CLActivationLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLArgMinMaxLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLBatchToSpaceLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLBitwiseAndKernel.h"
-#include "arm_compute/core/CL/kernels/CLBitwiseNotKernel.h"
-#include "arm_compute/core/CL/kernels/CLBitwiseOrKernel.h"
-#include "arm_compute/core/CL/kernels/CLBitwiseXorKernel.h"
-#include "arm_compute/core/CL/kernels/CLBoundingBoxTransformKernel.h"
-#include "arm_compute/core/CL/kernels/CLBox3x3Kernel.h"
-#include "arm_compute/core/CL/kernels/CLCannyEdgeKernel.h"
-#include "arm_compute/core/CL/kernels/CLChannelCombineKernel.h"
-#include "arm_compute/core/CL/kernels/CLChannelExtractKernel.h"
-#include "arm_compute/core/CL/kernels/CLChannelShuffleLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLCol2ImKernel.h"
-#include "arm_compute/core/CL/kernels/CLColorConvertKernel.h"
-#include "arm_compute/core/CL/kernels/CLComparisonKernel.h"
-#include "arm_compute/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h"
-#include "arm_compute/core/CL/kernels/CLConvolutionKernel.h"
-#include "arm_compute/core/CL/kernels/CLCopyKernel.h"
-#include "arm_compute/core/CL/kernels/CLCropKernel.h"
-#include "arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h"
-#include "arm_compute/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h"
-#include "arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLDepthToSpaceLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h"
-#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h"
-#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h"
-#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h"
-#include "arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLDerivativeKernel.h"
-#include "arm_compute/core/CL/kernels/CLDilateKernel.h"
-#include "arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLElementWiseUnaryLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h"
-#include "arm_compute/core/CL/kernels/CLErodeKernel.h"
-#include "arm_compute/core/CL/kernels/CLFFTDigitReverseKernel.h"
-#include "arm_compute/core/CL/kernels/CLFFTRadixStageKernel.h"
-#include "arm_compute/core/CL/kernels/CLFFTScaleKernel.h"
-#include "arm_compute/core/CL/kernels/CLFastCornersKernel.h"
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
-#include "arm_compute/core/CL/kernels/CLFlattenLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLFloorKernel.h"
-#include "arm_compute/core/CL/kernels/CLFuseBatchNormalizationKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
-#include "arm_compute/core/CL/kernels/CLGatherKernel.h"
-#include "arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h"
-#include "arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h"
-#include "arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h"
-#include "arm_compute/core/CL/kernels/CLGenerateProposalsLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h"
-#include "arm_compute/core/CL/kernels/CLHOGDetectorKernel.h"
-#include "arm_compute/core/CL/kernels/CLHarrisCornersKernel.h"
-#include "arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLHistogramKernel.h"
-#include "arm_compute/core/CL/kernels/CLIm2ColKernel.h"
-#include "arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLIntegralImageKernel.h"
-#include "arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLLKTrackerKernel.h"
-#include "arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h"
-#include "arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h"
-#include "arm_compute/core/CL/kernels/CLMaxUnpoolingLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLMeanStdDevKernel.h"
-#include "arm_compute/core/CL/kernels/CLMeanStdDevNormalizationKernel.h"
-#include "arm_compute/core/CL/kernels/CLMedian3x3Kernel.h"
-#include "arm_compute/core/CL/kernels/CLMemsetKernel.h"
-#include "arm_compute/core/CL/kernels/CLMinMaxLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h"
-#include "arm_compute/core/CL/kernels/CLNonLinearFilterKernel.h"
-#include "arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h"
-#include "arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLPadLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLPermuteKernel.h"
-#include "arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h"
-#include "arm_compute/core/CL/kernels/CLPoolingLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLPriorBoxLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h"
-#include "arm_compute/core/CL/kernels/CLQuantizationLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLROIAlignLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLROIPoolingLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLRangeKernel.h"
-#include "arm_compute/core/CL/kernels/CLReductionOperationKernel.h"
-#include "arm_compute/core/CL/kernels/CLRemapKernel.h"
-#include "arm_compute/core/CL/kernels/CLReorgLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLReshapeLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLReverseKernel.h"
-#include "arm_compute/core/CL/kernels/CLScaleKernel.h"
-#include "arm_compute/core/CL/kernels/CLScharr3x3Kernel.h"
-#include "arm_compute/core/CL/kernels/CLSelectKernel.h"
-#include "arm_compute/core/CL/kernels/CLSobel3x3Kernel.h"
-#include "arm_compute/core/CL/kernels/CLSobel5x5Kernel.h"
-#include "arm_compute/core/CL/kernels/CLSobel7x7Kernel.h"
-#include "arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLSpaceToBatchLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLSpaceToDepthLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLStackLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLStridedSliceKernel.h"
-#include "arm_compute/core/CL/kernels/CLTableLookupKernel.h"
-#include "arm_compute/core/CL/kernels/CLThresholdKernel.h"
-#include "arm_compute/core/CL/kernels/CLTileKernel.h"
-#include "arm_compute/core/CL/kernels/CLTransposeKernel.h"
-#include "arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLWarpAffineKernel.h"
-#include "arm_compute/core/CL/kernels/CLWarpPerspectiveKernel.h"
-#include "arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h"
-#include "arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h"
-#include "arm_compute/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h"
-#include "arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h"
-#include "arm_compute/core/CL/kernels/CLWinogradInputTransformKernel.h"
-#include "arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h"
-#include "arm_compute/core/CL/kernels/CLYOLOLayerKernel.h"
-#include "arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h"
-
-#endif /* ARM_COMPUTE_CLKERNELS_H */
const ICLFloatArray *scale; /**< Quantization scale array */
const ICLInt32Array *offset; /**< Quantization offset array */
};
+
+/** Internal keypoint structure for Lucas-Kanade Optical Flow */
+struct CLLKInternalKeypoint
+{
+ float x{ 0.f }; /**< x coordinate of the keypoint */
+ float y{ 0.f }; /**< y coordinate of the keypoint */
+ float tracking_status{ 0.f }; /**< the tracking status of the keypoint */
+ float dummy{ 0.f }; /**< Dummy field, to make sure the data structure 128-bit align, so that GPU can use vload4 */
+};
+
+/** Structure for storing Spatial Gradient Matrix and the minimum eigenvalue for each keypoint */
+struct CLCoefficientTable
+{
+ float A11; /**< iA11 * FLT_SCALE */
+ float A12; /**< iA11 * FLT_SCALE */
+ float A22; /**< iA11 * FLT_SCALE */
+ float min_eig; /**< Minimum eigenvalue */
+};
+
+/** Structure for storing ival, ixval and iyval for each point inside the window */
+struct CLOldValue
+{
+ int16_t ival; /**< ival extracts from old image */
+ int16_t ixval; /**< ixval extracts from scharr Gx image */
+ int16_t iyval; /**< iyval extracts from scharr Gy image */
+ int16_t dummy; /**< Dummy field, to make sure the data structure 128-bit align, so that GPU can use vload4 */
+};
+
+/** Interface for OpenCL Array of Internal Key Points. */
+using ICLLKInternalKeypointArray = ICLArray<CLLKInternalKeypoint>;
+/** Interface for OpenCL Array of Coefficient Tables. */
+using ICLCoefficientTableArray = ICLArray<CLCoefficientTable>;
+/** Interface for OpenCL Array of Old Values. */
+using ICLOldValArray = ICLArray<CLOldValue>;
+
} // namespace arm_compute
#endif /* ARM_COMPUTE_CL_TYPES_H */
+++ /dev/null
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_ICLKERNEL_H
-#define ARM_COMPUTE_ICLKERNEL_H
-
-#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/CLTypes.h"
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/GPUTarget.h"
-#include "arm_compute/core/IKernel.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/experimental/Types.h"
-
-#include <string>
-
-namespace arm_compute
-{
-template <typename T>
-class ICLArray;
-class ICLTensor;
-class Window;
-
-/** Common interface for all the OpenCL kernels */
-class ICLKernel : public IKernel
-{
-private:
- /** Returns the number of arguments enqueued per array object.
- *
- * @return The number of arguments enqueued per array object.
- */
- template <unsigned int dimension_size>
- constexpr static unsigned int num_arguments_per_array()
- {
- return num_arguments_per_tensor<dimension_size>();
- }
- /** Returns the number of arguments enqueued per tensor object.
- *
- * @return The number of arguments enqueued per tensor object.
- */
- template <unsigned int dimension_size>
- constexpr static unsigned int num_arguments_per_tensor()
- {
- return 2 + 2 * dimension_size;
- }
- using IKernel::configure; //Prevent children from calling IKernel::configure() directly
-protected:
- /** Configure the kernel's window and local workgroup size hint.
- *
- * @param[in] window The maximum window which will be returned by window()
- * @param[in] lws_hint (Optional) Local-Workgroup-Size to use.
- */
- void configure_internal(const Window &window, cl::NDRange lws_hint = CLKernelLibrary::get().default_ndrange())
- {
- _lws_hint = lws_hint;
- IKernel::configure(window);
- }
-
-public:
- /** Constructor */
- ICLKernel()
- : _kernel(nullptr), _target(GPUTarget::MIDGARD), _config_id(arm_compute::default_config_id), _max_workgroup_size(0), _lws_hint()
- {
- }
- /** Returns a reference to the OpenCL kernel of this object.
- *
- * @return A reference to the OpenCL kernel of this object.
- */
- cl::Kernel &kernel()
- {
- return _kernel;
- }
- /** Add the passed 1D array's parameters to the object's kernel's arguments starting from the index idx.
- *
- * @param[in,out] idx Index at which to start adding the array's arguments. Will be incremented by the number of kernel arguments set.
- * @param[in] array Array to set as an argument of the object's kernel.
- * @param[in] strides @ref Strides object containing stride of each dimension in bytes.
- * @param[in] num_dimensions Number of dimensions of the @p array.
- * @param[in] window Window the kernel will be executed on.
- */
- template <typename T>
- void add_1D_array_argument(unsigned int &idx, const ICLArray<T> *array, const Strides &strides, unsigned int num_dimensions, const Window &window)
- {
- add_array_argument<T, 1>(idx, array, strides, num_dimensions, window);
- }
- /** Add the passed 1D tensor's parameters to the object's kernel's arguments starting from the index idx.
- *
- * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
- * @param[in] tensor Tensor to set as an argument of the object's kernel.
- * @param[in] window Window the kernel will be executed on.
- */
- void add_1D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
- {
- add_tensor_argument<1>(idx, tensor, window);
- }
- /** Add the passed 1D tensor's parameters to the object's kernel's arguments starting from the index idx if the condition is true.
- *
- * @param[in] cond Condition to check
- * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
- * @param[in] tensor Tensor to set as an argument of the object's kernel.
- * @param[in] window Window the kernel will be executed on.
- */
- void add_1D_tensor_argument_if(bool cond, unsigned int &idx, const ICLTensor *tensor, const Window &window)
- {
- if(cond)
- {
- add_1D_tensor_argument(idx, tensor, window);
- }
- }
- /** Add the passed 2D tensor's parameters to the object's kernel's arguments starting from the index idx.
- *
- * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
- * @param[in] tensor Tensor to set as an argument of the object's kernel.
- * @param[in] window Window the kernel will be executed on.
- */
- void add_2D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
- {
- add_tensor_argument<2>(idx, tensor, window);
- }
- /** Add the passed 2D tensor's parameters to the object's kernel's arguments starting from the index idx if the condition is true.
- *
- * @param[in] cond Condition to check
- * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
- * @param[in] tensor Tensor to set as an argument of the object's kernel.
- * @param[in] window Window the kernel will be executed on.
- */
- void add_2D_tensor_argument_if(bool cond, unsigned int &idx, const ICLTensor *tensor, const Window &window)
- {
- if(cond)
- {
- add_2D_tensor_argument(idx, tensor, window);
- }
- }
- /** Add the passed 3D tensor's parameters to the object's kernel's arguments starting from the index idx.
- *
- * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
- * @param[in] tensor Tensor to set as an argument of the object's kernel.
- * @param[in] window Window the kernel will be executed on.
- */
- void add_3D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
- {
- add_tensor_argument<3>(idx, tensor, window);
- }
- /** Add the passed 4D tensor's parameters to the object's kernel's arguments starting from the index idx.
- *
- * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
- * @param[in] tensor Tensor to set as an argument of the object's kernel.
- * @param[in] window Window the kernel will be executed on.
- */
- void add_4D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
- {
- add_tensor_argument<4>(idx, tensor, window);
- }
- /** Returns the number of arguments enqueued per 1D array object.
- *
- * @return The number of arguments enqueues per 1D array object.
- */
- constexpr static unsigned int num_arguments_per_1D_array()
- {
- return num_arguments_per_array<1>();
- }
- /** Returns the number of arguments enqueued per 1D tensor object.
- *
- * @return The number of arguments enqueues per 1D tensor object.
- */
- constexpr static unsigned int num_arguments_per_1D_tensor()
- {
- return num_arguments_per_tensor<1>();
- }
- /** Returns the number of arguments enqueued per 2D tensor object.
- *
- * @return The number of arguments enqueues per 2D tensor object.
- */
- constexpr static unsigned int num_arguments_per_2D_tensor()
- {
- return num_arguments_per_tensor<2>();
- }
- /** Returns the number of arguments enqueued per 3D tensor object.
- *
- * @return The number of arguments enqueues per 3D tensor object.
- */
- constexpr static unsigned int num_arguments_per_3D_tensor()
- {
- return num_arguments_per_tensor<3>();
- }
- /** Returns the number of arguments enqueued per 4D tensor object.
- *
- * @return The number of arguments enqueues per 4D tensor object.
- */
- constexpr static unsigned int num_arguments_per_4D_tensor()
- {
- return num_arguments_per_tensor<4>();
- }
- /** Enqueue the OpenCL kernel to process the given window on the passed OpenCL command queue.
- *
- * @note The queue is *not* flushed by this method, and therefore the kernel will not have been executed by the time this method returns.
- *
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
- * @param[in,out] queue Command queue on which to enqueue the kernel.
- */
- virtual void run(const Window &window, cl::CommandQueue &queue)
- {
- ARM_COMPUTE_UNUSED(window, queue);
- }
- /** Enqueue the OpenCL kernel to process the given window on the passed OpenCL command queue.
- *
- * @note The queue is *not* flushed by this method, and therefore the kernel will not have been executed by the time this method returns.
- *
- * @param[in] tensors A vector containing the tensors to operato on.
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
- * @param[in,out] queue Command queue on which to enqueue the kernel.
- */
- virtual void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue)
- {
- ARM_COMPUTE_UNUSED(tensors, window, queue);
- }
- /** Add the passed parameters to the object's kernel's arguments starting from the index idx.
- *
- * @param[in,out] idx Index at which to start adding the arguments. Will be incremented by the number of kernel arguments set.
- * @param[in] value Value to set as an argument of the object's kernel.
- */
- template <typename T>
- void add_argument(unsigned int &idx, T value)
- {
- _kernel.setArg(idx++, value);
- }
-
- /** Set the Local-Workgroup-Size hint
- *
- * @note This method should be called after the configuration of the kernel
- *
- * @param[in] lws_hint Local-Workgroup-Size to use
- */
- void set_lws_hint(const cl::NDRange &lws_hint)
- {
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); // lws_hint will be overwritten by configure()
- _lws_hint = lws_hint;
- }
-
- /** Return the Local-Workgroup-Size hint
- *
- * @return Current lws hint
- */
- cl::NDRange lws_hint() const
- {
- return _lws_hint;
- }
-
- /** Get the configuration ID
- *
- * @note The configuration ID can be used by the caller to distinguish different calls of the same OpenCL kernel
- * In particular, this method can be used by CLScheduler to keep track of the best LWS for each configuration of the same kernel.
- * The configuration ID should be provided only for the kernels potentially affected by the LWS geometry
- *
- * @note This method should be called after the configuration of the kernel
- *
- * @return configuration id string
- */
- const std::string &config_id() const
- {
- return _config_id;
- }
-
- /** Set the targeted GPU architecture
- *
- * @param[in] target The targeted GPU architecture
- */
- void set_target(GPUTarget target)
- {
- _target = target;
- }
-
- /** Set the targeted GPU architecture according to the CL device
- *
- * @param[in] device A CL device
- */
- void set_target(cl::Device &device);
-
- /** Get the targeted GPU architecture
- *
- * @return The targeted GPU architecture.
- */
- GPUTarget get_target() const
- {
- return _target;
- }
-
- /** Get the maximum workgroup size for the device the CLKernelLibrary uses.
- *
- * @return The maximum workgroup size value.
- */
- size_t get_max_workgroup_size();
- /** Get the global work size given an execution window
- *
- * @param[in] window Execution window
- *
- * @return Global work size of the given execution window
- */
- static cl::NDRange gws_from_window(const Window &window);
-
-private:
- /** Add the passed array's parameters to the object's kernel's arguments starting from the index idx.
- *
- * @param[in,out] idx Index at which to start adding the array's arguments. Will be incremented by the number of kernel arguments set.
- * @param[in] array Array to set as an argument of the object's kernel.
- * @param[in] strides @ref Strides object containing stride of each dimension in bytes.
- * @param[in] num_dimensions Number of dimensions of the @p array.
- * @param[in] window Window the kernel will be executed on.
- */
- template <typename T, unsigned int dimension_size>
- void add_array_argument(unsigned int &idx, const ICLArray<T> *array, const Strides &strides, unsigned int num_dimensions, const Window &window);
- /** Add the passed tensor's parameters to the object's kernel's arguments starting from the index idx.
- *
- * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
- * @param[in] tensor Tensor to set as an argument of the object's kernel.
- * @param[in] window Window the kernel will be executed on.
- */
- template <unsigned int dimension_size>
- void add_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window);
-
-protected:
- cl::Kernel _kernel; /**< OpenCL kernel to run */
- GPUTarget _target; /**< The targeted GPU */
- std::string _config_id; /**< Configuration ID */
- size_t _max_workgroup_size; /**< The maximum workgroup size for this kernel */
-private:
- cl::NDRange _lws_hint; /**< Local workgroup size hint for the OpenCL kernel */
-};
-
-/** Add the kernel to the command queue with the given window.
- *
- * @note Depending on the size of the window, this might translate into several jobs being enqueued.
- *
- * @note If kernel->kernel() is empty then the function will return without adding anything to the queue.
- *
- * @param[in,out] queue OpenCL command queue.
- * @param[in] kernel Kernel to enqueue
- * @param[in] window Window the kernel has to process.
- * @param[in] lws_hint (Optional) Local workgroup size requested. Default is based on the device target.
- * @param[in] use_dummy_work_items (Optional) Use dummy work items in order to have two dimensional power of two NDRange. Default is false
- * Note: it is kernel responsibility to check if the work-item is out-of-range
- *
- * @note If any dimension of the lws is greater than the global workgroup size then no lws will be passed.
- */
-void enqueue(cl::CommandQueue &queue, ICLKernel &kernel, const Window &window, const cl::NDRange &lws_hint = CLKernelLibrary::get().default_ndrange(), bool use_dummy_work_items = false);
-
-/** Add the passed array's parameters to the object's kernel's arguments starting from the index idx.
- *
- * @param[in,out] idx Index at which to start adding the array's arguments. Will be incremented by the number of kernel arguments set.
- * @param[in] array Array to set as an argument of the object's kernel.
- * @param[in] strides @ref Strides object containing stride of each dimension in bytes.
- * @param[in] num_dimensions Number of dimensions of the @p array.
- * @param[in] window Window the kernel will be executed on.
- */
-template <typename T, unsigned int dimension_size>
-void ICLKernel::add_array_argument(unsigned &idx, const ICLArray<T> *array, const Strides &strides, unsigned int num_dimensions, const Window &window)
-{
- ARM_COMPUTE_ERROR_ON(array == nullptr);
-
- // Calculate offset to the start of the window
- unsigned int offset_first_element = 0;
-
- for(unsigned int n = 0; n < num_dimensions; ++n)
- {
- offset_first_element += window[n].start() * strides[n];
- }
-
- unsigned int idx_start = idx;
- _kernel.setArg(idx++, array->cl_buffer());
-
- for(unsigned int dimension = 0; dimension < dimension_size; dimension++)
- {
- _kernel.setArg<cl_uint>(idx++, strides[dimension]);
- _kernel.setArg<cl_uint>(idx++, strides[dimension] * window[dimension].step());
- }
-
- _kernel.setArg<cl_uint>(idx++, offset_first_element);
-
- ARM_COMPUTE_ERROR_ON_MSG_VAR(idx_start + num_arguments_per_array<dimension_size>() != idx,
- "add_%dD_array_argument() is supposed to add exactly %d arguments to the kernel", dimension_size, num_arguments_per_array<dimension_size>());
- ARM_COMPUTE_UNUSED(idx_start);
-}
-}
-#endif /*ARM_COMPUTE_ICLKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2017-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_ICLSIMPLE2DKERNEL_H
-#define ARM_COMPUTE_ICLSIMPLE2DKERNEL_H
-
-#include "arm_compute/core/CL/ICLSimpleKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for simple OpenCL kernels having 1 tensor input and 1 tensor output. This interface can be used when the work-item processes a 2D tile */
-class ICLSimple2DKernel : public ICLSimpleKernel
-{
-public:
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-};
-}
-#endif /*ARM_COMPUTE_ICLSIMPLE2DKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2017-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_ICLSIMPLE3DKERNEL_H
-#define ARM_COMPUTE_ICLSIMPLE3DKERNEL_H
-
-#include "arm_compute/core/CL/ICLSimple2DKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for simple OpenCL kernels having 1 tensor input and 1 tensor output.
- * Both input tensor and output tensor must have at least 3 dimensions.
- */
-class ICLSimple3DKernel : public ICLSimple2DKernel
-{
-public:
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-};
-}
-#endif /*ARM_COMPUTE_ICLSIMPLE3DKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2016-2019 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_ICLSIMPLEKERNEL_H
-#define ARM_COMPUTE_ICLSIMPLEKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/Helpers.h"
-
-namespace arm_compute
-{
-/** Interface for simple OpenCL kernels having 1 tensor input and 1 tensor output */
-class ICLSimpleKernel : public ICLKernel
-{
-public:
- /** Constructor. */
- ICLSimpleKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- ICLSimpleKernel(const ICLSimpleKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- ICLSimpleKernel &operator=(const ICLSimpleKernel &) = delete;
- /** Allow instances of this class to be moved */
- ICLSimpleKernel(ICLSimpleKernel &&) = default;
- /** Allow instances of this class to be moved */
- ICLSimpleKernel &operator=(ICLSimpleKernel &&) = default;
- /** Default destructor */
- ~ICLSimpleKernel() = default;
-
- /** Configure the kernel
- *
- * @param[in] input Source tensor.
- * @param[out] output Destination tensor.
- * @param[in] num_elems_processed_per_iteration Number of processed elements per iteration.
- * @param[in] border_undefined (Optional) True if the border mode is undefined. False if it's replicate or constant.
- * @param[in] border_size (Optional) Size of the border.
- */
- void configure(const ICLTensor *input, ICLTensor *output, unsigned int num_elems_processed_per_iteration, bool border_undefined = false, const BorderSize &border_size = BorderSize());
-
-protected:
- const ICLTensor *_input;
- ICLTensor *_output;
-};
-}
-
-#endif /*ARM_COMPUTE_ICLSIMPLEKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLABSOLUTEDIFFERENCEKERNEL_H
-#define ARM_COMPUTE_CLABSOLUTEDIFFERENCEKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the absolute difference kernel.
- *
- * Absolute difference is computed by:
- * @f[ output(x,y) = | input1(x,y) - input2(x,y) | @f]
- */
-class CLAbsoluteDifferenceKernel : public ICLKernel
-{
-public:
- /** Default constructor. */
- CLAbsoluteDifferenceKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLAbsoluteDifferenceKernel(const CLAbsoluteDifferenceKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLAbsoluteDifferenceKernel &operator=(const CLAbsoluteDifferenceKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLAbsoluteDifferenceKernel(CLAbsoluteDifferenceKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLAbsoluteDifferenceKernel &operator=(CLAbsoluteDifferenceKernel &&) = default;
- /** Default destructor */
- ~CLAbsoluteDifferenceKernel() = default;
-
- /** Set the inputs and output images.
- *
- * @param[in] input1 Source tensor. Data types supported: U8/S16.
- * @param[in] input2 Source tensor. Data types supported: U8/S16.
- * @param[out] output Destination tensor. Data types supported: U8/S16.
- */
- void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
- /** Set the inputs and output images.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input1 Source tensor. Data types supported: U8/S16.
- * @param[in] input2 Source tensor. Data types supported: U8/S16.
- * @param[out] output Destination tensor. Data types supported: U8/S16.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input1; /**< Source tensor 1. */
- const ICLTensor *_input2; /**< Source tensor 2. */
- ICLTensor *_output; /**< Destination tensor. */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLABSOLUTEDIFFERENCEKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLACCUMULATEKERNEL_H
-#define ARM_COMPUTE_CLACCUMULATEKERNEL_H
-
-#include "arm_compute/core/CL/ICLSimple2DKernel.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the accumulate kernel.
- *
- * Accumulation is computed by:
- * @f[ accum(x,y) = accum(x,y) + input(x,y) @f]
- */
-class CLAccumulateKernel : public ICLSimple2DKernel
-{
-public:
- /** Set the input and accumulation tensors.
- *
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] accum Destination tensor. Data types supported: S16.
- */
- void configure(const ICLTensor *input, ICLTensor *accum);
- /** Set the input and accumulation tensors.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] accum Destination tensor. Data types supported: S16.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *accum);
-};
-
-/** Interface for the accumulate weighted kernel.
- *
- * Weighted accumulation is computed:
- * @f[ accum(x,y) = (1 - \alpha)*accum(x,y) + \alpha*input(x,y) @f]
- *
- * Where @f$ 0 \le \alpha \le 1 @f$
- * Conceptually, the rounding for this is defined as:
- * @f[ output(x,y)= uint8( (1 - \alpha) * float32( int32( output(x,y) ) ) + \alpha * float32( int32( input(x,y) ) ) ) @f]
-*/
-class CLAccumulateWeightedKernel : public ICLSimple2DKernel
-{
-public:
- /** Set the input and accumulation images, and the scale value.
- *
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[in] alpha Scalar value in the range [0, 1.0]. Data types supported: F32.
- * @param[in,out] accum Accumulated tensor. Data types supported: U8.
- */
- void configure(const ICLTensor *input, float alpha, ICLTensor *accum);
- /** Set the input and accumulation images, and the scale value.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[in] alpha Scalar value in the range [0, 1.0]. Data types supported: F32.
- * @param[in,out] accum Accumulated tensor. Data types supported: U8.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, float alpha, ICLTensor *accum);
-};
-
-/** Interface for the accumulate squared kernel.
- *
- * The accumulation of squares is computed:
- * @f[ accum(x,y) = saturate_{int16} ( (uint16) accum(x,y) + (((uint16)(input(x,y)^2)) >> (shift)) ) @f]
- *
- * Where @f$ 0 \le shift \le 15 @f$
-*/
-class CLAccumulateSquaredKernel : public ICLSimple2DKernel
-{
-public:
- /** Set the input and accumulation tensors and the shift value.
- *
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[in] shift Shift value in the range of [0, 15]. Data types supported: U32.
- * @param[in,out] accum Accumulated tensor. Data types supported: S16.
- */
- void configure(const ICLTensor *input, uint32_t shift, ICLTensor *accum);
- /** Set the input and accumulation tensors and the shift value.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[in] shift Shift value in the range of [0, 15]. Data types supported: U32.
- * @param[in,out] accum Accumulated tensor. Data types supported: S16.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, uint32_t shift, ICLTensor *accum);
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLACCUMULATEKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLACTIVATIONLAYERKERNEL_H
-#define ARM_COMPUTE_CLACTIVATIONLAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-/** Interface for the activation layer kernel. */
-class CLActivationLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLActivationLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLActivationLayerKernel(const CLActivationLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLActivationLayerKernel &operator=(const CLActivationLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLActivationLayerKernel(CLActivationLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLActivationLayerKernel &operator=(CLActivationLayerKernel &&) = default;
- /** Default destructor */
- ~CLActivationLayerKernel() = default;
- /** Set the input and output tensor.
- *
- * @note If the output tensor is a nullptr, the activation function will be performed in-place
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result
- * of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32.
- * @param[out] output Destination tensor. Data type supported: same as @p input
- * @param[in] act_info Activation layer information.
- */
- void configure(const CLCompileContext &compile_context, ITensorInfo *input, ITensorInfo *output, ActivationLayerInfo act_info);
- /** Static function to check if given info will lead to a valid configuration of @ref CLActivationLayerKernel
- *
- * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result
- * of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32.
- * @param[in] output Destination tensor info. Data type supported: same as @p input
- * @param[in] act_info Activation layer information.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info);
-
- // Inherited methods overridden:
- void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
-
-private:
- bool _run_in_place;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLACTIVATIONLAYERKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2019-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLARGMINMAXLAYERKERNEL_H
-#define ARM_COMPUTE_CLARGMINMAXLAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the reduction operation kernel
- *
- * @note The default data type for an uninitialized output tensor is
- * signed 32-bit integer (S32). It is the user's responsibility to check
- * that the results do not overflow because the indices are computed
- * in unsigned 32-bit (U32).
- */
-class CLArgMinMaxLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLArgMinMaxLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLArgMinMaxLayerKernel(const CLArgMinMaxLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLArgMinMaxLayerKernel &operator=(const CLArgMinMaxLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLArgMinMaxLayerKernel(CLArgMinMaxLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLArgMinMaxLayerKernel &operator=(CLArgMinMaxLayerKernel &&) = default;
- /** Default destructor */
- ~CLArgMinMaxLayerKernel() = default;
-
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/S32/F16/F32.
- * @param[in] prev_output Destination tensor of the previous iterations of @ref CLArgMinMaxLayerKernel. Data types supported: U32/S32
- * Has to be nullptr for the first iteration
- * @param[out] output Destination tensor. Data types supported: U32/S32
- * Output will have the same number of dimensions as input.
- * @param[in] axis Axis along which to reduce. Supported reduction axis : 0,1,2,3
- * @param[in] op Reduction operation to perform. Only ArgMin and ArgMax are supported.
- */
- void configure(const ICLTensor *input, const ICLTensor *prev_output, ICLTensor *output, unsigned int axis, ReductionOperation op);
- /** Set the input and output tensors.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/S32/F16/F32.
- * @param[in] prev_output Destination tensor of the previous iterations of @ref CLArgMinMaxLayerKernel. Data types supported: U32/S32
- * Has to be nullptr for the first iteration
- * @param[out] output Destination tensor. Data types supported: U32/S32
- * Output will have the same number of dimensions as input.
- * @param[in] axis Axis along which to reduce. Supported reduction axis : 0,1,2,3
- * @param[in] op Reduction operation to perform. Only ArgMin and ArgMax are supported.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *prev_output, ICLTensor *output, unsigned int axis, ReductionOperation op);
-
- /** Static function to check if given info will lead to a valid configuration of @ref CLArgMinMaxLayerKernel.
- *
- * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/S32/F16/F32.
- * @param[in] prev_output Destination tensor info of the previous iterations. Data types supported: U32/S32
- * Has to be nullptr for the first iteration
- * @param[in] output Destination tensor info. Data types supported: U32/S32
- * Output will have the same number of dimensions as input.
- * @param[in] axis Axis along which to reduce. Supported reduction axis : 0,1,2,3
- * @param[in] op Reduction operation to perform. Only ArgMin and ArgMax are supported.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *prev_output, const ITensorInfo *output, unsigned int axis, ReductionOperation op);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- const ICLTensor *_prev_output;
- ICLTensor *_output;
- unsigned int _reduction_axis;
- ReductionOperation _op;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLARGMINMAXLAYERKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2019-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef ARM_COMPUTE_CLBATCHCONCATENATEKERNEL_H
-#define ARM_COMPUTE_CLBATCHCONCATENATEKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the batch concatenate kernel.
- * The input tensor will be concatenated into the output tensor.
- */
-class CLBatchConcatenateLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLBatchConcatenateLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLBatchConcatenateLayerKernel(const CLBatchConcatenateLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLBatchConcatenateLayerKernel &operator=(const CLBatchConcatenateLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLBatchConcatenateLayerKernel(CLBatchConcatenateLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLBatchConcatenateLayerKernel &operator=(CLBatchConcatenateLayerKernel &&) = default;
- /** Default destructor */
- ~CLBatchConcatenateLayerKernel() = default;
- /** Initialise the kernel's inputs and output
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor. Data types supported: All.
- * @param[in] batch_offset The offset on axis # 3.
- * @param[in,out] output Output tensor. Data types supported: Same as @p input.
- *
- * @note: The output tensor's low two dimensions can't be smaller than the input one's.
- * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2.
- *
- */
- void configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int batch_offset, ITensorInfo *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLBatchConcatenateLayerKernel
- *
- * @param[in] input Input tensor info. Data types supported: All.
- * @param[in] batch_offset The offset on axis # 3.
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, unsigned int batch_offset, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
-
-private:
- unsigned int _batch_offset;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLBATCHCONCATENATEKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLBATCHNORMALIZATIONLAYERKERNEL_H
-#define ARM_COMPUTE_CLBATCHNORMALIZATIONLAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the BatchNormalization layer kernel.
- */
-class CLBatchNormalizationLayerKernel : public ICLKernel
-{
-public:
- /** Constructor */
- CLBatchNormalizationLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLBatchNormalizationLayerKernel(const CLBatchNormalizationLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLBatchNormalizationLayerKernel &operator=(const CLBatchNormalizationLayerKernel &) = delete;
- /** Default Move Constructor. */
- CLBatchNormalizationLayerKernel(CLBatchNormalizationLayerKernel &&) = default;
- /** Default move assignment operator */
- CLBatchNormalizationLayerKernel &operator=(CLBatchNormalizationLayerKernel &&) = default;
- /** Default destructor */
- ~CLBatchNormalizationLayerKernel() = default;
-
- /** Set the input and output tensors.
- *
- * @note If the output tensor is a nullptr, the batch normalization function will be performed in-place
- *
- * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result.
- * 3 lower dimensions represent a single input with dimensions [width, height, FM].
- * The rest are optional and used for representing batches. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC.
- * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input
- * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
- * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
- * @param[in] beta (Optional) Beta values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for beta is 0. Data types supported: Same as @p input
- * @param[in] gamma (Optional) Gamma values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for gamma is 1. Data types supported: Same as @p input
- * @param[in] epsilon (Optional) Small value to avoid division with zero. Default value is 0.001f.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
- */
- void configure(ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta = nullptr, const ICLTensor *gamma = nullptr, float epsilon = 0.001f,
- ActivationLayerInfo act_info = ActivationLayerInfo());
- /** Set the input and output tensors.
- *
- * @note If the output tensor is a nullptr, the batch normalization function will be performed in-place
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result.
- * 3 lower dimensions represent a single input with dimensions [width, height, FM].
- * The rest are optional and used for representing batches. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC.
- * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input
- * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
- * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
- * @param[in] beta (Optional) Beta values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for beta is 0. Data types supported: Same as @p input
- * @param[in] gamma (Optional) Gamma values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for gamma is 1. Data types supported: Same as @p input
- * @param[in] epsilon (Optional) Small value to avoid division with zero. Default value is 0.001f.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
- */
- void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta = nullptr,
- const ICLTensor *gamma = nullptr, float epsilon = 0.001f, ActivationLayerInfo act_info = ActivationLayerInfo());
- /** Static function to check if given info will lead to a valid configuration of @ref CLBatchNormalizationLayerKernel
- *
- * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result.
- * 3 lower dimensions represent a single input with dimensions [width, height, FM].
- * The rest are optional and used for representing batches. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC.
- * @param[in] output Destination tensor info. Output will have the same number of dimensions as input. Data type supported: same as @p input
- * @param[in] mean Mean values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
- * @param[in] var Variance values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
- * @param[in] beta (Optional) Beta values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for beta is 0. Data types supported: Same as @p input
- * @param[in] gamma (Optional) Gamma values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for gamma is 1. Data types supported: Same as @p input
- * @param[in] epsilon (Optional) Small value to avoid division with zero. Default value is 0.001f.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output,
- const ITensorInfo *mean, const ITensorInfo *var,
- const ITensorInfo *beta = nullptr, const ITensorInfo *gamma = nullptr,
- float epsilon = 0.001f, ActivationLayerInfo act_info = ActivationLayerInfo());
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- ICLTensor *_input;
- ICLTensor *_output;
- const ICLTensor *_mean;
- const ICLTensor *_var;
- const ICLTensor *_beta;
- const ICLTensor *_gamma;
- float _epsilon;
- bool _run_in_place;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLBATCHNORMALIZATIONLAYERKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLBATCHTOSPACELAYERKERNEL_H
-#define ARM_COMPUTE_CLBATCHTOSPACELAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the batch to space kernel */
-class CLBatchToSpaceLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLBatchToSpaceLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLBatchToSpaceLayerKernel(const CLBatchToSpaceLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLBatchToSpaceLayerKernel &operator=(const CLBatchToSpaceLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLBatchToSpaceLayerKernel(CLBatchToSpaceLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLBatchToSpaceLayerKernel &operator=(CLBatchToSpaceLayerKernel &&) = default;
- /** Default destructor */
- ~CLBatchToSpaceLayerKernel() = default;
- /** Initialise the kernel's inputs and output.
- *
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
- * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32
- * @param[out] output Tensor output. Data types supported: same as @p input
- */
- void configure(const ICLTensor *input, const ICLTensor *block_shape, ICLTensor *output);
- /** Initialise the kernel's inputs and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
- * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32
- * @param[out] output Tensor output. Data types supported: same as @p input
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *block_shape, ICLTensor *output);
- /** Initialise the kernel's inputs and output (Static block shape).
- *
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
- * @param[in] block_shape_x Block shape x value.
- * @param[in] block_shape_y Block shape y value.
- * @param[out] output Tensor output. Data types supported: same as @p input
- */
- void configure(const ICLTensor *input, const int32_t block_shape_x, const int32_t block_shape_y, ICLTensor *output);
- /** Initialise the kernel's inputs and output (Static block shape).
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
- * @param[in] block_shape_x Block shape x value.
- * @param[in] block_shape_y Block shape y value.
- * @param[out] output Tensor output. Data types supported: same as @p input
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const int32_t block_shape_x, const int32_t block_shape_y, ICLTensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLBatchToSpaceLayerKernel
- *
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
- * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32
- * @param[in] output Tensor output. Data types supported: same as @p input
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLBatchToSpaceLayerKernel (Static block shape).
- *
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
- * @param[in] block_shape_x Block shape x value.
- * @param[in] block_shape_y Block shape y value.
- * @param[in] output Tensor output. Data types supported: same as @p input
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const int32_t block_shape_x, const int32_t block_shape_y, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input; /**< Source tensor */
- const ICLTensor *_block_shape; /**< Block shape tensor */
- ICLTensor *_output; /**< Destination tensor */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLBATCHTOSPACELAYERKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLBITWISEANDKERNEL_H
-#define ARM_COMPUTE_CLBITWISEANDKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the bitwise AND operation kernel.
- *
- * Result is computed by:
- * @f[ output(x,y) = input1(x,y) \land input2(x,y) @f]
- */
-class CLBitwiseAndKernel : public ICLKernel
-{
-public:
- /** Default constructor. */
- CLBitwiseAndKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLBitwiseAndKernel(const CLBitwiseAndKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLBitwiseAndKernel &operator=(const CLBitwiseAndKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLBitwiseAndKernel(CLBitwiseAndKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLBitwiseAndKernel &operator=(CLBitwiseAndKernel &&) = default;
- /** Set the inputs and output images
- *
- * @param[in] input1 Source tensor. Data types supported: U8.
- * @param[in] input2 Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor. Data types supported: U8.
- */
- void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
- /** Set the inputs and output images
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input1 Source tensor. Data types supported: U8.
- * @param[in] input2 Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor. Data types supported: U8.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input1; /**< Source tensor 1 */
- const ICLTensor *_input2; /**< Source tensor 2 */
- ICLTensor *_output; /**< Destination tensor */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLBITWISEANDKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLBITWISENOTKERNEL_H
-#define ARM_COMPUTE_CLBITWISENOTKERNEL_H
-
-#include "arm_compute/core/CL/ICLSimple2DKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the bitwise NOT operation kernel.
- *
- * Result is computed by:
- * @f[ output(x,y) = \lnot input(x,y) @f]
- */
-class CLBitwiseNotKernel : public ICLSimple2DKernel
-{
-public:
- /** Set the inputs and output images.
- *
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor. Data types supported: U8.
- */
- void configure(const ICLTensor *input, ICLTensor *output);
- /** Set the inputs and output images.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor. Data types supported: U8.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLBITWISENOTKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLBITWISEORKERNEL_H
-#define ARM_COMPUTE_CLBITWISEORKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the bitwise OR operation kernel.
- *
- * Result is computed by:
- * @f[ output(x,y) = input1(x,y) \lor input2(x,y) @f]
- */
-class CLBitwiseOrKernel : public ICLKernel
-{
-public:
- /** Default constructor. */
- CLBitwiseOrKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLBitwiseOrKernel(const CLBitwiseOrKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLBitwiseOrKernel &operator=(const CLBitwiseOrKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLBitwiseOrKernel(CLBitwiseOrKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLBitwiseOrKernel &operator=(CLBitwiseOrKernel &&) = default;
- /** Set the inputs and output images
- *
- * @param[in] input1 Source tensor. Data types supported: U8.
- * @param[in] input2 Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor. Data types supported: U8.
- */
- void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
- /** Set the inputs and output images
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input1 Source tensor. Data types supported: U8.
- * @param[in] input2 Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor. Data types supported: U8.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input1; /**< Source tensor 1 */
- const ICLTensor *_input2; /**< Source tensor 2 */
- ICLTensor *_output; /**< Destination tensor */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLBITWISEORKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLBITWISEXORKERNEL_H
-#define ARM_COMPUTE_CLBITWISEXORKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the bitwise XOR operation kernel.
- *
- * Result is computed by:
- * @f[ output(x,y) = input1(x,y) \oplus input2(x,y) @f]
- */
-class CLBitwiseXorKernel : public ICLKernel
-{
-public:
- /** Default constructor. */
- CLBitwiseXorKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLBitwiseXorKernel(const CLBitwiseXorKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLBitwiseXorKernel &operator=(const CLBitwiseXorKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLBitwiseXorKernel(CLBitwiseXorKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLBitwiseXorKernel &operator=(CLBitwiseXorKernel &&) = default;
- /** Set the inputs and output images
- *
- * @param[in] input1 Source tensor. Data types supported: U8.
- * @param[in] input2 Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor. Data types supported: U8.
- */
- void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
- /** Set the inputs and output images
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input1 Source tensor. Data types supported: U8.
- * @param[in] input2 Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor. Data types supported: U8.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input1; /**< Source tensor 1 */
- const ICLTensor *_input2; /**< Source tensor 2 */
- ICLTensor *_output; /**< Destination tensor */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLBITWISEXORKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLBOUNDINGBOXTRANSFORMKERNEL_H
-#define ARM_COMPUTE_CLBOUNDINGBOXTRANSFORMKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the bounding box kernel */
-class CLBoundingBoxTransformKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLBoundingBoxTransformKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLBoundingBoxTransformKernel(const CLBoundingBoxTransformKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLBoundingBoxTransformKernel &operator=(const CLBoundingBoxTransformKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLBoundingBoxTransformKernel(CLBoundingBoxTransformKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLBoundingBoxTransformKernel &operator=(CLBoundingBoxTransformKernel &&) = default;
- /** Default destructor */
- ~CLBoundingBoxTransformKernel() = default;
-
- /** Set the input and output tensors.
- *
- * @param[in] boxes Source tensor. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32.
- * @param[out] pred_boxes Destination tensor. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. Data types supported: Same as @p input
- * @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes.
- * Data types supported: QASYMM8 if @p input is QASYMM16, otherwise same as @p input
- * @param[in] info Contains BoundingBox operation information described in @ref BoundingBoxTransformInfo.
- *
- * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct.
- *
- */
- void configure(const ICLTensor *boxes, ICLTensor *pred_boxes, const ICLTensor *deltas, const BoundingBoxTransformInfo &info);
- /** Set the input and output tensors.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] boxes Source tensor. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32.
- * @param[out] pred_boxes Destination tensor. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. Data types supported: Same as @p input
- * @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes.
- * Data types supported: QASYMM8 if @p input is QASYMM16, otherwise same as @p input
- * @param[in] info Contains BoundingBox operation information described in @ref BoundingBoxTransformInfo.
- *
- * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct.
- *
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *boxes, ICLTensor *pred_boxes, const ICLTensor *deltas, const BoundingBoxTransformInfo &info);
-
- /** Static function to check if given info will lead to a valid configuration of @ref CLBoundingBoxTransform
- *
- * @param[in] boxes Source tensor info. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32.
- * @param[in] pred_boxes Destination tensor info. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. Data types supported: Same as @p input
- * @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes.
- * Data types supported: QASYMM8 if @p input is QASYMM16, otherwise same as @p input
- * @param[in] info Contains BoundingBox operation information described in @ref BoundingBoxTransformInfo.
- *
- * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct.
- *
- * @return a Status
- */
- static Status validate(const ITensorInfo *boxes, const ITensorInfo *pred_boxes, const ITensorInfo *deltas, const BoundingBoxTransformInfo &info);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_boxes;
- ICLTensor *_pred_boxes;
- const ICLTensor *_deltas;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLBOUNDINGBOXTRANSFORMKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLBOX3X3KERNEL_H
-#define ARM_COMPUTE_CLBOX3X3KERNEL_H
-
-#include "arm_compute/core/CL/ICLSimple2DKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the box 3x3 filter kernel.
- *
- */
-class CLBox3x3Kernel : public ICLSimple2DKernel
-{
-public:
- /**Initialise the kernel's input and output.
- *
- * @param[in] input An input tensor. Data types supported: U8
- * @param[out] output The output tensor. Data types supported: U8.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
- /**Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input An input tensor. Data types supported: U8
- * @param[out] output The output tensor. Data types supported: U8.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined);
-
- //Inherited methods overriden:
- BorderSize border_size() const override;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLBOX3X3KERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLCANNYEDGEKERNEL_H
-#define ARM_COMPUTE_CLCANNYEDGEKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to perform Gradient computation.
- */
-class CLGradientKernel : public ICLKernel
-{
-public:
- /** Constructor */
- CLGradientKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGradientKernel(const CLGradientKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGradientKernel &operator=(const CLGradientKernel &) = delete;
- /** Initialise the kernel's sources, destinations and border mode.
- *
- * @note gx, gy and mag must all be the same size (either 16 or 32).
- *
- * @param[in] gx Source tensor - Gx component. Data types supported: S16/S32.
- * @param[in] gy Source tensor - Gy component. Data types supported: Same as gx.
- * @param[out] magnitude Destination tensor - Magnitude. Data types supported: U16/U32. Must match the pixel size of gx, gy.
- * @param[out] phase Destination tensor - Quantized phase. Data types supported: U8.
- * @param[in] norm_type Normalization type. if 1, L1-Norm otherwise L2-Norm.
- */
- void configure(const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase, int32_t norm_type);
- /** Initialise the kernel's sources, destinations and border mode.
- *
- * @note gx, gy and mag must all be the same size (either 16 or 32).
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] gx Source tensor - Gx component. Data types supported: S16/S32.
- * @param[in] gy Source tensor - Gy component. Data types supported: Same as gx.
- * @param[out] magnitude Destination tensor - Magnitude. Data types supported: U16/U32. Must match the pixel size of gx, gy.
- * @param[out] phase Destination tensor - Quantized phase. Data types supported: U8.
- * @param[in] norm_type Normalization type. if 1, L1-Norm otherwise L2-Norm.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase, int32_t norm_type);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_gx; /**< Source tensor - Gx component */
- const ICLTensor *_gy; /**< Source tensor - Gy component */
- ICLTensor *_magnitude; /**< Destination tensor - Magnitude */
- ICLTensor *_phase; /**< Destination tensor - Quantized phase */
-};
-
-/** OpenCL kernel to perform Non-Maxima suppression for Canny Edge.
- *
- * @note This kernel is meant to be used alongside CannyEdge and performs a non-maxima suppression using magnitude and phase of input
- * to characterize points as possible edges. The output buffer needs to be cleared before this kernel is executed.
- *
- * @note Hysteresis is computed in @ref CLEdgeTraceKernel
- */
-class CLEdgeNonMaxSuppressionKernel : public ICLKernel
-{
-public:
- /** Constructor */
- CLEdgeNonMaxSuppressionKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLEdgeNonMaxSuppressionKernel(const CLEdgeNonMaxSuppressionKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLEdgeNonMaxSuppressionKernel &operator=(const CLEdgeNonMaxSuppressionKernel &) = delete;
- /** Initialise the kernel's sources, destination and border mode.
- *
- * @param[in] magnitude Source tensor - Magnitude. Data types supported: U16/U32.
- * @param[in] phase Source tensor - Quantized phase. Data types supported: U8.
- * @param[out] output Destination tensor. Data types supported: U16/U32.
- * @param[in] lower_thr Lower threshold.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *magnitude, const ICLTensor *phase, ICLTensor *output, int32_t lower_thr, bool border_undefined);
- /** Initialise the kernel's sources, destination and border mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] magnitude Source tensor - Magnitude. Data types supported: U16/U32.
- * @param[in] phase Source tensor - Quantized phase. Data types supported: U8.
- * @param[out] output Destination tensor. Data types supported: U16/U32.
- * @param[in] lower_thr Lower threshold.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *magnitude, const ICLTensor *phase, ICLTensor *output, int32_t lower_thr, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-private:
- const ICLTensor *_magnitude; /**< Source tensor - Magnitude. */
- const ICLTensor *_phase; /**< Source tensor - Quantized phase. */
- ICLTensor *_output; /**< Destination tensor. */
-};
-
-/** OpenCL kernel to perform Edge tracing.
- */
-class CLEdgeTraceKernel : public ICLKernel
-{
-public:
- /** Constructor */
- CLEdgeTraceKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLEdgeTraceKernel(const CLEdgeTraceKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLEdgeTraceKernel &operator=(const CLEdgeTraceKernel &) = delete;
- /** Initialise the kernel's source, destination and border mode.
- *
- * @param[in] input Source tensor. Data types supported: U16/U32.
- * @param[out] output Destination tensor. Data types supported: U8.
- * @param[in] upper_thr Upper threshold used for the hysteresis
- * @param[in] lower_thr Lower threshold used for the hysteresis
- * @param[in,out] visited Tensor for keeping the visited pixels. Data types supported: U32.
- * Expected to be initialized to 0 before each run.
- * @param[in,out] recorded Tensor for keeping the recorded pixels. Data types supported: U32
- * Expected to be initialized to 0 before each run.
- * @param[in,out] l1_stack Tensor with the L1 stack for each pixel. Data types supported: S32.
- * Expected to be initialized to 0 before each run.
- * @param[in,out] l1_stack_counter Tensor for counting the elements in the L1 stack of each pixel. Data types supported: U8.
- * Expected to be initialized to 0 before each run.
- */
- void configure(const ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr,
- ICLTensor *visited, ICLTensor *recorded, ICLTensor *l1_stack, ICLTensor *l1_stack_counter);
- /** Initialise the kernel's source, destination and border mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U16/U32.
- * @param[out] output Destination tensor. Data types supported: U8.
- * @param[in] upper_thr Upper threshold used for the hysteresis
- * @param[in] lower_thr Lower threshold used for the hysteresis
- * @param[in,out] visited Tensor for keeping the visited pixels. Data types supported: U32.
- * Expected to be initialized to 0 before each run.
- * @param[in,out] recorded Tensor for keeping the recorded pixels. Data types supported: U32
- * Expected to be initialized to 0 before each run.
- * @param[in,out] l1_stack Tensor with the L1 stack for each pixel. Data types supported: S32.
- * Expected to be initialized to 0 before each run.
- * @param[in,out] l1_stack_counter Tensor for counting the elements in the L1 stack of each pixel. Data types supported: U8.
- * Expected to be initialized to 0 before each run.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr,
- ICLTensor *visited, ICLTensor *recorded, ICLTensor *l1_stack, ICLTensor *l1_stack_counter);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input; /**< Source tensor. */
- ICLTensor *_output; /**< Destination tensor. */
- int32_t _lower_thr; /**< Lower threshold used for the hysteresis. */
- int32_t _upper_thr; /**< Upper threshold used for the hysteresis. */
- ICLTensor *_visited; /**< Marks visited elements */
- ICLTensor *_recorded; /**< Marks recorded elements */
- ICLTensor *_l1_stack; /**< L1 hysteris stack */
- ICLTensor *_l1_stack_counter; /**< L1 hysteris stack counter */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLCANNYEDGEKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLCHANNELCOMBINEKERNEL_H
-#define ARM_COMPUTE_CLCHANNELCOMBINEKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-#include <array>
-#include <cstdint>
-
-namespace arm_compute
-{
-class ICLMultiImage;
-class ICLTensor;
-using ICLImage = ICLTensor;
-
-/** Interface for the channel combine kernel */
-class CLChannelCombineKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLChannelCombineKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLChannelCombineKernel(const CLChannelCombineKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLChannelCombineKernel &operator=(const CLChannelCombineKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLChannelCombineKernel(CLChannelCombineKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLChannelCombineKernel &operator=(CLChannelCombineKernel &&) = default;
- /** Default destructor */
- ~CLChannelCombineKernel() = default;
- /** Configure function's inputs and outputs.
- *
- * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format.
- * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format.
- * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format.
- * @param[in] plane3 The 2D plane that forms channel 3. Must be of U8 format.
- * @param[out] output The single planar output tensor. Supported formats: RGB888/RGBA8888/YUYV422/UYVY422.
- */
- void configure(const ICLTensor *plane0, const ICLTensor *plane1, const ICLTensor *plane2, const ICLTensor *plane3, ICLTensor *output);
- /** Configure function's inputs and outputs.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format.
- * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format.
- * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format.
- * @param[in] plane3 The 2D plane that forms channel 3. Must be of U8 format.
- * @param[out] output The single planar output tensor.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *plane0, const ICLTensor *plane1, const ICLTensor *plane2, const ICLTensor *plane3, ICLTensor *output);
- /** Configure function's inputs and outputs.
- *
- * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format.
- * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format.
- * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format.
- * @param[out] output The multi planar output tensor. Supported formats: RGB888/RGBA8888/YUYV422/UYVY422.
- */
- void configure(const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output);
- /** Configure function's inputs and outputs.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format.
- * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format.
- * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format.
- * @param[out] output The multi planar output tensor. Supported formats: RGB888/RGBA8888/YUYV422/UYVY422.
- */
- void configure(const CLCompileContext &compile_context, const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- std::array<const ICLTensor *, 4> _planes;
- ICLTensor *_output;
- ICLMultiImage *_output_multi;
- std::array<uint32_t, 3> _x_subsampling;
- std::array<uint32_t, 3> _y_subsampling;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLCHANNELCOMBINEKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLCHANNELEXTRACTKERNEL_H
-#define ARM_COMPUTE_CLCHANNELEXTRACTKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ICLMultiImage;
-class ICLTensor;
-using ICLImage = ICLTensor;
-
-/** Interface for the channel extract kernel */
-class CLChannelExtractKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLChannelExtractKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLChannelExtractKernel(const CLChannelExtractKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLChannelExtractKernel &operator=(const CLChannelExtractKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLChannelExtractKernel(CLChannelExtractKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLChannelExtractKernel &operator=(CLChannelExtractKernel &&) = default;
- /** Default destructor */
- ~CLChannelExtractKernel() = default;
- /** Set the input and output of the kernel
- *
- * @param[in] input Source tensor. Formats supported: RGB888/RGBA8888/YUYV422/UYVY422
- * @param[in] channel Channel to extract.
- * @param[out] output Destination tensor. Must be of U8 format.
- */
- void configure(const ICLTensor *input, Channel channel, ICLTensor *output);
- /** Set the input and output of the kernel
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Formats supported: RGB888/RGBA8888/YUYV422/UYVY422
- * @param[in] channel Channel to extract.
- * @param[out] output Destination tensor. Must be of U8 format.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, Channel channel, ICLTensor *output);
- /** Set the input and output of the kernel
- *
- * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV/YUV444
- * @param[in] channel Channel to extract.
- * @param[out] output Single-planar 2D destination image. Must be of U8 format.
- */
- void configure(const ICLMultiImage *input, Channel channel, ICLImage *output);
- /** Set the input and output of the kernel
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV/YUV444
- * @param[in] channel Channel to extract.
- * @param[out] output Single-planar 2D destination image. Must be of U8 format.
- */
- void configure(const CLCompileContext &compile_context, const ICLMultiImage *input, Channel channel, ICLImage *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
- uint32_t _num_elems_processed_per_iteration;
- uint32_t _subsampling;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLCHANNELEXTRACTKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLCHANNELSHUFFLELAYERKERNEL_H
-#define ARM_COMPUTE_CLCHANNELSHUFFLELAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the channel shuffle kernel */
-class CLChannelShuffleLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLChannelShuffleLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLChannelShuffleLayerKernel(const CLChannelShuffleLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLChannelShuffleLayerKernel &operator=(const CLChannelShuffleLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLChannelShuffleLayerKernel(CLChannelShuffleLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLChannelShuffleLayerKernel &operator=(CLChannelShuffleLayerKernel &&) = default;
- /** Default destructor */
- ~CLChannelShuffleLayerKernel() = default;
- /** Configure function's inputs and outputs.
- *
- * @param[in] input Input tensor. Data types supported: All.
- * @param[out] output Output tensor. Data type supported: Same as @p input
- * @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups.
- */
- void configure(const ICLTensor *input, ICLTensor *output, unsigned int num_groups);
- /** Configure function's inputs and outputs.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor. Data types supported: All.
- * @param[out] output Output tensor. Data type supported: Same as @p input
- * @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, unsigned int num_groups);
- /** Static function to check if given info will lead to a valid configuration of @ref CLChannelShuffleLayerKernel
- *
- * @param[in] input Input tensor info. Data types supported: All.
- * @param[in] output Output tensor info. Data type supported: Same as @p input
- * @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int num_groups);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLCHANNELSHUFFLELAYERKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLCOL2IMKERNEL_H
-#define ARM_COMPUTE_CLCOL2IMKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the col2im reshaping kernel.
- *
- * Rearranges each matrix column into image blocks. It's the inverse operation of @ref CLIm2ColKernel.
- *
- * For example, a vector of 9 elements can be reshaped to a block(image) of 3x3:
- *
- * @f[
- * \left( \begin{array}{ccccccccc}
- * a0 & a1 & a2 & a3 & a4 & a5 & a6 & a7 & a8 \\
- * \end{array} \right)
- * \rightarrow
- * \left( \begin{array}{ccc}
- * a0 & a1 & a2 \\
- * a3 & a4 & a5 \\
- * a6 & a7 & a8 \\
- * \end{array} \right)
- * @f]
- */
-class CLCol2ImKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLCol2ImKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLCol2ImKernel(const CLCol2ImKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLCol2ImKernel &operator=(const CLCol2ImKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLCol2ImKernel(CLCol2ImKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLCol2ImKernel &operator=(CLCol2ImKernel &&) = default;
- /** Default destructor */
- ~CLCol2ImKernel() = default;
- /** Set the input and output of the kernel.
- *
- * @param[in] input The input tensor to convert. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
- * @param[out] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM],
- * while the rest represent batch of outputs. Data types supported: Same as @p input. Data layout: NCHW
- * @param[in] convolved_dims Output convolved dimensions.
- * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution
- */
- void configure(const ICLTensor *input, ICLTensor *output, const Size2D &convolved_dims, unsigned int num_groups = 1);
- /** Set the input and output of the kernel.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input The input tensor to convert. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
- * @param[out] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM],
- * while the rest represent batch of outputs. Data types supported: Same as @p input. Data layout: NCHW
- * @param[in] convolved_dims Output convolved dimensions.
- * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Size2D &convolved_dims, unsigned int num_groups = 1);
- /** Static function to check if given info will lead to a valid configuration of @ref CLCol2ImKernel
- *
- * @param[in] input The input tensor to convert. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
- * @param[in] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM],
- * while the rest represent batch of outputs. Data types supported: Same as @p input. Data layout: NCHW
- * @param[in] convolved_dims Output convolved dimensions.
- * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &convolved_dims, unsigned int num_groups = 1);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-public:
- const ICLTensor *_input;
- ICLTensor *_output;
- Size2D _convolved_dims;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLCOL2IMKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLCOLORCONVERTKERNEL_H
-#define ARM_COMPUTE_CLCOLORCONVERTKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLMultiImage;
-class ICLTensor;
-using ICLImage = ICLTensor;
-
-/** Interface for the color convert kernel.
- *
- */
-class CLColorConvertKernel : public ICLKernel
-{
-public:
- /** Default constructor. */
- CLColorConvertKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLColorConvertKernel(const CLColorConvertKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLColorConvertKernel &operator=(const CLColorConvertKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLColorConvertKernel(CLColorConvertKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLColorConvertKernel &operator=(CLColorConvertKernel &&) = default;
- /** Default destructor. */
- ~CLColorConvertKernel() = default;
-
- /** Set the input and output of the kernel
- *
- * @param[in] input Source tensor. Formats supported: RGBA8888/UYVY422/YUYV422/RGB888
- * @param[out] output Destination tensor. Formats supported: RGB888 (if the formats of @p input are RGBA8888/UYVY422/YUYV422),
- * RGBA8888 (if the formats of @p input are UYVY422/YUYV422/RGB888/),
- * U8 (if the formats of @p input is RGB888)
- */
- void configure(const ICLTensor *input, ICLTensor *output);
- /** Set the input and output of the kernel
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Formats supported: RGBA8888/UYVY422/YUYV422/RGB888
- * @param[out] output Destination tensor. Formats supported: RGB888 (if the formats of @p input are RGBA8888/UYVY422/YUYV422),
- * RGBA8888 (if the formats of @p input are UYVY422/YUYV422/RGB888/),
- * U8 (if the formats of @p input is RGB888)
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
- /** Set the input and output of the kernel
- *
- * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV
- * @param[out] output Single-planar destination image. Formats supported: RGB888/RGBA8888
- */
- void configure(const ICLMultiImage *input, ICLImage *output);
- /** Set the input and output of the kernel
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV
- * @param[out] output Single-planar destination image. Formats supported: RGB888/RGBA8888
- */
- void configure(const CLCompileContext &compile_context, const ICLMultiImage *input, ICLImage *output);
- /** Set the input and output of the kernel
- *
- * @param[in] input Single-planar source image. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422
- * @param[out] output Multi-planar destination image. Formats supported: NV12/IYUV/YUV444 (if the formats of @p input are RGB888/RGB8888)
- */
- void configure(const ICLImage *input, ICLMultiImage *output);
- /** Set the input and output of the kernel
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Single-planar source image. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422
- * @param[out] output Multi-planar destination image. Formats supported: NV12/IYUV/YUV444 (if the formats of @p input are RGB888/RGB8888)
- */
- void configure(const CLCompileContext &compile_context, const ICLImage *input, ICLMultiImage *output);
- /** Set the input and output of the kernel
- *
- * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV
- * @param[out] output Multi-planar destination image. Formats supported: YUV444/IYUV (if the formats of @p input are NV12/NV21)/NV12 (if the format of @p input is IYUV)
- */
- void configure(const ICLMultiImage *input, ICLMultiImage *output);
- /** Set the input and output of the kernel
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV
- * @param[out] output Multi-planar destination image. Formats supported: YUV444/IYUV (if the formats of @p input are NV12/NV21)/NV12 (if the format of @p input is IYUV)
- */
- void configure(const CLCompileContext &compile_context, const ICLMultiImage *input, ICLMultiImage *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input; /*pointer to single planar tensor input */
- ICLTensor *_output; /*pointer to single planar tensor output */
- const ICLMultiImage *_multi_input; /*pointer to multi-planar input */
- ICLMultiImage *_multi_output; /*pointer to multi-planar output */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLCOLORCONVERTKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLCOMPARISONKERNEL_H
-#define ARM_COMPUTE_CLCOMPARISONKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ICLTensor;
-
-/** Interface for the comparison kernel. */
-class CLComparisonKernel : public ICLKernel
-{
-public:
- /** Default constructor. */
- CLComparisonKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLComparisonKernel(const CLComparisonKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLComparisonKernel &operator=(const CLComparisonKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLComparisonKernel(CLComparisonKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLComparisonKernel &operator=(CLComparisonKernel &&) = default;
- /** Default destructor */
- ~CLComparisonKernel() = default;
- /** Set the inputs and output tensors
- *
- * @param[in] input1 Source tensor. Data types supported: All.
- * @param[in] input2 Source tensor. Data types supported: Same as @p input1.
- * @param[out] output Destination tensor. Data types supported: U8.
- * @param[in] operation Comparison operation to use.
- */
- void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ComparisonOperation operation);
- /** Set the inputs and output tensors
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input1 Source tensor. Data types supported: All.
- * @param[in] input2 Source tensor. Data types supported: Same as @p input1.
- * @param[out] output Destination tensor. Data types supported: U8.
- * @param[in] operation Comparison operation to use.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ComparisonOperation operation);
- /** Static function to check if given info will lead to a valid configuration of @ref CLComparisonKernel
- *
- * @param[in] input1 Source tensor. Data types supported: All.
- * @param[in] input2 Source tensor. Data types supported: Same as @p input1.
- * @param[in] output Destination tensor. Data types supported: U8.
- * @param[in] operation Comparison operation to use.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ComparisonOperation operation);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-private:
- const ICLTensor *_input1; /**< Source tensor 1 */
- const ICLTensor *_input2; /**< Source tensor 2 */
- ICLTensor *_output; /**< Destination tensor */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLCOMPARISONKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLCONVERTFULLYCONNECTEDWEIGHTSKERNEL_H
-#define ARM_COMPUTE_CLCONVERTFULLYCONNECTEDWEIGHTSKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface to convert the 2D Fully Connected weights from NCHW to NHWC or vice versa.
- *
- * @note This function can be applied to the 2D weights used by a Fully Connected layer if:
- * - It follows a Convolution layer
- * - The data layout used by the network does not match the one the model has been trained in.
- *
- * @note This function assumes the weights are already reshaped (transposed)
- */
-class CLConvertFullyConnectedWeightsKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLConvertFullyConnectedWeightsKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLConvertFullyConnectedWeightsKernel(const CLConvertFullyConnectedWeightsKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLConvertFullyConnectedWeightsKernel &operator=(const CLConvertFullyConnectedWeightsKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLConvertFullyConnectedWeightsKernel(CLConvertFullyConnectedWeightsKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLConvertFullyConnectedWeightsKernel &operator=(CLConvertFullyConnectedWeightsKernel &&) = default;
- /** Default destructor */
- ~CLConvertFullyConnectedWeightsKernel() = default;
- /** Set the input and output tensor.
- *
- * @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: All.
- * @param[out] output The converted weights tensor. Shape and Data Type: Same as @p input.
- * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer).
- * @param[in] data_layout The data layout the weights have been trained in.
- */
- void configure(const ICLTensor *input, ICLTensor *output, const TensorShape &original_input_shape, DataLayout data_layout);
- /** Set the input and output tensor.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: All.
- * @param[out] output The converted weights tensor. Shape and Data Type: Same as @p input.
- * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer).
- * @param[in] data_layout The data layout the weights have been trained in.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const TensorShape &original_input_shape, DataLayout data_layout);
- /** Static function to check if given info will lead to a valid configuration of @ref CLConvertFullyConnectedWeightsKernel
- *
- * @param[in] input Source weights tensor info to convert. Must be 2 dimensional. Data types supported: All.
- * @param[in] output The converted weights tensor info. Shape and Data Type: Same as @p input.
- * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer).
- * @param[in] data_layout The data layout the weights have been trained in.
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const TensorShape &original_input_shape, DataLayout data_layout);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLCONVERTFULLYCONNECTEDWEIGHTSKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLCONVOLUTIONKERNEL_H
-#define ARM_COMPUTE_CLCONVOLUTIONKERNEL_H
-
-#include "arm_compute/core/CL/ICLSimple2DKernel.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/****************************************************************************************\
- * Square Convolution *
-\****************************************************************************************/
-
-/** Interface for the kernel to run an arbitrary size convolution on a tensor. (Currently supports 3x3, 5x5, 7x7 and 9x9).
- * The client can supply a convolution matrix \f$ C_{m,n} \f$.
- * @f{eqnarray}{
- * k_0 &=& \frac{m}{2} \\
- * l_0 &=& \frac{n}{2} \\
- * sum &=& \sum_{k=0,l=0}^{k=m-1,l=n-1} input(x+k-k_0, y+l-l_0) C_{k,l}
- * @f}
- *
- * @note The above equation for this function is similar to the default OpenCV Filter2D function,
- * which actually computes a correlation and not a convolution.
- * In case of a real convolution the convolution matrix should be flipped both horizontally and vertically.
- */
-template <unsigned int matrix_size>
-class CLConvolutionKernel : public ICLSimple2DKernel
-{
-public:
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor, Data types supported: U8, S16.
- * @param[in] conv Convolution matrix to apply to the input tensor.
- * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined);
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor, Data types supported: U8, S16.
- * @param[in] conv Convolution matrix to apply to the input tensor.
- * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined);
-
- // Inherited methods overridden:
- BorderSize border_size() const override;
-};
-
-/** Interface for the kernel which applies a 3x3 convolution to a tensor. */
-using CLConvolution3x3Kernel = CLConvolutionKernel<3>;
-/** Interface for the kernel which applies a 5x5 convolution to a tensor. */
-using CLConvolution5x5Kernel = CLConvolutionKernel<5>;
-/** Interface for the kernel which applies a 7x7 convolution to a tensor. */
-using CLConvolution7x7Kernel = CLConvolutionKernel<7>;
-/** Interface for the kernel which applies a 9x9 convolution to a tensor. */
-using CLConvolution9x9Kernel = CLConvolutionKernel<9>;
-
-/****************************************************************************************\
- * Separable Square Convolution *
-\****************************************************************************************/
-
-/** Kernel for the Horizontal pass of a Separable Convolution. Currently support 5x5, 7x7, 9x9 */
-template <unsigned int matrix_size>
-class CLSeparableConvolutionHorKernel : public ICLSimple2DKernel
-{
-public:
- /** Default Constructor */
- CLSeparableConvolutionHorKernel();
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor, Data types supported: S16.
- * @param[in] conv Convolution matrix to apply to the input tensor.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, bool border_undefined);
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor, Data types supported: U16/S16/S32.
- * @param[in] conv Convolution matrix to apply to the input tensor.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, bool border_undefined);
-
- // Inherited methods overridden:
- BorderSize border_size() const override;
-
-private:
- BorderSize _border_size; /**< Border size */
-};
-
-/** Interface for the kernel which applies a horizontal pass of 5x5 convolution to a tensor. */
-using CLSeparableConvolution5x5HorKernel = CLSeparableConvolutionHorKernel<5>;
-/** Interface for the kernel which applies a horizontal pass of 7x7 convolution to a tensor. */
-using CLSeparableConvolution7x7HorKernel = CLSeparableConvolutionHorKernel<7>;
-/** Interface for the kernel which applies a horizontal pass of 9x9 convolution to a tensor. */
-using CLSeparableConvolution9x9HorKernel = CLSeparableConvolutionHorKernel<9>;
-
-/** Kernel for the Vertical pass of a Separable Convolution. Currently supports 5x5, 7x7, 9x9 */
-template <unsigned int matrix_size>
-class CLSeparableConvolutionVertKernel : public ICLSimple2DKernel
-{
-public:
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] input Source tensor. Data types supported: U16/S16/S32.
- * @param[out] output Destination tensor, Data types supported: U8, S16.
- * @param[in] conv Convolution matrix to apply to the input tensor.
- * @param[in] scale Scale of the convolution matrix.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- * @param[in] data_type Data type to use for intermeidate result. @sa data_type_for_convolution
- */
- void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined, DataType data_type = DataType::S32);
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U16/S16/S32.
- * @param[out] output Destination tensor, Data types supported: U8, S16.
- * @param[in] conv Convolution matrix to apply to the input tensor.
- * @param[in] scale Scale of the convolution matrix.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- * @param[in] data_type Data type to use for intermeidate result. @sa data_type_for_convolution
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined, DataType data_type = DataType::S32);
-
- // Inherited methods overridden:
- BorderSize border_size() const override;
-};
-
-/** Interface for the kernel which applies a vertical pass of 5x5 convolution to a tensor. */
-using CLSeparableConvolution5x5VertKernel = CLSeparableConvolutionVertKernel<5>;
-/** Interface for the kernel which applies a vertical pass of 7x7 convolution to a tensor. */
-using CLSeparableConvolution7x7VertKernel = CLSeparableConvolutionVertKernel<7>;
-/** Interface for the kernel which applies a vertical pass of 9x9 convolution to a tensor. */
-using CLSeparableConvolution9x9VertKernel = CLSeparableConvolutionVertKernel<9>;
-
-/****************************************************************************************\
- * Rectangle Convolution *
-\****************************************************************************************/
-
-/** Kernel for the running convolution on a rectangle matrix.
- *
- * @note Supports combinations of 3,5,7 and 9.
- */
-class CLConvolutionRectangleKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLConvolutionRectangleKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLConvolutionRectangleKernel(const CLConvolutionRectangleKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLConvolutionRectangleKernel &operator=(const CLConvolutionRectangleKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLConvolutionRectangleKernel(CLConvolutionRectangleKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLConvolutionRectangleKernel &operator=(CLConvolutionRectangleKernel &&) = default;
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor, Data types supported: U8, S16.
- * @param[in] conv Convolution matrix to apply to the input tensor.
- * @param[in] width Width of convolution matrix (Number of columns)
- * @param[in] height Height of convolution matrix (Number of rows)
- * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t width, uint32_t height, uint32_t scale, bool border_undefined);
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor, Data types supported: U8, S16.
- * @param[in] conv Convolution matrix to apply to the input tensor.
- * @param[in] width Width of convolution matrix (Number of columns)
- * @param[in] height Height of convolution matrix (Number of rows)
- * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t width, uint32_t height, uint32_t scale, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-private:
- BorderSize _border_size;
- const ICLTensor *_input;
- ICLTensor *_output;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLCONVOLUTIONKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLCOPYKERNEL_H
-#define ARM_COMPUTE_CLCOPYKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to perform a copy between two tensors */
-class CLCopyKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLCopyKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers). */
- CLCopyKernel(const CLCopyKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers). */
- CLCopyKernel &operator=(const CLCopyKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLCopyKernel(CLCopyKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLCopyKernel &operator=(CLCopyKernel &&) = default;
- /** Initialize the kernel's input, output.
- *
- * @param[in] input Source tensor. Data types supported: All.
- * @param[out] output Destination tensor. Data types supported: same as @p input.
- * @param[in] output_window (Optional) Window to be used in case only copying into part of a tensor. Default is nullptr.
- */
- void configure(const ICLTensor *input, ICLTensor *output, Window *output_window = nullptr);
- /** Initialize the kernel's input, output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: All.
- * @param[out] output Destination tensor. Data types supported: same as @p input.
- * @param[in] output_window (Optional) Window to be used in case only copying into part of a tensor. Default is nullptr.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, Window *output_window = nullptr);
- /** Static function to check if given info will lead to a valid configuration of @ref CLCopyKernel
- *
- * @param[in] input Source tensor info. Data types supported: All.
- * @param[in] output Destination tensor info. Data types supported: same as @p input.
- * @param[in] output_window (Optional) Window to be used in case only copying into part of a tensor. Default is nullptr.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, Window *output_window = nullptr);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
- Window _output_window;
- bool _has_output_window;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLCOPYKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2019-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLCROPKERNEL_H
-#define ARM_COMPUTE_CLCROPKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to perform a copy between two tensors */
-class CLCropKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLCropKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers). */
- CLCropKernel(const CLCropKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers). */
- CLCropKernel &operator=(const CLCropKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLCropKernel(CLCropKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLCropKernel &operator=(CLCropKernel &&) = default;
- /** Configure kernel
- *
- * @note Supported tensor rank: up to 4
- *
- * @param[in] input Source tensor. Data type supported: All. Data layouts supported: NHWC.
- * @param[out] output Destination tensor. Data type supported: F32
- * @param[in] start Coordinates of where to start cropping the image.
- * @param[in] end Coordinates of where to end cropping the image.
- * @param[in] batch_index Fourth dimension index of the 3D image to crop in @p input.
- * @param[in] extrapolation_value Value to be used for values outside of the image. Default is 0.
- * @param[in] output_window Output window to be used in case cropped image is being copied into a tensor. Default is nullptr.
- */
- void configure(const ICLTensor *input, ICLTensor *output, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value = 0, Window *output_window = nullptr);
- /** Configure kernel
- *
- * @note Supported tensor rank: up to 4
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data type supported: All. Data layouts supported: NHWC.
- * @param[out] output Destination tensor. Data type supported: F32
- * @param[in] start Coordinates of where to start cropping the image.
- * @param[in] end Coordinates of where to end cropping the image.
- * @param[in] batch_index Fourth dimension index of the 3D image to crop in @p input.
- * @param[in] extrapolation_value Value to be used for values outside of the image. Default is 0.
- * @param[in] output_window Output window to be used in case cropped image is being copied into a tensor. Default is nullptr.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value = 0,
- Window *output_window = nullptr);
-
- /** Static function to check if given info will lead to a valid configuration of @ref CLStridedSliceKernel
- *
- * @note Supported tensor rank: up to 4
- *
- * @param[in] input Source tensor info. Data type supported: All. Data layouts supported: NHWC.
- * @param[in] output Destination tensor info. Data type supported: F32
- * @param[in] start Coordinates of where to start cropping the image.
- * @param[in] end Coordinates of where to end cropping the image.
- * @param[in] batch_index Fourth dimension index of the 3D image to crop in @p input.
- * @param[in] extrapolation_value Value to be used for values outside of the image. Default is 0.
- * @param[in] output_window Output window to be used in case cropped image is being copied into a tensor. Default is nullptr.
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value = 0,
- Window *output_window = nullptr);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
- Coordinates2D _start;
- uint32_t _batch_index;
- float _extrapolation_value;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLCROPKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLDECONVOLUTIONLAYERUPSAMPLEKERNEL_H
-#define ARM_COMPUTE_CLDECONVOLUTIONLAYERUPSAMPLEKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the Deconvolution layer kernel on OpenCL.
- */
-class CLDeconvolutionLayerUpsampleKernel : public ICLKernel
-{
-public:
- /** Constructor */
- CLDeconvolutionLayerUpsampleKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDeconvolutionLayerUpsampleKernel(const CLDeconvolutionLayerUpsampleKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDeconvolutionLayerUpsampleKernel &operator=(const CLDeconvolutionLayerUpsampleKernel &) = delete;
- /** Default Move Constructor. */
- CLDeconvolutionLayerUpsampleKernel(CLDeconvolutionLayerUpsampleKernel &&) = default;
- /** Default move assignment operator */
- CLDeconvolutionLayerUpsampleKernel &operator=(CLDeconvolutionLayerUpsampleKernel &&) = default;
- /** Default destructor */
- ~CLDeconvolutionLayerUpsampleKernel() = default;
-
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Source tensor. Data types supported: All.
- * @param[out] output Destination tensor. Data types supported: same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
- * @param[in] info Contains padding and stride information described in @ref PadStrideInfo.
- */
- void configure(const ICLTensor *input, ICLTensor *output, const PadStrideInfo &info);
- /** Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: All.
- * @param[out] output Destination tensor. Data types supported: same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
- * @param[in] info Contains padding and stride information described in @ref PadStrideInfo.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PadStrideInfo &info);
- /** Static function to check if given info will lead to a valid configuration of @ref CLDeconvolutionLayerUpsample
- *
- * @param[in] input Source tensor info. Data types supported: All.
- * @param[in] output Destination tensor info. Data types supported: same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
- * @param[in] info Contains padding and stride information described in @ref PadStrideInfo.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PadStrideInfo &info);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
- PadStrideInfo _info;
- DataLayout _data_layout;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLDECONVOLUTIONLAYERUPSAMPLEKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2019-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLDECONVOLUTIONLAYERRESHAPEOUTPUTKERNEL_H
-#define ARM_COMPUTE_CLDECONVOLUTIONLAYERRESHAPEOUTPUTKERNEL_H
-
-#include "arm_compute/core/CL/ICLSimpleKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the OpenCL kernel to be used for reshaping the tensor before returning the result of deconvolution.
- *
- * The input tensor to this OpenCL kernel is expected to be the result of a @ref CLGEMM operation between the Deconvolution input and the Deconvolution filter.
- *
- * The input tensor should have the following shape: [filter_width * filter_height * ofms, width, height, batch_size]
- *
- * The output tensor should have the following shape: [stride_x * (input_width - 1) + filter_width - 2 * padx, stride_y * (input_height - 1) + filter_height - 2 * pady, ofms, batch_size]
- *
- * For example, given a tensor with dimensions [4, 2, 2] this function returns a tensor with dimensions [1, 4, 4].
- *
- */
-class CLDeconvolutionReshapeOutputKernel : public ICLSimpleKernel
-{
-public:
- /** Default constructor */
- CLDeconvolutionReshapeOutputKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDeconvolutionReshapeOutputKernel(const CLDeconvolutionReshapeOutputKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDeconvolutionReshapeOutputKernel &operator=(const CLDeconvolutionReshapeOutputKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLDeconvolutionReshapeOutputKernel(CLDeconvolutionReshapeOutputKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLDeconvolutionReshapeOutputKernel &operator=(CLDeconvolutionReshapeOutputKernel &&) = default;
- /** Default destructor */
- ~CLDeconvolutionReshapeOutputKernel() = default;
-
- /** Initialise the kernel's source and destination.
- *
- * @param[in] input Input tensor. Supported data types: QASYMM8/QASYMM8_SIGNED/S32/F16/F32.
- * @param[in] bias Bias tensor to be added directly during the reshape operation. Supported data types: same as @p input. Supported data layouts: same as @p input.
- * @param[out] output Output tensor with the following shape: [stride_x * (input_width - 1) + filter_width - 2 * padx, stride_y * (input_height - 1) + filter_height - 2 * pady, ofms, batch_size]
- * Supported data types: same as @p input. Supported data layouts: same as @p input.
- * @param[in] input_info Deconvolution input tensor info. Supported data types: same as @p input. Supported data layouts: same as @p input.
- * @param[in] weights_info Deconvolution weights tensor info. Supported data types: same as @p input. Supported data layouts: same as @p input.
- * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo. This kernel supports only stride_x = weights.width && stride_y = weights.height. Moreover, padding is not supported.
- */
- void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const ITensorInfo *input_info, const ITensorInfo *weights_info, const PadStrideInfo &deconv_info);
- /** Initialise the kernel's source and destination.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor. Supported data types: QASYMM8/QASYMM8_SIGNED/S32/F16/F32.
- * @param[in] bias Bias tensor to be added directly during the reshape operation. Supported data types: same as @p input. Supported data layouts: same as @p input.
- * @param[out] output Output tensor with the following shape: [stride_x * (input_width - 1) + filter_width - 2 * padx, stride_y * (input_height - 1) + filter_height - 2 * pady, ofms, batch_size]
- * Supported data types: same as @p input. Supported data layouts: same as @p input.
- * @param[in] input_info Deconvolution input tensor info. Supported data types: same as @p input. Supported data layouts: same as @p input.
- * @param[in] weights_info Deconvolution weights tensor info. Supported data types: same as @p input. Supported data layouts: same as @p input.
- * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo. This kernel supports only stride_x = weights.width && stride_y = weights.height. Moreover, padding is not supported.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const ITensorInfo *input_info, const ITensorInfo *weights_info,
- const PadStrideInfo &deconv_info);
-
- /** Static function to check if given info will lead to a valid configuration of @ref CLDeconvolutionReshapeOutputKernel.
- *
- * @param[in] input GEMM output tensor info to be reshaped. Supported data types: QASYMM8/QASYMM8_SIGNED/S32/F16/F32.
- * @param[in] bias (Optional) Optional bias tensor info to be added directly during the reshape operation. Supported data types: same as @p input. Supported data layouts: same as @p input.
- * @param[in] output Reshaped output tensor info. Supported data types: same as @p input. Supported data layouts: same as @p input.
- * @param[in] input_info Original input tensor info. Supported data types: same as @p input. Supported data layouts: same as @p input.
- * @param[in] weights_info Original weights tensor info output. Supported data types: same as @p input. Supported data layouts: same as @p input.
- * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo. This kernel supports only stride_x = weights.width && stride_y = weights.height. Moreover, padding is not supported.
- *
- * @return a Status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const ITensorInfo *input_info, const ITensorInfo *weights_info, const PadStrideInfo &deconv_info);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- bool _add_bias;
- const ICLTensor *_bias;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLDECONVOLUTIONLAYERRESHAPEOUTPUTKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef ARM_COMPUTE_CLDEPTHCONCATENATEKERNEL_H
-#define ARM_COMPUTE_CLDEPTHCONCATENATEKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-/** Interface for the depth concatenate kernel.
- * The input tensor will be concatenated into the output tensor.
- */
-class CLDepthConcatenateLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLDepthConcatenateLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDepthConcatenateLayerKernel(const CLDepthConcatenateLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDepthConcatenateLayerKernel &operator=(const CLDepthConcatenateLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLDepthConcatenateLayerKernel(CLDepthConcatenateLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLDepthConcatenateLayerKernel &operator=(CLDepthConcatenateLayerKernel &&) = default;
- /** Default destructor */
- ~CLDepthConcatenateLayerKernel() = default;
- /** Initialise the kernel's inputs and output
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] depth_offset The offset on the Z axis.
- * @param[in,out] output Output tensor. Data types supported: Same as @p input.
- *
- * @note: The output tensor's low two dimensions can't be smaller than the input one's.
- * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2.
- *
- */
- void configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int depth_offset, ITensorInfo *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLDepthConcatenateLayerKernel
- *
- * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
- * @param[in] depth_offset The offset on the Z axis.
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, unsigned int depth_offset, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
-
-private:
- unsigned int _depth_offset;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLDEPTHCONCATENATEKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLDEPTHCONVERTKERNEL_H
-#define ARM_COMPUTE_CLDEPTHCONVERTKERNEL_H
-
-#include "arm_compute/core/CL/ICLSimple3DKernel.h"
-#include "arm_compute/core/Types.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the depth conversion kernel. */
-class CLDepthConvertLayerKernel : public ICLSimple3DKernel
-{
-public:
- /** Set the input and output of the kernel.
- *
- * Valid conversions Input -> Output :
- *
- * - QSYMM8_PER_CHANNEL -> QASYMM8 (ATTENTION: it is the user's responsibility to keep track of the quantization info in the TensorInfo meta-data)
- * - U8 -> S8, U16, S16, U32, S32, F16, F32
- * - U16 -> U8, S8, S16, U32, S32, F16, F32
- * - S16 -> U8, S8, U16, U32, S32, F16, F32
- * - U32 -> U8, S8, U16, S16, S32, F16, F32
- * - S32 -> U8, S8, U16, S16, U32, F16, F32
- * - F16 -> U8, S8, U16, S16, U32, F32
- * - F32 -> U8, S8, U16, S16, U32, F16
- *
- * @param[in] input The input tensor to convert. Data types supported: U8/S8/QSYMM8_PER_CHANNEL/U16/S16/U32/S32/F16/F32.
- * @param[out] output The output tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
- * @param[in] policy Conversion policy
- * @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8.
- */
- void configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift);
- /** Set the input and output of the kernel.
- *
- * Valid conversions Input -> Output :
- *
- * - QSYMM8_PER_CHANNEL -> QASYMM8 (ATTENTION: it is the user's responsibility to keep track of the quantization info in the TensorInfo meta-data)
- * - U8 -> S8, U16, S16, U32, S32, F16, F32
- * - U16 -> U8, S8, S16, U32, S32, F16, F32
- * - S16 -> U8, S8, U16, U32, S32, F16, F32
- * - U32 -> U8, S8, U16, S16, S32, F16, F32
- * - S32 -> U8, S8, U16, S16, U32, F16, F32
- * - F16 -> U8, S8, U16, S16, U32, F32
- * - F32 -> U8, S8, U16, S16, U32, F16
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input The input tensor to convert. Data types supported: U8/S8/QSYMM8_PER_CHANNEL/U16/S16/U32/S32/F16/F32.
- * @param[out] output The output tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
- * @param[in] policy Conversion policy
- * @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift);
- /** Static function to check if given info will lead to a valid configuration of @ref CLDepthConvertLayerKernel
- *
- * @param[in] input Source tensor info. Data types supported: U8/S8/QSYMM8_PER_CHANNEL/U16/S16/U32/S32/F16/F32.
- * @param[in] output Destination tensor info. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
- * @param[in] policy Conversion policy
- * @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift);
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLDEPTHCONVERTKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2019-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLDEPTHTOSPACELAYERKERNEL_H
-#define ARM_COMPUTE_CLDEPTHTOSPACELAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the depth to space kernel */
-class CLDepthToSpaceLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLDepthToSpaceLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDepthToSpaceLayerKernel(const CLDepthToSpaceLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDepthToSpaceLayerKernel &operator=(const CLDepthToSpaceLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLDepthToSpaceLayerKernel(CLDepthToSpaceLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLDepthToSpaceLayerKernel &operator=(CLDepthToSpaceLayerKernel &&) = default;
- /** Default destructor */
- ~CLDepthToSpaceLayerKernel() = default;
- /** Initialise the kernel's inputs and output.
- *
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
- * @param[out] output Tensor output. Data types supported: same as @p input
- * @param[in] block_shape Block shape value.
- */
- void configure(const ICLTensor *input, ICLTensor *output, int32_t block_shape);
- /** Initialise the kernel's inputs and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
- * @param[out] output Tensor output. Data types supported: same as @p input
- * @param[in] block_shape Block shape value.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t block_shape);
- /** Static function to check if given info will lead to a valid configuration of @ref CLDepthToSpaceLayerKernel.
- *
- * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All.
- * @param[in] output Tensor output info. Data types supported: same as @p input
- * @param[in] block_shape Block shape value.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input; /**< Source tensor */
- ICLTensor *_output; /**< Destination tensor */
- int32_t _block_shape; /**< Block shape */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLDEPTHTOSPACELAYERKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLDEPTHWISECONVOLUTIONNCHWKERNEL3x3_H
-#define ARM_COMPUTE_CLDEPTHWISECONVOLUTIONNCHWKERNEL3x3_H
-
-#include "arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the kernel to run a 3x3 depthwise convolution on a tensor when the data layout is NCHW.
- */
-class CLDepthwiseConvolutionLayer3x3NCHWKernel : public ICLDepthwiseConvolutionLayer3x3Kernel
-{
-public:
- /** Default constructor */
- CLDepthwiseConvolutionLayer3x3NCHWKernel();
- /** Initialize the function's source, destination, conv and border_size.
- *
- * @param[in] input Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] weights Weights tensor. A 3D tensor with dimensions [3, 3, IFM].
- * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
- * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
- * @param[out] output Destination tensor. Data type supported: Same as @p input.
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for QASYMM8 supported.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization,
- * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
- * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization,
- * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
- */
- void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
- unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U),
- const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) override;
- /** Initialize the function's source, destination, conv and border_size.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] weights Weights tensor. A 3D tensor with dimensions [3, 3, IFM].
- * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
- * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
- * @param[out] output Destination tensor. Data type supported: Same as @p input.
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for QASYMM8 supported.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization,
- * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
- * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization,
- * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
- unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U),
- const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) override;
- /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer3x3NCHWKernel
- *
- * @param[in] input Source tensor info. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] weights Weights tensor info. A 3D tensor with dimensions [3, 3, IFM].
- * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
- * @param[in] biases Biases tensor info. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
- * @param[in] output Destination tensor. Data type supported: Same as @p input.
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported.
- * @param[in] gpu_target (Optional) GPU target to validate the kernel for. Defaults to midgard.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- * @param[in] output_multipliers (Optional) Output multipliers tensor info for quantized computations. In case of per-channel quantization,
- * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
- * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization,
- * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), GPUTarget gpu_target = GPUTarget::MIDGARD,
- const Size2D &dilation = Size2D(1U, 1U), const ITensorInfo *output_multipliers = nullptr, const ITensorInfo *output_shifts = nullptr);
-
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-private:
- unsigned int _conv_stride_x;
- unsigned int _conv_pad_top;
- unsigned int _conv_pad_left;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLDEPTHWISECONVOLUTIONNCHWKERNEL3x3_H */
+++ /dev/null
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLDEPTHWISECONVOLUTIONNHWCKERNEL3x3_H
-#define ARM_COMPUTE_CLDEPTHWISECONVOLUTIONNHWCKERNEL3x3_H
-
-#include "arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the kernel to run a 3x3 depthwise convolution on a tensor when the data layout is NHWC.
- */
-class CLDepthwiseConvolutionLayer3x3NHWCKernel : public ICLDepthwiseConvolutionLayer3x3Kernel
-{
-public:
- /** Default constructor */
- CLDepthwiseConvolutionLayer3x3NHWCKernel();
- /** Default move assignment operator. */
- /** Initialize the function's source, destination, conv and border_size.
- *
- * @param[in] input Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] weights Weights tensor. A 3D tensor with dimensions [IFM, 3, 3].
- * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
- * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
- * @param[out] output Destination tensor. Data type supported: Same as @p input.
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization,
- * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
- * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization,
- * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
- */
- void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
- unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U),
- const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) override;
- /** Initialize the function's source, destination, conv and border_size.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] weights Weights tensor. A 3D tensor with dimensions [IFM, 3, 3].
- * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
- * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
- * @param[out] output Destination tensor. Data type supported: Same as @p input.
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization,
- * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
- * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization,
- * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
- unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U),
- const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) override;
- /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer3x3NHWCKernel
- *
- * @param[in] input Source tensor info. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] weights Weights tensor info. A 3D tensor with dimensions [IFM, 3, 3].
- * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
- * @param[in] biases Biases tensor info. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
- * @param[in] output Destination tensor info. Data type supported: Same as @p input.
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- * @param[in] output_multipliers (Optional) Output multipliers tensor info for quantized computations. In case of per-channel quantization,
- * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
- * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization,
- * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U),
- const ITensorInfo *output_multipliers = nullptr, const ITensorInfo *output_shifts = nullptr);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-private:
- unsigned int _num_planes_processed_per_iteration;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLDEPTHWISECONVOLUTIONNHWCKERNEL3x3_H */
+++ /dev/null
-/*
- * Copyright (c) 2019-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H
-#define ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-#include "arm_compute/core/KernelDescriptors.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the kernel to run a MxN depthwise convolution. M and N are respectively the rows and columns of the filter
- This kernel assumes that tensor for the weights is NOT reshaped (Native version) */
-class CLDepthwiseConvolutionLayerNativeKernel : public ICLKernel
-{
-public:
- /** Default Constructor */
- CLDepthwiseConvolutionLayerNativeKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDepthwiseConvolutionLayerNativeKernel(const CLDepthwiseConvolutionLayerNativeKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDepthwiseConvolutionLayerNativeKernel &operator=(const CLDepthwiseConvolutionLayerNativeKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLDepthwiseConvolutionLayerNativeKernel(CLDepthwiseConvolutionLayerNativeKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLDepthwiseConvolutionLayerNativeKernel &operator=(CLDepthwiseConvolutionLayerNativeKernel &&) = default;
- /** Initialize the function's source, destination and parameters
- *
- * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/FP32/FP16. Data layout supported: NHWC
- * @param[in] weights Weights tensor. A 3D tensor with dimensions [IFM, N, M].
- * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
- * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
- * @param[out] output Destination tensor. Data type supported: Same as @p input.
- * @param[in] dwc_weights_info Depthwise convolution layer weights info to retrieve the number of output elements processed by each thread
- * @param[in] dwc_info Depthwise convolution layer info
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization,
- * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
- * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization,
- * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
- */
- void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const DWCWeightsKernelInfo &dwc_weights_info,
- const DWCKernelInfo &dwc_info, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const Size2D &dilation = Size2D(1U, 1U),
- const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr);
- /** Initialize the function's source, destination and parameters
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/FP32/FP16. Data layout supported: NHWC
- * @param[in] weights Weights tensor. A 3D tensor with dimensions [IFM, N, M].
- * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
- * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
- * @param[out] output Destination tensor. Data type supported: Same as @p input.
- * @param[in] dwc_weights_info Depthwise convolution layer weights info to retrieve the number of output elements processed by each thread
- * @param[in] dwc_info Depthwise convolution layer info
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization,
- * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
- * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization,
- * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const DWCWeightsKernelInfo &dwc_weights_info,
- const DWCKernelInfo &dwc_info, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const Size2D &dilation = Size2D(1U, 1U),
- const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr);
- /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayerNativeKernel
- *
- * @param[in] input Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/FP32/FP16. Data layout supported: NHWC
- * @param[in] weights Weights tensor info. A 3D tensor with dimensions [IFM, N, M].
- * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
- * @param[in] biases Biases tensor info. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
- * @param[in] output Destination tensor info. Data type supported: Same as @p input.
- * @param[in] dwc_weights_info Depthwise convolution layer weights info to retrieve the number of output elements processed by each thread
- * @param[in] dwc_info Depthwise convolution layer info
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization,
- * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
- * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization,
- * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const DWCWeightsKernelInfo &dwc_weights_info,
- const DWCKernelInfo &dwc_info, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const Size2D &dilation = Size2D(1U, 1U),
- const ITensorInfo *output_multipliers = nullptr, const ITensorInfo *output_shifts = nullptr);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- const ICLTensor *_weights;
- const ICLTensor *_biases;
- ICLTensor *_output;
- unsigned int _depth_multiplier;
- const ICLTensor *_output_multipliers;
- const ICLTensor *_output_shifts;
- bool _is_quantized;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2019-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERRESHAPEWEIGHTSKERNEL_H
-#define ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERRESHAPEWEIGHTSKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the kernel to reshape the weights of depthwise convolution. */
-class CLDepthwiseConvolutionLayerReshapeWeightsKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLDepthwiseConvolutionLayerReshapeWeightsKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDepthwiseConvolutionLayerReshapeWeightsKernel(const CLDepthwiseConvolutionLayerReshapeWeightsKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDepthwiseConvolutionLayerReshapeWeightsKernel &operator=(const CLDepthwiseConvolutionLayerReshapeWeightsKernel &) = delete;
- /** Default Move Constructor. */
- CLDepthwiseConvolutionLayerReshapeWeightsKernel(CLDepthwiseConvolutionLayerReshapeWeightsKernel &&) = default;
- /** Default move assignment operator */
- CLDepthwiseConvolutionLayerReshapeWeightsKernel &operator=(CLDepthwiseConvolutionLayerReshapeWeightsKernel &&) = default;
-
- /** Initialize the function's source and destination.
- *
- * @param[in] input The input tensor of dimension [IFM, W, H]. Data types supported: All. Data layouts supported: NHWC
- * @param[out] output The output tensor of dimension [W*H*C0, ceil(IFM/C0)]. C0 is the number of channels read by each thread. Data types supported: same as @p weights.
- * @param[in] info Depthwise convolution information to reshape the input tensor.
- */
- void configure(const ICLTensor *input, ICLTensor *output, const DepthwiseConvolutionReshapeInfo &info);
- /** Initialize the function's source and destination.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input The input tensor of dimension [IFM, W, H]. Data types supported: All. Data layouts supported: NHWC
- * @param[out] output The output tensor of dimension [W*H*C0, ceil(IFM/C0)]. C0 is the number of channels read by each thread. Data types supported: same as @p weights.
- * @param[in] info Depthwise convolution information to reshape the input tensor.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const DepthwiseConvolutionReshapeInfo &info);
-
- /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer3x3NHWCKernel
- *
- * @param[in] input The input tensor info of dimension [IFM, W, H]. Data types supported: All. Data layouts supported: NHWC
- * @param[in] output The output tensor info of dimension [W*H*C0, ceil(IFM/C0)]. C0 is the number of channels read by each thread. Data types supported: same as @p weights.
- * @param[in] info Depthwise convolution information to reshape the input tensor.
- *
- * @return a Status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const DepthwiseConvolutionReshapeInfo &info);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
-
- void configure_dot_product(const DepthwiseConvolutionReshapeInfo &info);
- void configure_generic(const DepthwiseConvolutionReshapeInfo &info);
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERRESHAPEWEIGHTSKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLDEQUANTIZATIONLAYERKERNEL_H
-#define ARM_COMPUTE_CLDEQUANTIZATIONLAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the dequantization layer kernel. */
-class CLDequantizationLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLDequantizationLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDequantizationLayerKernel(const CLDequantizationLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDequantizationLayerKernel &operator=(const CLDequantizationLayerKernel &) = delete;
- /** Default Move Constructor. */
- CLDequantizationLayerKernel(CLDequantizationLayerKernel &&) = default;
- /** Default move assignment operator */
- CLDequantizationLayerKernel &operator=(CLDequantizationLayerKernel &&) = default;
- /** Default destructor */
- ~CLDequantizationLayerKernel() = default;
- /** Set the input, output, min and max.
- *
- * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
- * @param[out] output Destination tensor. Data types supported: F16/F32.
- */
- void configure(const ICLTensor *input, ICLTensor *output);
- /** Set the input, output, min and max.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
- * @param[out] output Destination tensor. Data types supported: F16/F32.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLDequantizationLayerKernel
- *
- * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
- * @param[in] output Output tensor info. Data types supported: F16/F32.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLDEQUANTIZATIONLAYERKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLDERIVATIVEKERNEL_H
-#define ARM_COMPUTE_CLDERIVATIVEKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the derivative kernel. */
-class CLDerivativeKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLDerivativeKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDerivativeKernel(const CLDerivativeKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDerivativeKernel &operator=(const CLDerivativeKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLDerivativeKernel(CLDerivativeKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLDerivativeKernel &operator=(CLDerivativeKernel &&) = default;
- /** Default destructor */
- ~CLDerivativeKernel() = default;
- /** Initialise the kernel's sources, destination and border
- *
- * @note At least one of output_x or output_y must be set
- *
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16.
- * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
- /** Initialise the kernel's sources, destination and border
- *
- * @note At least one of output_x or output_y must be set
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16.
- * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-private:
- const ICLTensor *_input; /**< Input tensor */
- ICLTensor *_output_x; /**< Output tensor - Derivate along the X direction */
- ICLTensor *_output_y; /**< Output tensor - Derivate along the Y direction */
- bool _run_derivative_x; /**< Do we need to run Derivative X ? */
- bool _run_derivative_y; /**< Do we need to run Derivative Y ? */
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLDERIVATIVEKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLDILATEKERNEL_H
-#define ARM_COMPUTE_CLDILATEKERNEL_H
-
-#include "arm_compute/core/CL/ICLSimple2DKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the dilate kernel.
- *
- */
-class CLDilateKernel : public ICLSimple2DKernel
-{
-public:
- /**Initialise the kernel's input and output.
- *
- * @param[in] input An input tensor. Data types supported: U8
- * @param[out] output The output tensor. Data types supported: U8.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
- /**Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input An input tensor. Data types supported: U8
- * @param[out] output The output tensor. Data types supported: U8.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined);
-
- // Inherited methods overridden:
- BorderSize border_size() const override;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLDILATEKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLDIRECTCONVOLUTIONLAYERKERNEL_H
-#define ARM_COMPUTE_CLDIRECTCONVOLUTIONLAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the direct convolution kernel.
- */
-class CLDirectConvolutionLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLDirectConvolutionLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDirectConvolutionLayerKernel(const CLDirectConvolutionLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLDirectConvolutionLayerKernel &operator=(const CLDirectConvolutionLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLDirectConvolutionLayerKernel(CLDirectConvolutionLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLDirectConvolutionLayerKernel &operator=(CLDirectConvolutionLayerKernel &&) = default;
- /** Default destructor */
- ~CLDirectConvolutionLayerKernel() = default;
- /** Set the input, weights, biases and output tensors.
- *
- * @note: DirectConvolution only works in the following configurations:
- * 1x1 convolution with stride_x = 1/2/3, stride_y = 1/2/3
- * 3x3 convolution with stride_x = 1/2, stride_y = 1/2
- * 5x5 convolution with stride_x = 1/2, stride_y = 1/2
- * 9x9 convolution with stride_x = 1/2, stride_y = 1/2
- *
- * @param[in] input The input tensor to convolve. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
- * The 3rd dimension must be the same as the input's volume 3rd dimension.
- * Data type supported:Same as @p input.
- * @param[in] biases Biases tensor. Biases are 1D tensor with dimension [OFM].
- * Data type supported: Should match @p input data type, except for input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type
- * @param[out] output Output tensor.
- * The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: Same as @p input.
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- */
- void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info);
- /** Set the input, weights, biases and output tensors.
- *
- * @note: DirectConvolution only works in the following configurations:
- * 1x1 convolution with stride_x = 1/2/3, stride_y = 1/2/3
- * 3x3 convolution with stride_x = 1/2, stride_y = 1/2
- * 5x5 convolution with stride_x = 1/2, stride_y = 1/2
- * 9x9 convolution with stride_x = 1/2, stride_y = 1/2
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input The input tensor to convolve. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
- * The 3rd dimension must be the same as the input's volume 3rd dimension.
- * Data type supported:Same as @p input.
- * @param[in] biases Biases tensor. Biases are 1D tensor with dimension [OFM].
- * Data type supported: Should match @p input data type, except for input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type
- * @param[out] output Output tensor.
- * The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: Same as @p input.
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info);
- /** Static function to check if given info will lead to a valid configuration of @ref CLDirectConvolutionLayerKernel
- *
- * @param[in] input The input tensor to convolve. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
- * The 3rd dimension must be the same as the input's volume 3rd dimension.
- * Data type supported:Same as @p input.
- * @param[in] biases Biases tensor. Biases are 1D tensor with dimension [OFM].
- * Data type supported: Should match @p input data type, except for input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type.
- * @param[in] output Output tensor.
- * The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: Same as @p input.
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- * @param[in] target Target GPU architecture.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, const GPUTarget target);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-public:
- const ICLTensor *_input;
- const ICLTensor *_biases;
- const ICLTensor *_weights;
- ICLTensor *_output;
- DataLayout _data_layout;
- BorderSize _border_size;
- int _conv_stride_x;
- int _conv_stride_y;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLDIRECTCONVOLUTIONLAYERKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLELEMENTWISEUNARYLAYERKERNEL_H
-#define ARM_COMPUTE_CLELEMENTWISEUNARYLAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/CL/ICLSimpleKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-/** Interface for the elementwise unary operator */
-class CLElementWiseUnaryLayerKernel : public ICLKernel
-{
-public:
- /** Initialise the kernel's inputs, output.
- *
- * @param[in] input First tensor input info. Data types supported: F16/F32.
- * @param[out] output Output tensor info. Data types supported: Same as @p input.
- * @param[in] op Element wise unary operation to perform.
- */
- void configure(const ITensorInfo *input, ITensorInfo *output, const ElementWiseUnary &op);
- /** Initialise the kernel's inputs, output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input First tensor input info. Data types supported: F16/F32.
- * @param[out] output Output tensor info. Data types supported: Same as @p input.
- * @param[in] op Element wise unary operation to perform.
- */
- void configure(const CLCompileContext &compile_context, const ITensorInfo *input, ITensorInfo *output, const ElementWiseUnary &op);
- /** Static function to check if given info will lead to a valid configuration of @ref CLElementWiseUnaryLayerKernel
- *
- * @param[in] input First tensor input info. Data types supported: F16/F32.
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
- * @param[in] op Element wise unary operation to perform.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ElementWiseUnary &op);
-
- // Inherited methods overridden:
- void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLELEMENTWISEUNARYLAYERKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLELEMENTWISEOPERATIONKERNEL_H
-#define ARM_COMPUTE_CLELEMENTWISEOPERATIONKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for an element-wise operation kernel
- *
- * Element-wise operation is computed by:
- * @f[ output(x,y) = OP(input1(x,y), input2(x,y))@f]
- *
- */
-class CLElementwiseOperationKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLElementwiseOperationKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLElementwiseOperationKernel(const CLElementwiseOperationKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLElementwiseOperationKernel &operator=(const CLElementwiseOperationKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLElementwiseOperationKernel(CLElementwiseOperationKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLElementwiseOperationKernel &operator=(CLElementwiseOperationKernel &&) = default;
- /** Default destructor */
- ~CLElementwiseOperationKernel() = default;
-
- // Inherited methods overridden:
- void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
-
-protected:
- /** The name of the operation */
- virtual std::string name() = 0;
-
- /** Initialise the kernel's output.
- *
- * @param[in] input1 First tensor input info. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/F16/U32/S32/F32.
- * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[in] output Output tensor info. Data types supported: Same as @p input1.
- *
- * @return a pair of Status and Window
- */
- virtual std::pair<Status, Window> validate_and_configure_window(ITensorInfo &input1, ITensorInfo &input2, ITensorInfo &output) = 0;
-
- /** Generate the build options for the specific kernel
- *
- * @reutrn a CLBuildOptions struct
- */
- virtual CLBuildOptions generate_build_options(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output) = 0;
-
- /** Generate the identifier for tuning
- *
- * @reutrn a string
- */
- virtual std::string generate_id_for_tuning(const std::string &kernel_name, const ITensorInfo &input1, const ITensorInfo &output) = 0;
-
- /** Commmon configure function for element-wise operators with no additional options (e.g., Div, Min, Max, SquaredDiff)
- *
- */
- void configure_common(ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output);
- /** Commmon configure function for element-wise operators with no additional options (e.g., Div, Min, Max, SquaredDiff)
- *
- */
- void configure_common(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output);
-
- ActivationLayerInfo _act_info;
-
-private:
- const ITensorInfo *_input1; /**< Source tensor info 1 */
- const ITensorInfo *_input2; /**< Source tensor info 2 */
- ITensorInfo *_output; /**< Destination tensor info */
-};
-
-/** Addition operation */
-class CLSaturatedArithmeticOperationKernel : public CLElementwiseOperationKernel
-{
-public:
- CLSaturatedArithmeticOperationKernel()
- : CLElementwiseOperationKernel(), _policy(), _op()
- {
- }
-
- /** Static function to check if given info will lead to a valid configuration of @ref CLSaturatedArithmeticOperationKernel
- *
- * @param[in] op Arithmetic operation to be executed.
- * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32.
- * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[in] output Output tensor info. Data types supported: Same as @p input1.
- * @param[in] policy Policy to use to handle overflow.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- */
- void configure(ArithmeticOperation op, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, const ConvertPolicy &policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
- /** Static function to check if given info will lead to a valid configuration of @ref CLSaturatedArithmeticOperationKernel
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] op Arithmetic operation to be executed.
- * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32.
- * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[in] output Output tensor info. Data types supported: Same as @p input1.
- * @param[in] policy Policy to use to handle overflow.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- */
- void configure(const CLCompileContext &compile_context, ArithmeticOperation op, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, const ConvertPolicy &policy,
- const ActivationLayerInfo &act_info = ActivationLayerInfo());
-
- /** Static function to check if given info will lead to a valid configuration of @ref CLSaturatedArithmeticOperationKernel
- *
- * @param[in] op Arithmetic operation to be executed.
- * @param[in] input1 First tensor input info info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32.
- * @param[in] input2 Second tensor input info info. Data types supported: Same as @p input1.
- * @param[in] output Output tensor info info. Data types supported: Same as @p input1.
- * @param[in] policy Policy to use to handle overflow.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- *
- * @return a Status
- */
- static Status validate(ArithmeticOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ConvertPolicy &policy,
- const ActivationLayerInfo &act_info = ActivationLayerInfo());
-
-protected:
- // Inherited methods overridden:
- std::string name() override;
- std::pair<Status, Window> validate_and_configure_window(ITensorInfo &input1, ITensorInfo &input2, ITensorInfo &output) override;
- CLBuildOptions generate_build_options(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output) override;
- std::string generate_id_for_tuning(const std::string &kernel_name, const ITensorInfo &input1, const ITensorInfo &output) override;
-
-private:
- ConvertPolicy _policy;
- ArithmeticOperation _op;
-};
-
-class CLArithmeticOperationKernel : public CLElementwiseOperationKernel
-{
-public:
- CLArithmeticOperationKernel()
- : CLElementwiseOperationKernel(), _op()
- {
- }
-
- /** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticOperationKernel
- *
- * @param[in] op Arithmetic operation to be executed.
- * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32.
- * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[in] output Output tensor info. Data types supported: Same as @p input1.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- */
- void configure(ArithmeticOperation op, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
- /** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticOperationKernel
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] op Arithmetic operation to be executed.
- * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32.
- * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[in] output Output tensor info. Data types supported: Same as @p input1.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- */
- void configure(const CLCompileContext &compile_context, ArithmeticOperation op, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output,
- const ActivationLayerInfo &act_info = ActivationLayerInfo());
-
- /** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticOperationKernel
- *
- * @param[in] op Arithmetic operation to be executed.
- * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32.
- * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[in] output Output tensor info. Data types supported: Same as @p input1.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- *
- * @return a Status
- */
- static Status validate(ArithmeticOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
-
-protected:
- // Inherited methods overridden:
- std::string name() override;
- std::pair<Status, Window> validate_and_configure_window(ITensorInfo &input1, ITensorInfo &input2, ITensorInfo &output) override;
- CLBuildOptions generate_build_options(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output) override;
- std::string generate_id_for_tuning(const std::string &kernel_name, const ITensorInfo &input1, const ITensorInfo &output) override;
-
-private:
- ArithmeticOperation _op;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLELEMENTWISEOPERATIONKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLERODEKERNEL_H
-#define ARM_COMPUTE_CLERODEKERNEL_H
-
-#include "arm_compute/core/CL/ICLSimple2DKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the erode kernel.
- *
- */
-class CLErodeKernel : public ICLSimple2DKernel
-{
-public:
- /**Initialise the kernel's input and output.
- *
- * @param[in] input An input tensor. Data types supported: U8
- * @param[out] output The output tensor. Data types supported: U8.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
- /**Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input An input tensor. Data types supported: U8
- * @param[out] output The output tensor. Data types supported: U8.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined);
-
- // Inherited methods overridden:
- BorderSize border_size() const override;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLERODEKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2019-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLFFTDIGITREVERSEKERNEL_H
-#define ARM_COMPUTE_CLFFTDIGITREVERSEKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-#include "arm_compute/core/KernelDescriptors.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ICLTensor;
-
-/** Interface for the digit reverse operation kernel. */
-class CLFFTDigitReverseKernel : public ICLKernel
-{
-public:
- /** Constructor */
- CLFFTDigitReverseKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLFFTDigitReverseKernel(const CLFFTDigitReverseKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLFFTDigitReverseKernel &operator=(const CLFFTDigitReverseKernel &) = delete;
- /** Default Move Constructor. */
- CLFFTDigitReverseKernel(CLFFTDigitReverseKernel &&) = default;
- /** Default move assignment operator */
- CLFFTDigitReverseKernel &operator=(CLFFTDigitReverseKernel &&) = default;
- /** Default destructor */
- ~CLFFTDigitReverseKernel() = default;
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. Data types supported: F32.
- * @param[out] output Destination tensor. Data type supported: same as @p input
- * @param[in] idx Digit reverse index tensor. Data type supported: U32
- * @param[in] config Kernel configuration.
- */
- void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *idx, const FFTDigitReverseKernelInfo &config);
- /** Set the input and output tensors.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: F32.
- * @param[out] output Destination tensor. Data type supported: same as @p input
- * @param[in] idx Digit reverse index tensor. Data type supported: U32
- * @param[in] config Kernel configuration.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *idx, const FFTDigitReverseKernelInfo &config);
- /** Static function to check if given info will lead to a valid configuration of @ref CLFFTDigitReverseKernel
- *
- * @param[in] input Source tensor info. Data types supported: F32.
- * @param[in] output Destination tensor info. Data type supported: same as @p input
- * @param[in] idx Digit reverse index tensor info. Data type supported: U32
- * @param[in] config Kernel configuration.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *idx, const FFTDigitReverseKernelInfo &config);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
- const ICLTensor *_idx;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLFFTDIGITREVERSEKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2019-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLFFTRADIXSTAGEKERNEL_H
-#define ARM_COMPUTE_CLFFTRADIXSTAGEKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-#include "arm_compute/core/KernelDescriptors.h"
-
-#include <set>
-
-namespace arm_compute
-{
-// Forward declarations
-class ICLTensor;
-
-/** Interface for the FFT radix stage kernel. */
-class CLFFTRadixStageKernel : public ICLKernel
-{
-public:
- /** Constructor */
- CLFFTRadixStageKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLFFTRadixStageKernel(const CLFFTRadixStageKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLFFTRadixStageKernel &operator=(const CLFFTRadixStageKernel &) = delete;
- /** Default Move Constructor. */
- CLFFTRadixStageKernel(CLFFTRadixStageKernel &&) = default;
- /** Default move assignment operator */
- CLFFTRadixStageKernel &operator=(CLFFTRadixStageKernel &&) = default;
- /** Default destructor */
- ~CLFFTRadixStageKernel() = default;
- /** Set the input and output tensors.
- *
- * @note If the output tensor is nullptr, the FFT will be performed in-place
- *
- * @param[in,out] input Source tensor. Data types supported: F32.
- * @param[out] output Destination tensor. Can be nullptr. Data type supported: same as @p input
- * @param[in] config FFT descriptor metadata.
- */
- void configure(ICLTensor *input, ICLTensor *output, const FFTRadixStageKernelInfo &config);
- /** Set the input and output tensors.
- *
- * @note If the output tensor is nullptr, the FFT will be performed in-place
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in,out] input Source tensor. Data types supported: F32.
- * @param[out] output Destination tensor. Can be nullptr. Data type supported: same as @p input
- * @param[in] config FFT descriptor metadata.
- */
- void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const FFTRadixStageKernelInfo &config);
- /** Static function to check if given info will lead to a valid configuration of @ref CLFFTRadixStageKernel
- *
- * @param[in] input Source tensor info. Data types supported: F32.
- * @param[in] output Destination tensor info. Can be nullptr. Data type supported: same as @p input
- * @param[in] config FFT descriptor metadata.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const FFTRadixStageKernelInfo &config);
- /** Returns the radix that are support by the FFT kernel
- *
- * @return A set of supported radix
- */
- static std::set<unsigned int> supported_radix();
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- ICLTensor *_input;
- ICLTensor *_output;
- bool _run_in_place;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLFFTRADIXSTAGEKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2019-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLFFTSCALEKERNEL_H
-#define ARM_COMPUTE_CLFFTSCALEKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-#include "arm_compute/core/KernelDescriptors.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ICLTensor;
-
-/** Interface for the inverse fft scale kernel. */
-class CLFFTScaleKernel : public ICLKernel
-{
-public:
- /** Constructor */
- CLFFTScaleKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLFFTScaleKernel(const CLFFTScaleKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLFFTScaleKernel &operator=(const CLFFTScaleKernel &) = delete;
- /** Default Move Constructor. */
- CLFFTScaleKernel(CLFFTScaleKernel &&) = default;
- /** Default move assignment operator */
- CLFFTScaleKernel &operator=(CLFFTScaleKernel &&) = default;
- /** Default destructor */
- ~CLFFTScaleKernel() = default;
- /** Set the input and output tensors.
- *
- * @param[in,out] input Source tensor. Data types supported: F32.
- * @param[out] output Destination tensor. Data type supported: same as @p input
- * @param[in] config Kernel configuration
- */
- void configure(ICLTensor *input, ICLTensor *output, const FFTScaleKernelInfo &config);
- /** Set the input and output tensors.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in,out] input Source tensor. Data types supported: F32.
- * @param[out] output Destination tensor. Data type supported: same as @p input
- * @param[in] config Kernel configuration
- */
- void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const FFTScaleKernelInfo &config);
- /** Static function to check if given info will lead to a valid configuration of @ref CLFFTScaleKernel
- *
- * @param[in] input Source tensor info. Data types supported: F32.
- * @param[in] output Destination tensor info. Data type supported: same as @p input
- * @param[in] config Kernel configuration
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const FFTScaleKernelInfo &config);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- ICLTensor *_input;
- ICLTensor *_output;
- bool _run_in_place;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLFFTSCALEKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLFASTCORNERSKERNEL_H
-#define ARM_COMPUTE_CLFASTCORNERSKERNEL_H
-
-#include "arm_compute/core/CL/ICLArray.h"
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-#include <cstdint>
-
-namespace cl
-{
-class Buffer;
-}
-
-namespace arm_compute
-{
-class ICLTensor;
-using ICLImage = ICLTensor;
-
-/** CL kernel to perform fast corners */
-class CLFastCornersKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLFastCornersKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLFastCornersKernel(const CLFastCornersKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLFastCornersKernel &operator=(const CLFastCornersKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLFastCornersKernel(CLFastCornersKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLFastCornersKernel &operator=(CLFastCornersKernel &&) = default;
- /** Default destructor */
- ~CLFastCornersKernel() = default;
-
- /** Initialise the kernel.
- *
- * @param[in] input Source image. Data types supported: U8.
- * @param[out] output Output image. Data types supported: U8.
- * @param[in] threshold Threshold on difference between intensity of the central pixel and pixels on Bresenham's circle of radius 3.
- * @param[in] non_max_suppression True if non-maxima suppresion is applied, false otherwise.
- * @param[in] border_mode Strategy to use for borders.
- */
- void configure(const ICLImage *input, ICLImage *output, float threshold, bool non_max_suppression, BorderMode border_mode);
- /** Initialise the kernel.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source image. Data types supported: U8.
- * @param[out] output Output image. Data types supported: U8.
- * @param[in] threshold Threshold on difference between intensity of the central pixel and pixels on Bresenham's circle of radius 3.
- * @param[in] non_max_suppression True if non-maxima suppresion is applied, false otherwise.
- * @param[in] border_mode Strategy to use for borders.
- */
- void configure(const CLCompileContext &compile_context, const ICLImage *input, ICLImage *output, float threshold, bool non_max_suppression, BorderMode border_mode);
-
- // Inherited methods overridden
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-private:
- const ICLImage *_input;
- ICLImage *_output;
-};
-
-/** CL kernel to copy keypoints information to ICLKeyPointArray and counts the number of key points */
-class CLCopyToArrayKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLCopyToArrayKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLCopyToArrayKernel(const CLCopyToArrayKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLCopyToArrayKernel &operator=(const CLCopyToArrayKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLCopyToArrayKernel(CLCopyToArrayKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLCopyToArrayKernel &operator=(CLCopyToArrayKernel &&) = default;
- /** Default destructor */
- ~CLCopyToArrayKernel() = default;
-
- /** Initialise the kernel.
- *
- * @param[in] input Source image. Data types supported: U8.
- * @param[in] update_number Flag to indicate whether we need to update the number of corners
- * @param[out] corners Array of keypoints to store the results.
- * @param[out] num_buffers Number of keypoints to store the results.
- */
- void configure(const ICLImage *input, bool update_number, ICLKeyPointArray *corners, cl::Buffer *num_buffers);
- /** Initialise the kernel.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source image. Data types supported: U8.
- * @param[in] update_number Flag to indicate whether we need to update the number of corners
- * @param[out] corners Array of keypoints to store the results.
- * @param[out] num_buffers Number of keypoints to store the results.
- */
- void configure(const CLCompileContext &compile_context, const ICLImage *input, bool update_number, ICLKeyPointArray *corners, cl::Buffer *num_buffers);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLImage *_input; /**< source image */
- ICLKeyPointArray *_corners; /**< destination array */
- cl::Buffer *_num_buffer; /**< CL memory to record number of key points in the array */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLFASTCORNERSKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLFILLBORDERKERNEL_H
-#define ARM_COMPUTE_CLFILLBORDERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/PixelValue.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for filling the border of a kernel */
-class CLFillBorderKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLFillBorderKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLFillBorderKernel(const CLFillBorderKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLFillBorderKernel &operator=(const CLFillBorderKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLFillBorderKernel(CLFillBorderKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLFillBorderKernel &operator=(CLFillBorderKernel &&) = default;
- /** Default destructor */
- ~CLFillBorderKernel() = default;
-
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in,out] tensor Tensor to process Data types supported: U8/QASYMM8/S8/QASYMM8_SIGNED/U16/S16/U32/S32/F16/F32.
- * @param[in] border_size Size of the border to fill in elements.
- * @param[in] border_mode Border mode to use for the convolution.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(const CLCompileContext &compile_context, ICLTensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue());
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in,out] tensor Tensor to process Data types supported: U8/QASYMM8/S8/QASYMM8_SIGNED/U16/S16/U32/S32/F16/F32.
- * @param[in] border_size Size of the border to fill in elements.
- * @param[in] border_mode Border mode to use for the convolution.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(ICLTensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue());
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in,out] tensor Tensor to process Data types supported: U8/QASYMM8/S8/QASYMM8_SIGNED/U16/S16/U32/S32/F16/F32.
- * @param[in] border_size Size of the border to fill in elements.
- * @param[in] border_mode Border mode to use for the convolution.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(const CLCompileContext &compile_context, ITensorInfo *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue());
-
- /** Function to set the constant value on fill border kernel depending on type.
- *
- * @param[in] idx Index of the kernel argument to set.
- * @param[in] constant_border_value Constant value to use for borders if border_mode is set to CONSTANT.
- */
- template <class T>
- void set_constant_border(unsigned int idx, const PixelValue &constant_border_value);
-
- // Inherited methods overridden:
- void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
- void run(const Window &window, cl::CommandQueue &queue) override;
- bool is_parallelisable() const override;
-
-private:
- ICLTensor *_tensor;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLFILLBORDERKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLFLATTENLAYERKERNEL_H
-#define ARM_COMPUTE_CLFLATTENLAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL interface for the flatten kernel.*/
-class CLFlattenLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLFlattenLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLFlattenLayerKernel(const CLFlattenLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLFlattenLayerKernel &operator=(const CLFlattenLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLFlattenLayerKernel(CLFlattenLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLFlattenLayerKernel &operator=(CLFlattenLayerKernel &&) = default;
- /** Set the input and output of the kernel.
- *
- * @param[in] input First input tensor to flatten with at least 3 dimensions.
- * The dimensions above the third will be interpreted as batches. Data types supported: All.
- * @param[out] output Output tensor with shape [w*h*d, input_batches] where:
- * w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input
- */
- void configure(const ICLTensor *input, ICLTensor *output);
- /** Set the input and output of the kernel.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input First input tensor to flatten with at least 3 dimensions.
- * The dimensions above the third will be interpreted as batches. Data types supported: All.
- * @param[out] output Output tensor with shape [w*h*d, input_batches] where:
- * w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLFlattenLayerKernel
- *
- * @param[in] input First input tensor to flatten with at least 3 dimensions.
- * The dimensions above the third will be interpreted as batches. Data types supported: All.
- * @param[out] output Output tensor with shape [w*h*d, input_batches] where:
- * w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-public:
- const ICLTensor *_input;
- ICLTensor *_output;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLFLATTENLAYERKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLFLOORKERNEL_H
-#define ARM_COMPUTE_CLFLOORKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to perform a floor operation */
-class CLFloorKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLFloorKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLFloorKernel(const CLFloorKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLFloorKernel &operator=(const CLFloorKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLFloorKernel(CLFloorKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLFloorKernel &operator=(CLFloorKernel &&) = default;
- /** Default destructor */
- ~CLFloorKernel() = default;
- /** Set the source, destination of the kernel
- *
- * @param[in] input Source tensor. Data type supported: F16/F32.
- * @param[out] output Destination tensor. Same as @p input
- */
- void configure(const ICLTensor *input, ICLTensor *output);
-
- /** Set the source, destination of the kernel
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data type supported: F16/F32.
- * @param[out] output Destination tensor. Same as @p input
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
-
- /** Static function to check if given info will lead to a valid configuration of @ref CLFloorKernel
- *
- * @param[in] input Source tensor info. Data type supported: F16/F32.
- * @param[in] output Destination tensor info. Same as @p input
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLFLOORKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLFUSEBATCHNORMALIZATIONKERNEL_H
-#define ARM_COMPUTE_CLFUSEBATCHNORMALIZATIONKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ICLTensor;
-
-/** OpenCL kernel to fuse the batch normalization node to a preceding convolution node */
-class CLFuseBatchNormalizationKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLFuseBatchNormalizationKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLFuseBatchNormalizationKernel(const CLFuseBatchNormalizationKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLFuseBatchNormalizationKernel &operator=(const CLFuseBatchNormalizationKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLFuseBatchNormalizationKernel(CLFuseBatchNormalizationKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLFuseBatchNormalizationKernel &operator=(CLFuseBatchNormalizationKernel &&) = default;
- /** Default destructor */
- ~CLFuseBatchNormalizationKernel() = default;
- /** Set the source, destination of the kernel
- *
- * @param[in] input_weights Input weights tensor for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC
- * @param[in] bn_mean Batch normalization layer mean tensor. Same as @p input_weights
- * @param[in] bn_var Batch normalization layer variance tensor. Same as @p input_weights
- * @param[out] fused_weights Output fused weights tensor. It can be a nullptr in case of in-place computation. Same as @p input_weights
- * @param[out] fused_bias Output fused bias tensor. It can be a nullptr in case of in-place computation and input_bias != nullptr. Same as @p input_weights
- * @param[in] input_bias (Optional) Input bias tensor for convolution or depthwise convolution layer. It can be a nullptr in case the bias tensor is not required. Same as @p input_weights
- * @param[in] bn_beta (Optional) Batch normalization layer beta tensor. It can be a nullptr in case the beta tensor is not required. Same as @p input_weights
- * @note if nullptr, bn_beta is set to 0.0
- * @param[in] bn_gamma (Optional) Batch normalization layer gamma tensor. It can be a nullptr in case the gamma tensor is not required. Same as @p input_weights
- * @note if nullptr, bn_gamma is set to 1.0
- * @param[in] epsilon (Optional) Batch normalization layer epsilon parameter. Defaults to 0.001f.
- * @param[in] fbn_type (Optional) Fused batch normalization type. Defaults to CONVOLUTION.
- */
- void configure(const ICLTensor *input_weights, const ICLTensor *bn_mean, const ICLTensor *bn_var, ICLTensor *fused_weights, ICLTensor *fused_bias,
- const ICLTensor *input_bias = nullptr, const ICLTensor *bn_beta = nullptr, const ICLTensor *bn_gamma = nullptr,
- float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION);
- /** Set the source, destination of the kernel
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input_weights Input weights tensor for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC
- * @param[in] bn_mean Batch normalization layer mean tensor. Same as @p input_weights
- * @param[in] bn_var Batch normalization layer variance tensor. Same as @p input_weights
- * @param[out] fused_weights Output fused weights tensor. It can be a nullptr in case of in-place computation. Same as @p input_weights
- * @param[out] fused_bias Output fused bias tensor. It can be a nullptr in case of in-place computation and input_bias != nullptr. Same as @p input_weights
- * @param[in] input_bias (Optional) Input bias tensor for convolution or depthwise convolution layer. It can be a nullptr in case the bias tensor is not required. Same as @p input_weights
- * @param[in] bn_beta (Optional) Batch normalization layer beta tensor. It can be a nullptr in case the beta tensor is not required. Same as @p input_weights
- * @note if nullptr, bn_beta is set to 0.0
- * @param[in] bn_gamma (Optional) Batch normalization layer gamma tensor. It can be a nullptr in case the gamma tensor is not required. Same as @p input_weights
- * @note if nullptr, bn_gamma is set to 1.0
- * @param[in] epsilon (Optional) Batch normalization layer epsilon parameter. Defaults to 0.001f.
- * @param[in] fbn_type (Optional) Fused batch normalization type. Defaults to CONVOLUTION.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input_weights, const ICLTensor *bn_mean, const ICLTensor *bn_var, ICLTensor *fused_weights, ICLTensor *fused_bias,
- const ICLTensor *input_bias = nullptr, const ICLTensor *bn_beta = nullptr, const ICLTensor *bn_gamma = nullptr,
- float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION);
- /** Static function to check if given info will lead to a valid configuration of @ref CLFuseBatchNormalizationKernel
- *
- * @param[in] input_weights Input weights tensor info for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC
- * @param[in] bn_mean Batch normalization layer mean tensor info. Same as @p input_weights
- * @param[in] bn_var Batch normalization layer variance tensor info. Same as @p input_weights
- * @param[in] fused_weights Output fused weights tensor info. It can be a nullptr in case of in-place computation. Same as @p input_weights
- * @param[in] fused_bias Output fused bias tensor info. It can be a nullptr in case of in-place computation and input_bias != nullptr. Same as @p input_weights
- * @param[in] input_bias (Optional) Input bias tensor info for convolution or depthwise convolution layer. It can be a nullptr in case the bias tensor is not required. Same as @p input_weights
- * @param[in] bn_beta (Optional) Batch normalization layer beta tensor info. It can be a nullptr in case the beta tensor is not required. Same as @p input_weights
- * @note if nullptr, bn_beta is set to 0.0
- * @param[in] bn_gamma (Optional) Batch normalization layer gamma tensor info. It can be a nullptr in case the gamma tensor is not required. Same as @p input_weights
- * @note if nullptr, bn_gamma is set to 1.0
- * @param[in] epsilon (Optional) Batch normalization layer epsilon parameter. Defaults to 0.001f.
- * @param[in] fbn_type (Optional) Fused batch normalization type. Defaults to CONVOLUTION.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input_weights, const ITensorInfo *bn_mean, const ITensorInfo *bn_var,
- const ITensorInfo *fused_weights, const ITensorInfo *fused_bias,
- const ITensorInfo *input_bias = nullptr, const ITensorInfo *bn_beta = nullptr, const ITensorInfo *bn_gamma = nullptr,
- float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input_weights;
- const ICLTensor *_input_bias;
- const ICLTensor *_bn_mean;
- const ICLTensor *_bn_var;
- const ICLTensor *_bn_gamma;
- const ICLTensor *_bn_beta;
- ICLTensor *_fused_weights;
- ICLTensor *_fused_bias;
- float _epsilon;
- bool _run_in_place_weights;
- bool _run_in_place_bias;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLFUSEBATCHNORMALIZATIONKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2019-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYNATIVEKERNEL_H
-#define ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYNATIVEKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to multiply matrices with QASYMM8/QASYMM8_SIGNED data type */
-class CLGEMMLowpMatrixMultiplyNativeKernel : public ICLKernel
-{
-public:
- /** Default Constructor */
- CLGEMMLowpMatrixMultiplyNativeKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMLowpMatrixMultiplyNativeKernel(const CLGEMMLowpMatrixMultiplyNativeKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMLowpMatrixMultiplyNativeKernel &operator=(const CLGEMMLowpMatrixMultiplyNativeKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLGEMMLowpMatrixMultiplyNativeKernel(CLGEMMLowpMatrixMultiplyNativeKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLGEMMLowpMatrixMultiplyNativeKernel &operator=(CLGEMMLowpMatrixMultiplyNativeKernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: QASYMM8/QASYMM8_SIGNED
- * @param[in] input1 Input tensor containing the RHS matrix. Data type supported: same as @p input0
- * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: S32
- * @param[in] lhs_info LHS matrix information used to retrieve the number of rows to be processed by each thread
- * lhs_info.m0: 2,3,4,5,6,7,8
- * lhs_info.k0: 2,3,4,8,16
- * @param[in] rhs_info RHS matrix information used to retrieve the number of columns to be processed by each thread
- * rhs_info.n0: 2,3,4,8,16
- * rhs_info.k0: same as lhs_info.k0
- * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices
- */
- void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const GEMMReshapeInfo &gemm_info);
- /** Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: QASYMM8/QASYMM8_SIGNED
- * @param[in] input1 Input tensor containing the RHS matrix. Data type supported: same as @p input0
- * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: S32
- * @param[in] lhs_info LHS matrix information used to retrieve the number of rows to be processed by each thread
- * lhs_info.m0: 2,3,4,5,6,7,8
- * lhs_info.k0: 2,3,4,8,16
- * @param[in] rhs_info RHS matrix information used to retrieve the number of columns to be processed by each thread
- * rhs_info.n0: 2,3,4,8,16
- * rhs_info.k0: same as lhs_info.k0
- * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
- const GEMMReshapeInfo &gemm_info);
- /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixMultiplyNativeKernel
- *
- * @param[in] input0 Input tensor info for the LHS matrix. Data type supported: QASYMM8/QASYMM8_SIGNED
- * @param[in] input1 Input tensor info for the RHS matrix. Data type supported: same as @p input0
- * @param[in] output Output tensor info. Data type supported: S32
- * @param[in] lhs_info LHS matrix information used to retrieve the number of rows to be processed by each thread
- * lhs_info.m0: 2,3,4,5,6,7,8
- * lhs_info.k0: 2,3,4,8,16
- * @param[in] rhs_info RHS matrix information used to retrieve the number of columns to be processed by each thread
- * rhs_info.n0: 2,3,4,8,16
- * rhs_info.k0: same as lhs_info.k0
- * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
- const GEMMReshapeInfo &gemm_info);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input0;
- const ICLTensor *_input1;
- ICLTensor *_output;
- bool _slide_matrix_b;
- bool _reinterpret_input_as_3d;
- bool _reinterpret_output_as_3d;
- bool _use_dummy_work_items;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYNATIVEKERNEL_H*/
+++ /dev/null
-/*
- * Copyright (c) 2019-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYRESHAPEDKERNEL_H
-#define ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYRESHAPEDKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to multiply matrices when both the input matrices LHS (input0) and RHS (input1) have been reshaped
- *
- * @note The input matrices @p input0 and @p input1 must be reshaped through @ref CLGEMMReshapeLHSMatrixKernel and @ref CLGEMMReshapeRHSMatrixKernel
- */
-class CLGEMMLowpMatrixMultiplyReshapedKernel : public ICLKernel
-{
-public:
- /** Default Constructor */
- CLGEMMLowpMatrixMultiplyReshapedKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMLowpMatrixMultiplyReshapedKernel(const CLGEMMLowpMatrixMultiplyReshapedKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMLowpMatrixMultiplyReshapedKernel &operator=(const CLGEMMLowpMatrixMultiplyReshapedKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLGEMMLowpMatrixMultiplyReshapedKernel(CLGEMMLowpMatrixMultiplyReshapedKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLGEMMLowpMatrixMultiplyReshapedKernel &operator=(CLGEMMLowpMatrixMultiplyReshapedKernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @param[in] input0 Input tensor containing the LHS reshaped matrix. Data type supported: QASYMM8/QASYMM8_SIGNED. The number of dimensions for the LHS matrix must be less or equal than 4.
- * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3.
- * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: S32
- * @param[in] lhs_info LHS matrix information used for reshaping the input0 tensor. Only the following values are supported:
- * lhs_info.m0: 2,3,4,5,6,7,8
- * lhs_info.k0: 2,3,4,8,16
- * lhs_info.transpose: false
- * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported:
- * rhs_info.n0: 2,3,4,8,16
- * rhs_info.k0: same as lhs_info.k0
- * rhs_info.transpose: true
- * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices
- *
- * @note lhs_info.k0 must be equal to rhs_info.k0
- */
- void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const GEMMReshapeInfo &gemm_info);
- /** Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input0 Input tensor containing the LHS reshaped matrix. Data type supported: QASYMM8/QASYMM8_SIGNED. The number of dimensions for the LHS matrix must be less or equal than 4.
- * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3.
- * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: S32
- * @param[in] lhs_info LHS matrix information used for reshaping the input0 tensor. Only the following values are supported:
- * lhs_info.m0: 2,3,4,5,6,7,8
- * lhs_info.k0: 2,3,4,8,16
- * lhs_info.transpose: false
- * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported:
- * rhs_info.n0: 2,3,4,8,16
- * rhs_info.k0: same as lhs_info.k0
- * rhs_info.transpose: true
- * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices
- *
- * @note lhs_info.k0 must be equal to rhs_info.k0
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
- const GEMMReshapeInfo &gemm_info);
- /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixMultiplyReshapedKernel
- *
- * @param[in] input0 Input tensor info containing the LHS reshaped matrix. Data type supported: QASYMM8/QASYMM8_SIGNED. The number of dimensions for the LHS matrix must be less or equal than 4.
- * @param[in] input1 Input tensor info containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3.
- * @param[in] output Output tensor info. Data type supported: S32
- * @param[in] lhs_info LHS matrix information used for reshaping the input0 tensor. Only the following values are supported:
- * lhs_info.m0: 2,3,4,5,6,7,8
- * lhs_info.k0: 2,3,4,8,16
- * lhs_info.transpose: false
- * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported:
- * rhs_info.n0: 2,3,4,8,16
- * rhs_info.k0: 2,3,4,8,16
- * rhs_info.transpose: true
- * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices
- *
- * @note lhs_info.k0 must be equal to rhs_info.k0
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
- const GEMMReshapeInfo &gemm_info);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input0;
- const ICLTensor *_input1;
- ICLTensor *_output;
- bool _slide_matrix_b;
- bool _reinterpret_output_as_3d;
- unsigned int _k;
- bool _use_dummy_work_items;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYRESHAPEDKERNEL_H*/
+++ /dev/null
-/*
- * Copyright (c) 2019-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYRESHAPEDONLYRHSKERNEL_H
-#define ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYRESHAPEDONLYRHSKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/KernelDescriptors.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to multiply matrices with QASYMM8 data type when only the input matrix RHS (input1) has been reshaped
- *
- * @note The input matrix input1 must be reshaped through @ref CLGEMMReshapeRHSMatrixKernel
- * @note For fused output stage, only GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT type is supported
- */
-class CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel : public ICLKernel
-{
-public:
- /** Default Constructor */
- CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel(const CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel &operator=(const CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel(CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel &operator=(CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: QASYMM8/QASYMM8_SIGNED
- * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL
- * @param[out] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/S32.
- * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices, output stage information and RHS/LHS info.
- * Only the following values are supported for LHS info:
- * lhs_info.m0: 2,3,4,5,6,7,8
- * lhs_info.k0: 2,3,4,8,16
- * Only the following values are supported for RHS info:
- * rhs_info.n0: 2,3,4,8,16
- * rhs_info.k0: same as lhs_info.k0
- * rhs_info.transpose: true
- * @param[in] vector_sum_col (Optional) Input row-vector of sums of all the entries in each column of matrix B.
- * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: S32
- * @param[in] vector_sum_row (Optional) Input row-vector of sums of all the entries in each row of matrix A.
- * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: S32
- * @param[in] bias (Optional) Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: S32.
- * @param[in] output_multipliers (Optional) Output multipliers tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM).
- * Supported data types: S32.
- * @param[in] output_shifts (Optional) Output shifts tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM).
- * Supported data types: S32.
- */
- void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMKernelInfo &gemm_info, const ICLTensor *vector_sum_col = nullptr,
- const ICLTensor *vector_sum_row = nullptr, const ICLTensor *bias = nullptr, const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr);
- /** Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: QASYMM8/QASYMM8_SIGNED
- * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0
- * @param[out] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/S32.
- * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices, output stage information and RHS/LHS info.
- * Only the following values are supported for LHS info:
- * lhs_info.m0: 2,3,4,5,6,7,8
- * lhs_info.k0: 2,3,4,8,16
- * Only the following values are supported for RHS info:
- * rhs_info.n0: 2,3,4,8,16
- * rhs_info.k0: same as lhs_info.k0
- * rhs_info.transpose: true
- * @param[in] vector_sum_col (Optional) Input row-vector of sums of all the entries in each column of matrix B.
- * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: S32
- * @param[in] vector_sum_row (Optional) Input row-vector of sums of all the entries in each row of matrix A.
- * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: S32
- * @param[in] bias (Optional) Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: S32.
- * @param[in] output_multipliers (Optional) Output multipliers tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM).
- * Supported data types: S32.
- * @param[in] output_shifts (Optional) Output shifts tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM).
- * Supported data types: S32.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMKernelInfo &gemm_info, const ICLTensor *vector_sum_col = nullptr,
- const ICLTensor *vector_sum_row = nullptr, const ICLTensor *bias = nullptr, const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr);
- /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel
- *
- * @param[in] input0 Input tensor info for the LHS matrix. Data type supported: QASYMM8/QASYMM8_SIGNED
- * @param[in] input1 Input tensor info for the RHS reshaped matrix. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL
- * @param[in] output Output tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/S32.
- * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices, output stage information and RHS/LHS info.
- * Only the following values are supported for LHS info:
- * lhs_info.m0: 2,3,4,5,6,7,8
- * lhs_info.k0: 2,3,4,8,16
- * Only the following values are supported for RHS info:
- * rhs_info.n0: 2,3,4,8,16
- * rhs_info.k0: same as lhs_info.k0
- * rhs_info.transpose: true
- * @param[in] vector_sum_col (Optional) Input row-vector info of sums of all the entries in each column of matrix B.
- * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: S32
- * @param[in] vector_sum_row (Optional) Input row-vector info of sums of all the entries in each row of matrix A.
- * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: S32
- * @param[in] bias (Optional) Biases tensor info. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: S32.
- * @param[in] output_multipliers (Optional) Output multipliers tensor info. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM).
- * Supported data types: S32.
- * @param[in] output_shifts (Optional) Output shifts tensor info. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM).
- * Supported data types: S32.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output, const GEMMKernelInfo &gemm_info, const ITensorInfo *vector_sum_col = nullptr,
- const ITensorInfo *vector_sum_row = nullptr, const ITensorInfo *bias = nullptr, const ITensorInfo *output_multipliers = nullptr,
- const ITensorInfo *output_shifts = nullptr);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input0;
- const ICLTensor *_input1;
- ICLTensor *_output;
- const ICLTensor *_vector_sum_col;
- const ICLTensor *_vector_sum_row;
- const ICLTensor *_bias;
- const ICLTensor *_output_multipliers;
- const ICLTensor *_output_shifts;
- bool _slide_matrix_b;
- bool _reinterpret_input_as_3d;
- bool _reinterpret_output_as_3d;
- bool _use_dummy_work_items;
- bool _is_quantized_per_channel;
- bool _fuse_output_stage;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYRESHAPEDONLYRHSKERNEL_H */
\ No newline at end of file
+++ /dev/null
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMLOWPOFFSETCONTRIBUTIONKERNEL_H
-#define ARM_COMPUTE_CLGEMMLOWPOFFSETCONTRIBUTIONKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel used to add the offset contribution after the matrix multiplication. The computation is performed in-place
- *
- * This kernel takes a final int32 accumulator value (the output of the matrix multiplication),
- * and adds to it the offset contribution of matrix A and matrix B in-place.
- *
- * The final result is:
- *
- * mm_result[i][k] = mm_result[i][k] +
- * (vector_sum_col[k] * a_offset) +
- * (vector_sum_row[i] * b_offset) +
- * (a_offset * b_offset * k)
- *
- */
-class CLGEMMLowpOffsetContributionKernel : public ICLKernel
-{
-public:
- /** Constructor */
- CLGEMMLowpOffsetContributionKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- CLGEMMLowpOffsetContributionKernel(const CLGEMMLowpOffsetContributionKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- CLGEMMLowpOffsetContributionKernel &operator=(const CLGEMMLowpOffsetContributionKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLGEMMLowpOffsetContributionKernel(CLGEMMLowpOffsetContributionKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLGEMMLowpOffsetContributionKernel &operator=(CLGEMMLowpOffsetContributionKernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @param[in, out] mm_result Input tensor containing the result of the matrix multiplication. Data type supported: S32
- * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B.
- * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result
- * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A.
- * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[in] k Number of matrix A columns or Matrix B rows
- * @param[in] a_offset Offset to be added to each element of the matrix A.
- * @param[in] b_offset Offset to be added to each element of the matrix B.
- */
- void configure(ICLTensor *mm_result, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, const ICLTensor *bias, int32_t k, int32_t a_offset, int32_t b_offset);
- /** Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in, out] mm_result Input tensor containing the result of the matrix multiplication. Data type supported: S32
- * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B.
- * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result
- * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A.
- * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[in] k Number of matrix A columns or Matrix B rows
- * @param[in] a_offset Offset to be added to each element of the matrix A.
- * @param[in] b_offset Offset to be added to each element of the matrix B.
- */
- void configure(const CLCompileContext &compile_context, ICLTensor *mm_result, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, const ICLTensor *bias, int32_t k, int32_t a_offset,
- int32_t b_offset);
- /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpOffsetContributionKernel
- *
- * @param[in] mm_result Input tensor containing the result of @ref CLGEMMLowpOffsetContributionKernel. Data type supported: S32
- * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B.
- * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result
- * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A.
- * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[in] a_offset Offset to be added to each element of the matrix A.
- * @param[in] b_offset Offset to be added to each element of the matrix B.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *mm_result, const ITensorInfo *vector_sum_col, const ITensorInfo *vector_sum_row, const ITensorInfo *bias, int32_t a_offset, int32_t b_offset);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_vector_sum_col;
- const ICLTensor *_vector_sum_row;
- ICLTensor *_mm_result;
- const ICLTensor *_bias;
-};
-} // namespace arm_compute
-
-#endif /* ARM_COMPUTE_CLGEMMLOWPOFFSETCONTRIBUTIONKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMLOWPOFFSETCONTRIBUTIONOUTPUTSTAGEKERNEL_H
-#define ARM_COMPUTE_CLGEMMLOWPOFFSETCONTRIBUTIONOUTPUTSTAGEKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel used to add the offset contribution after the matrix multiplication and perform the output stage.
- *
- * This kernel takes a final int32 accumulator value (the output of the matrix multiplication), adds to it the offset contribution
- * of matrix A and matrix B and performs the output stage defined by the output_stage argument
- *
- * @note For quantized computations the output data type for auto-initialization must be passed as part of the @ref GEMMLowpOutputStageInfo.
- */
-class CLGEMMLowpOffsetContributionOutputStageKernel : public ICLKernel
-{
-public:
- /** Constructor */
- CLGEMMLowpOffsetContributionOutputStageKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- CLGEMMLowpOffsetContributionOutputStageKernel(const CLGEMMLowpOffsetContributionOutputStageKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- CLGEMMLowpOffsetContributionOutputStageKernel &operator=(const CLGEMMLowpOffsetContributionOutputStageKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLGEMMLowpOffsetContributionOutputStageKernel(CLGEMMLowpOffsetContributionOutputStageKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLGEMMLowpOffsetContributionOutputStageKernel &operator=(CLGEMMLowpOffsetContributionOutputStageKernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @param[in] mm_result Input tensor containing the result of the matrix multiplication. Data type supported: S32
- * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B.
- * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result
- * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A.
- * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[out] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED.
- * @param[in] k Number of matrix A columns or Matrix B rows
- * @param[in] a_offset Offset to be added to each element of the matrix A.
- * @param[in] b_offset Offset to be added to each element of the matrix B.
- * @param[in] output_stage GEMMLowp output stage info
- * @param[in] output_multipliers Output multipliers tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM).
- * Supported data types: S32
- * @param[in] output_shifts Output shifts tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM).
- * Supported data types: S32
- */
- void configure(const ICLTensor *mm_result, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, const ICLTensor *bias, ICLTensor *output, int32_t k, int32_t a_offset, int32_t b_offset,
- const GEMMLowpOutputStageInfo &output_stage, const ICLTensor *output_multipliers, const ICLTensor *output_shifts);
- /** Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] mm_result Input tensor containing the result of the matrix multiplication. Data type supported: S32
- * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B.
- * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result
- * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A.
- * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[out] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED.
- * @param[in] k Number of matrix A columns or Matrix B rows
- * @param[in] a_offset Offset to be added to each element of the matrix A.
- * @param[in] b_offset Offset to be added to each element of the matrix B.
- * @param[in] output_stage GEMMLowp output stage info
- * @param[in] output_multipliers Output multipliers tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM).
- * Supported data types: S32
- * @param[in] output_shifts Output shifts tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM).
- * Supported data types: S32
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *mm_result, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, const ICLTensor *bias, ICLTensor *output, int32_t k,
- int32_t a_offset, int32_t b_offset,
- const GEMMLowpOutputStageInfo &output_stage, const ICLTensor *output_multipliers, const ICLTensor *output_shifts);
- /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpOffsetContributionKernel
- *
- * @param[in] mm_result Input tensor containing the result of @ref CLGEMMLowpOffsetContributionKernel. Data type supported: S32
- * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B.
- * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result
- * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A.
- * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[in] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED.
- * @param[in] a_offset Offset to be added to each element of the matrix A.
- * @param[in] b_offset Offset to be added to each element of the matrix B.
- * @param[in] output_stage GEMMLowp output stage info
- * @param[in] output_multipliers Output multipliers tensor info. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM).
- * Supported data types: S32
- * @param[in] output_shifts Output shifts tensor info. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM).
- * Supported data types: S32
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *mm_result, const ITensorInfo *vector_sum_col, const ITensorInfo *vector_sum_row, const ITensorInfo *bias, const ITensorInfo *output, int32_t a_offset,
- int32_t b_offset, const GEMMLowpOutputStageInfo &output_stage, const ITensorInfo *output_multipliers, const ITensorInfo *output_shifts);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_mm_result;
- const ICLTensor *_vector_sum_col;
- const ICLTensor *_vector_sum_row;
- const ICLTensor *_bias;
- ICLTensor *_output;
- const ICLTensor *_output_multipliers;
- const ICLTensor *_output_shifts;
- bool _is_quantized_per_channel;
-};
-} // namespace arm_compute
-
-#endif /* ARM_COMPUTE_CLGEMMLOWPOFFSETCONTRIBUTIONOUTPUTSTAGEKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEBYFIXEDPOINTKERNEL_H
-#define ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEBYFIXEDPOINTKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8/QASYMM8_SIGNED/QSYMM16
- *
- * This kernel takes a final int32 accumulator value (the output of the matrix multiplication), and processes it to obtain the final quantized value.
- * The following computations will be performed by the kernel:
- *
- * -# Compute fixed point multiplication between each entry of input by gemmlowp_multiplier
- * -# Add bias to final result if bias tensor is not a nullptr
- * -# Round to nearest division by a power-of-two using result_shift
- * -# Add offset to each result
- * -# Clamp the value between the specified min and max bounds
- * -# Clamp the resulting int32 values to the proper quantized range and cast to QASYMM8/QASYMM8_SIGNED/QSYMM16.
- */
-class CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel : public ICLKernel
-{
-public:
- /** Constructor */
- CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel(const CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel &operator=(const CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel(CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel &operator=(CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor. Data type supported: S32
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[out] output Output tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM16.
- * @param[in] info Output stage info. Used to pass the quantized output data type
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo *info);
- /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel
- *
- * @param[in] input Input tensor. Data type supported: S32
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[in] output Output tensor. Data type supported: Data type supported: QSYMM8/QASYMM8_SIGNED/QSYMM16.
- * @param[in] info Output stage info. Used to pass the quantized output data type
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo *info);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- const ICLTensor *_bias;
- ICLTensor *_output;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEBYFIXEDPOINTKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEBYFLOATKERNEL_H
-#define ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEBYFLOATKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ICLTensor;
-
-/** OpenCL kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8/QASYMM8_SIGNED
- *
- * This kernel takes a final int32 accumulator value (the output of the matrix multiplication), and processes it to obtain the final QASYMM8/QASYMM8_SIGNED value.
- * The following computations will be performed by the kernel:
- *
- * -# Compute fixed point multiplication between each entry of input by result_fixedpoint_multiplier
- * -# Add bias to final result if bias tensor is not a nullptr
- * -# Requantize
- * -# Add offset to each result
- * -# Clamp the value between the specified min and max bounds
- * -# Clamp the resulting int32 values to
- * - to the [0..255] range and cast to QASYMM8.
- * - to the [-128..127] range and cast to QASYMM8_SIGNED.
- */
-class CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel : public ICLKernel
-{
-public:
- /** Constructor */
- CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel(const CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel &operator=(const CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel(CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel &operator=(CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input tensor. Data type supported: S32
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[out] output Output tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED
- * @param[in] info Output stage info. Used to pass the quantized output data type
- */
- void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo *info);
- /** Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor. Data type supported: S32
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[out] output Output tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED
- * @param[in] info Output stage info. Used to pass the quantized output data type
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo *info);
- /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel
- *
- * @param[in] input Input tensor. Data type supported: S32
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[in] output Output tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED
- * @param[in] info Output stage info. Used to pass the quantized output data type
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo *info);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- const ICLTensor *_bias;
- ICLTensor *_output;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEBYFLOATKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEKERNEL_H
-#define ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8/QASYMM8_SIGNED
- *
- * This kernel takes a final int32 accumulator value (the output of the matrix multiplication), and processes it to obtain the final QASYMM8/QASYMM8_SIGNED value.
- * The following computations will be performed by the kernel:
- *
- * -# Add offset terms to final result
- * -# Multiply each entry of result by result_mult_int
- * -# Add bias to final result if bias tensor is not a nullptr
- * -# Shift the int32 accumulator by result_shift
- * -# Clamp the value between the specified min and max bounds
- * -# Clamp the resulting int32 values:
- * -# -to the [0..255] range and cast to QASYMM8.
- * -# -to the [-128..127] range and cast to QASYMM8_SIGNED.
- *
- */
-class CLGEMMLowpQuantizeDownInt32ScaleKernel : public ICLKernel
-{
-public:
- /** Constructor */
- CLGEMMLowpQuantizeDownInt32ScaleKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- CLGEMMLowpQuantizeDownInt32ScaleKernel(const CLGEMMLowpQuantizeDownInt32ScaleKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- CLGEMMLowpQuantizeDownInt32ScaleKernel &operator=(const CLGEMMLowpQuantizeDownInt32ScaleKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLGEMMLowpQuantizeDownInt32ScaleKernel(CLGEMMLowpQuantizeDownInt32ScaleKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLGEMMLowpQuantizeDownInt32ScaleKernel &operator=(CLGEMMLowpQuantizeDownInt32ScaleKernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input tensor. Data type supported: S32
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[out] output Output tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED
- * @param[in] output_stage GEMMLowp output stage metadata.
- */
- void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo *output_stage);
- /** Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor. Data type supported: S32
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[out] output Output tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED
- * @param[in] output_stage GEMMLowp output stage metadata.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo *output_stage);
- /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ScaleKernel
- *
- * @param[in] input Input tensor. Data type supported: S32
- * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
- * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
- * @param[in] output Output tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED
- * @param[in] output_stage GEMMLowp output stage metadata.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo *output_stage);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- const ICLTensor *_bias;
- ICLTensor *_output;
-};
-} // namespace arm_compute
-
-#endif /* ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEKERNEL_H */
\ No newline at end of file
+++ /dev/null
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMLOWREDUCTIONKERNEL_H
-#define ARM_COMPUTE_CLGEMMLOWREDUCTIONKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-struct GEMMLowpReductionKernelInfo;
-
-/** Common interface for all OpenCL reduction kernels */
-class ICLGEMMLowpReductionKernel : public ICLKernel
-{
-public:
- /** Constructor */
- ICLGEMMLowpReductionKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- ICLGEMMLowpReductionKernel(const ICLGEMMLowpReductionKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers)*/
- ICLGEMMLowpReductionKernel &operator=(const ICLGEMMLowpReductionKernel &) = delete;
- /** Allow instances of this class to be moved */
- ICLGEMMLowpReductionKernel(ICLGEMMLowpReductionKernel &&) = default;
- /** Allow instances of this class to be moved */
- ICLGEMMLowpReductionKernel &operator=(ICLGEMMLowpReductionKernel &&) = default;
-
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8.
- * @param[out] output Output row-vector of sums of all the entries in each row/col of input tensor. Data type supported: S32
- * @param[in] info Kernel metadata:
- * - k Number of matrix columns/rows depending on the type of reduction.
- * - is_reshaped True if the matrix has been reshaped.
- * - scalar Scalar value to multiply each reduced column/row by.
- * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value.
- */
- virtual void configure(const ICLTensor *input, ICLTensor *output, const GEMMLowpReductionKernelInfo &info) = 0;
- /** Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8.
- * @param[out] output Output row-vector of sums of all the entries in each row/col of input tensor. Data type supported: S32
- * @param[in] info Kernel metadata:
- * - k Number of matrix columns/rows depending on the type of reduction.
- * - is_reshaped True if the matrix has been reshaped.
- * - scalar Scalar value to multiply each reduced column/row by.
- * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value.
- */
- virtual void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const GEMMLowpReductionKernelInfo &info) = 0;
-
-protected:
- const ICLTensor *_input;
- ICLTensor *_output;
-};
-
-/** OpenCL kernel used to compute the row-vectors of sums of all the entries in each row of Matrix A.
- *
- * @note This stage is needed to handle the offset of matrix product
- * https://github.com/google/gemmlowp/blob/master/doc/low-precision.md
- */
-class CLGEMMLowpMatrixAReductionKernel : public ICLGEMMLowpReductionKernel
-{
-public:
- /** Initialise the kernel's input and output.
- *
- * @param[in] mtx_a Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8.
- * @param[out] vector_sum_row Output row-vector of sums of all the entries in each row of mtx_a. Data type supported: S32
- * @param[in] info Kernel metadata:
- * - k Number of matrix columns/rows depending on the type of reduction.
- * - is_reshaped True if the matrix has been reshaped.
- * - scalar Scalar value to multiply each reduced column/row by.
- * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value.
- */
- void configure(const ICLTensor *mtx_a, ICLTensor *vector_sum_row, const GEMMLowpReductionKernelInfo &info) override;
- /** Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] mtx_a Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8.
- * @param[out] vector_sum_row Output row-vector of sums of all the entries in each row of mtx_a. Data type supported: S32
- * @param[in] info Kernel metadata:
- * - k Number of matrix columns/rows depending on the type of reduction.
- * - is_reshaped True if the matrix has been reshaped.
- * - scalar Scalar value to multiply each reduced column/row by.
- * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *mtx_a, ICLTensor *vector_sum_row, const GEMMLowpReductionKernelInfo &info) override;
- /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixAReductionKernel
- *
- * @param[in] mtx_a Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8.
- * @param[in] vector_sum_row Output row-vector of sums of all the entries in each row of mtx_a. Data type supported: S32
- * @param[in] info Kernel metadata:
- * - k Number of matrix columns/rows depending on the type of reduction.
- * - is_reshaped True if the matrix has been reshaped.
- * - scalar Scalar value to multiply each reduced column/row by.
- * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *mtx_a, const ITensorInfo *vector_sum_row, const GEMMLowpReductionKernelInfo &info);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-};
-
-/** OpenCL kernel used to compute the row-vectors of sums of all the entries in each column of Matrix B.
- *
- * @note This stage is needed to handle the offset of matrix product
- * https://github.com/google/gemmlowp/blob/master/doc/low-precision.md
- */
-class CLGEMMLowpMatrixBReductionKernel : public ICLGEMMLowpReductionKernel
-{
-public:
- /** Initialise the kernel's input and output.
- *
- * @param[in] mtx_b Input tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL.
- * @param[out] vector_sum_col Output row-vector of sums of all the entries in each column of mtx_b. Data type supported: S32
- * @param[in] info Kernel metadata:
- * - k Number of matrix columns/rows depending on the type of reduction.
- * - is_reshaped True if the matrix has been reshaped.
- * - scalar Scalar value to multiply each reduced column/row by.
- * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value.
- */
- void configure(const ICLTensor *mtx_b, ICLTensor *vector_sum_col, const GEMMLowpReductionKernelInfo &info) override;
- /** Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] mtx_b Input tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL.
- * @param[out] vector_sum_col Output row-vector of sums of all the entries in each column of mtx_b. Data type supported: S32
- * @param[in] info Kernel metadata:
- * - k Number of matrix columns/rows depending on the type of reduction.
- * - is_reshaped True if the matrix has been reshaped.
- * - scalar Scalar value to multiply each reduced column/row by.
- * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *mtx_b, ICLTensor *vector_sum_col, const GEMMLowpReductionKernelInfo &info) override;
- /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixBReductionKernel
- *
- * @param[in] mtx_b Input tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL.
- * @param[in] vector_sum_col Output row-vector of sums of all the entries in each column of mtx_b. Data type supported: S32
- * @param[in] info Kernel metadata:
- * - k Number of matrix columns/rows depending on the type of reduction.
- * - is_reshaped True if the matrix has been reshaped.
- * - scalar Scalar value to multiply each reduced column/row by.
- * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *mtx_b, const ITensorInfo *vector_sum_col, const GEMMLowpReductionKernelInfo &info);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLGEMMLOWREDUCTIONKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMMATRIXMULTIPLYKERNEL_H
-#define ARM_COMPUTE_CLGEMMMATRIXMULTIPLYKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to multiply two input matrices "A" and "B" and add a martix "C" if provided. All elements of the output matrix will be multiplied by alpha. In case matrix C is passed, it will be added to the previous result.
- * For the matrix C, the broadcast addition is supported if the flag "broadcast_bias" is set in the GEMMReshapeInfo object
- *
- * @note If the input tensors @p input0 and @p input1 have been reshaped respectively with @ref CLGEMMReshapeLHSMatrixKernel" and @ref CLGEMMReshapeRHSMatrixKernel,
- * the flag @p is_interleaved_transposed must be set to true
- *
- * @attention @p input1 tensor must have at least 2 dimensions (matrix)
- *
- */
-class CLGEMMMatrixMultiplyKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLGEMMMatrixMultiplyKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMMatrixMultiplyKernel(const CLGEMMMatrixMultiplyKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMMatrixMultiplyKernel &operator=(const CLGEMMMatrixMultiplyKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLGEMMMatrixMultiplyKernel(CLGEMMMatrixMultiplyKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLGEMMMatrixMultiplyKernel &operator=(CLGEMMMatrixMultiplyKernel &&) = default;
- /** Initialise the kernel's input, output and alpha
- *
- * @param[in] input0 Input tensor containing the Matrix A. Data types supported: F16/F32
- * @param[in] input1 Input tensor containing the Matrix B. Data type supported: same as @p input0
- * @param[in] input2 Input tensor containing the Matrix C (bias). Can be nullptr. Data type supported: same as @p input0
- * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0
- * @param[in] alpha Weight of the matrix product
- * @param[in] beta (Optional) Weight of vector C. Default value is 0. Only beta = 1 is currently supported.
- * @param[in] is_interleaved_transposed (Optional) True if input0 and input1 have been reshaped respectively using @ref CLGEMMReshapeLHSMatrixKernel and @ref CLGEMMReshapeRHSMatrixKernel
- * @param[in] reshape_info (Optional) GEMM reshape info. If is_interleaved_transposed = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped
- * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy
- * @param[in] activation_info (Optional) Activation to apply after the matrix multiplication
- *
- */
- void configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta = 0.f,
- bool is_interleaved_transposed = true, const GEMMReshapeInfo &reshape_info = GEMMReshapeInfo(), bool fp_mixed_precision = false, const ActivationLayerInfo &activation_info = ActivationLayerInfo());
- /** Initialise the kernel's input, output and alpha
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input0 Input tensor containing the Matrix A. Data types supported: F16/F32
- * @param[in] input1 Input tensor containing the Matrix B. Data type supported: same as @p input0
- * @param[in] input2 Input tensor containing the Matrix C (bias). Can be nullptr. Data type supported: same as @p input0
- * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0
- * @param[in] alpha Weight of the matrix product
- * @param[in] beta (Optional) Weight of vector C. Default value is 0. Only beta = 1 is currently supported.
- * @param[in] is_interleaved_transposed (Optional) True if input0 and input1 have been reshaped respectively using @ref CLGEMMReshapeLHSMatrixKernel and @ref CLGEMMReshapeRHSMatrixKernel
- * @param[in] reshape_info (Optional) GEMM reshape info. If is_interleaved_transposed = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped
- * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy
- * @param[in] activation_info (Optional) Activation to apply after the matrix multiplication
- *
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta = 0.f,
- bool is_interleaved_transposed = true, const GEMMReshapeInfo &reshape_info = GEMMReshapeInfo(), bool fp_mixed_precision = false, const ActivationLayerInfo &activation_info = ActivationLayerInfo());
- /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixMultiplyKernel
- *
- * @param[in] input0 Input tensor containing the Matrix A info. Data types supported: F16/F32
- * @param[in] input1 Input tensor containing the Matrix B info. Data type supported: same as @p input0
- * @param[in] input2 Input tensor containing the Matrix C (bias) info. Can be nullptr. Data type supported: same as @p input0
- * @param[in] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0
- * @param[in] alpha Weight of the matrix product
- * @param[in] beta Weight of vector C. Default value is 0. Only beta = 1 is currently supported.
- * @param[in] is_interleaved_transposed True if input0 and input1 have been reshaped respectively using @ref CLGEMMReshapeLHSMatrixKernel and @ref CLGEMMReshapeRHSMatrixKernel
- * @param[in] reshape_info GEMM reshape info. If is_interleaved_transposed = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped
- * @param[in] gpu_target GPU Target
- * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy
- * @param[in] activation_info (Optional) Activation to apply after the matrix multiplication
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float alpha, float beta,
- bool is_interleaved_transposed, const GEMMReshapeInfo &reshape_info, GPUTarget gpu_target, bool fp_mixed_precision = false, const ActivationLayerInfo &activation_info = ActivationLayerInfo());
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-public:
- const ICLTensor *_input0;
- const ICLTensor *_input1;
- const ICLTensor *_input2;
- ICLTensor *_output;
- bool _slide_matrix_b;
- bool _reinterpret_input_as_3d;
- bool _reinterpret_output_as_3d;
- bool _add_bias;
- bool _broadcast_bias;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLGEMMMATRIXMULTIPLYKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2019-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMMATRIXMULTIPLYNATIVEKERNEL_H
-#define ARM_COMPUTE_CLGEMMMATRIXMULTIPLYNATIVEKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-#include "arm_compute/core/KernelDescriptors.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to multiply matrices when neither of the input matrices have been reshaped */
-class CLGEMMMatrixMultiplyNativeKernel : public ICLKernel
-{
-public:
- /** Default Constructor */
- CLGEMMMatrixMultiplyNativeKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMMatrixMultiplyNativeKernel(const CLGEMMMatrixMultiplyNativeKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMMatrixMultiplyNativeKernel &operator=(const CLGEMMMatrixMultiplyNativeKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLGEMMMatrixMultiplyNativeKernel(CLGEMMMatrixMultiplyNativeKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLGEMMMatrixMultiplyNativeKernel &operator=(CLGEMMMatrixMultiplyNativeKernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @param[in] input0 Input tensor for the LHS matrix. Data type supported: F32. The number of dimensions for the LHS matrix must be less or equal than 4.
- * @param[in] input1 Input tensor for the RHS matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3.
- * @param[in] input2 Input tensor containing the bias matrix. Data type supported: same as @p input0.
- * @param[out] output Output tensor info. Data type supported: same as @p input0
- * @param[in] alpha Weight of the matrix product
- * @param[in] beta Weight of the matrix bias
- * @param[in] lhs_info LHS matrix information used to retrieve the number of rows and accumulations to be processed by each thread. Only the following values are supported:
- * lhs_info.m0: 1,2,3,4,5,6,7,8
- * lhs_info.k0: 2,3,4,8,16
- * @param[in] rhs_info RHS matrix information used to retrieve the number of columns and accumulations to be processed by each thread. Only the following values are supported:
- * rhs_info.n0: 2,3,4,8,16
- * rhs_info.k0: same of lhs_info.k0
- * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices
- */
- void configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info,
- const GEMMRHSMatrixInfo &rhs_info,
- const GEMMKernelInfo &gemm_info);
- /** Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input0 Input tensor for the LHS matrix. Data type supported: F32. The number of dimensions for the LHS matrix must be less or equal than 4.
- * @param[in] input1 Input tensor for the RHS matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3.
- * @param[in] input2 Input tensor containing the bias matrix. Data type supported: same as @p input0.
- * @param[out] output Output tensor info. Data type supported: same as @p input0
- * @param[in] alpha Weight of the matrix product
- * @param[in] beta Weight of the matrix bias
- * @param[in] lhs_info LHS matrix information used to retrieve the number of rows and accumulations to be processed by each thread. Only the following values are supported:
- * lhs_info.m0: 1,2,3,4,5,6,7,8
- * lhs_info.k0: 2,3,4,8,16
- * @param[in] rhs_info RHS matrix information used to retrieve the number of columns and accumulations to be processed by each thread. Only the following values are supported:
- * rhs_info.n0: 2,3,4,8,16
- * rhs_info.k0: same of lhs_info.k0
- * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta,
- const GEMMLHSMatrixInfo &lhs_info,
- const GEMMRHSMatrixInfo &rhs_info,
- const GEMMKernelInfo &gemm_info);
- /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixMultiplyNativeKernel
- *
- * @param[in] input0 Input tensor info for the LHS matrix. Data type supported: F32. The number of dimensions for the LHS matrix must be less or equal than 4.
- * @param[in] input1 Input tensor info for the RHS matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3.
- * @param[in] input2 Input tensor info containing the bias matrix. Data type supported: same as @p input0.
- * @param[in] output Output tensor info. Data type supported: same as @p input0
- * @param[in] alpha Weight of the matrix product
- * @param[in] beta Weight of the matrix bias
- * @param[in] lhs_info LHS matrix information used to retrieve the number of rows and accumulations to be processed by each thread. Only the following values are supported:
- * lhs_info.m0: 1,2,3,4,5,6,7,8
- * lhs_info.k0: 2,3,4,8,16
- * @param[in] rhs_info RHS matrix information used to retrieve the number of columns and accumulations to be processed by each thread. Only the following values are supported:
- * rhs_info.n0: 2,3,4,8,16
- * rhs_info.k0: same of lhs_info.k0
- * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info,
- const GEMMRHSMatrixInfo &rhs_info,
- const GEMMKernelInfo &gemm_info);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input0;
- const ICLTensor *_input1;
- const ICLTensor *_input2;
- ICLTensor *_output;
- bool _slide_matrix_b;
- bool _reinterpret_input_as_3d;
- bool _reinterpret_output_as_3d;
- bool _use_dummy_work_items;
- bool _add_bias;
- bool _broadcast_bias;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLGEMMMATRIXMULTIPLYNATIVEKERNEL_H*/
+++ /dev/null
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMMATRIXMULTIPLYRESHAPEDKERNEL_H
-#define ARM_COMPUTE_CLGEMMMATRIXMULTIPLYRESHAPEDKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-#include "arm_compute/core/KernelDescriptors.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to multiply matrices when both the input matrices LHS (input0) and RHS (input1) have been reshaped
- *
- * @note The input matrices @p input0 and @p input1 must be reshaped through @ref CLGEMMReshapeLHSMatrixKernel and @ref CLGEMMReshapeRHSMatrixKernel
- */
-class CLGEMMMatrixMultiplyReshapedKernel : public ICLKernel
-{
-public:
- /** Default Constructor */
- CLGEMMMatrixMultiplyReshapedKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMMatrixMultiplyReshapedKernel(const CLGEMMMatrixMultiplyReshapedKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMMatrixMultiplyReshapedKernel &operator=(const CLGEMMMatrixMultiplyReshapedKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLGEMMMatrixMultiplyReshapedKernel(CLGEMMMatrixMultiplyReshapedKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLGEMMMatrixMultiplyReshapedKernel &operator=(CLGEMMMatrixMultiplyReshapedKernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @note The F16 computation also supports mixed precision through the gemm_info.fp_mixed_precision flag.
- * Mixed precision combines different floating precisions during the computation, in particular, F32 for the accumulations and F16 for the
- * multiplications. i.e. float c = (half)a * (half)b
- *
- * @note If rhs_info.export_to_cl_image = true, this OpenCL kernel will fetch the RHS data using the OpenCL read_image built-in function.
- * Reading from the OpenCL image object can increase the performance. However, since the OpenCL image object is created importing the OpenCL buffer,
- * the following conditions are required:
- * -# rhs_info.n0 can only be 4, 8 and 16
- * -# rhs_info.k0 can only be 4, 8 and 16
- * -# Data type can only be F32
- * -# The platform should support the OpenCL cl_khr_image2d_from_buffer extension
- * -# The stride Y for the input1 should satisfy the OpenCL pitch alignment requirement
- * -# input1 width should be less or equal to (CL_DEVICE_IMAGE2D_MAX_WIDTH * 4)
- * -# input1 (height * depth) should be less or equal to CL_DEVICE_IMAGE2D_MAX_HEIGHT
- *
- * @param[in] input0 Input tensor containing the LHS reshaped matrix. Data type supported: F16/F32 (only F32 if rhs_info.export_to_cl_image = true). The number of dimensions for the LHS matrix must be less or equal than 4
- * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3
- * @param[in] input2 Input tensor containing the bias matrix. Data type supported: same as @p input0.
- * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0
- * @param[in] alpha Weight of the matrix product
- * @param[in] beta Weight of the matrix bias
- * @param[in] lhs_info LHS matrix information used for reshaping the input0 tensor. Only the following values are supported:
- * lhs_info.m0: 2,3,4,5,6,7,8
- * lhs_info.k0: 2,3,4,8,16
- * lhs_info.transpose: false
- * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported:
- * rhs_info.n0: 2,3,4,8,16 (only 4, 8 and 16 if rhs_info.export_to_cl_image = true)
- * rhs_info.k0: 2,3,4,8,16 (only 4, 8 and 16 if rhs_info.export_to_cl_image = true)
- * rhs_info.transpose: true
- * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices
- *
- * @note lhs_info.k0 must be equal to rhs_info.k0
- */
- void configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info,
- const GEMMRHSMatrixInfo &rhs_info,
- const GEMMKernelInfo &gemm_info);
- /** Initialise the kernel's input and output.
- *
- * @note The F16 computation also supports mixed precision through the gemm_info.fp_mixed_precision flag.
- * Mixed precision combines different floating precisions during the computation, in particular, F32 for the accumulations and F16 for the
- * multiplications. i.e. float c = (half)a * (half)b
- *
- * @note If rhs_info.export_to_cl_image = true, this OpenCL kernel will fetch the RHS data using the OpenCL read_image built-in function.
- * Reading from the OpenCL image object can increase the performance. However, since the OpenCL image object is created importing the OpenCL buffer,
- * the following conditions are required:
- * -# rhs_info.n0 can only be 4, 8 and 16
- * -# rhs_info.k0 can only be 4, 8 and 16
- * -# Data type can only be F32
- * -# The platform should support the OpenCL cl_khr_image2d_from_buffer extension
- * -# The stride Y for the input1 should satisfy the OpenCL pitch alignment requirement
- * -# input1 width should be less or equal to (CL_DEVICE_IMAGE2D_MAX_WIDTH * 4)
- * -# input1 (height * depth) should be less or equal to CL_DEVICE_IMAGE2D_MAX_HEIGHT
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input0 Input tensor containing the LHS reshaped matrix. Data type supported: F16/F32 (only F32 if rhs_info.export_to_cl_image = true). The number of dimensions for the LHS matrix must be less or equal than 4
- * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3
- * @param[in] input2 Input tensor containing the bias matrix. Data type supported: same as @p input0.
- * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0
- * @param[in] alpha Weight of the matrix product
- * @param[in] beta Weight of the matrix bias
- * @param[in] lhs_info LHS matrix information used for reshaping the input0 tensor. Only the following values are supported:
- * lhs_info.m0: 2,3,4,5,6,7,8
- * lhs_info.k0: 2,3,4,8,16
- * lhs_info.transpose: false
- * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported:
- * rhs_info.n0: 2,3,4,8,16 (only 4, 8 and 16 if rhs_info.export_to_cl_image = true)
- * rhs_info.k0: 2,3,4,8,16 (only 4, 8 and 16 if rhs_info.export_to_cl_image = true)
- * rhs_info.transpose: true
- * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices
- *
- * @note lhs_info.k0 must be equal to rhs_info.k0
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta,
- const GEMMLHSMatrixInfo &lhs_info,
- const GEMMRHSMatrixInfo &rhs_info,
- const GEMMKernelInfo &gemm_info);
- /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixMultiplyReshapedKernel
- *
- * @note The F16 computation also supports mixed precision through the gemm_info.fp_mixed_precision flag.
- * Mixed precision combines different floating precisions during the computation, in particular, F32 for the accumulations and F16 for the
- * multiplications. i.e. float c = (half)a * (half)b
- *
- * @note If rhs_info.export_to_cl_image = true, this OpenCL kernel will fetch the RHS data using the OpenCL read_image built-in function.
- * Reading from the OpenCL image object can increase the performance. However, since the OpenCL image object is created importing the OpenCL buffer,
- * the following conditions are required:
- * -# rhs_info.n0 can only be 4, 8 and 16
- * -# rhs_info.k0 can only be 4, 8 and 16
- * -# Data type can only be F32
- * -# The platform should support the OpenCL cl_khr_image2d_from_buffer extension
- * -# The stride Y for the input1 should satisfy the OpenCL pitch alignment requirement
- * -# input1 width should be less or equal to (CL_DEVICE_IMAGE2D_MAX_WIDTH * 4)
- * -# input1 (height * depth) should be less or equal to CL_DEVICE_IMAGE2D_MAX_HEIGHT
- *
- * @param[in] input0 Input tensor containing the LHS reshaped matrix. Data type supported: F16/F32 (only F32 if rhs_info.export_to_cl_image = true). The number of dimensions for the LHS matrix must be less or equal than 4
- * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3
- * @param[in] input2 Input tensor info containing the bias matrix. Data type supported: same as @p input0.
- * @param[in] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0
- * @param[in] alpha Weight of the matrix product
- * @param[in] beta Weight of the matrix bias
- * @param[in] lhs_info LHS matrix information used for reshaping the input0 tensor. Only the following values are supported:
- * lhs_info.m0: 2,3,4,5,6,7,8
- * lhs_info.k0: 2,3,4,8,16
- * lhs_info.transpose: false
- * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported:
- * rhs_info.n0: 2,3,4,8,16 (only 4, 8 and 16 if rhs_info.export_to_cl_image = true)
- * rhs_info.k0: 2,3,4,8,16 (only 4, 8 and 16 if rhs_info.export_to_cl_image = true)
- * rhs_info.transpose: true
- * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices
- *
- * @note lhs_info.k0 must be equal to rhs_info.k0
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info,
- const GEMMRHSMatrixInfo &rhs_info,
- const GEMMKernelInfo &gemm_info);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input0;
- const ICLTensor *_input1;
- const ICLTensor *_input2;
- ICLTensor *_output;
- bool _slide_matrix_b;
- bool _reinterpret_output_as_3d;
- bool _use_dummy_work_items;
- bool _add_bias;
- bool _broadcast_bias;
- bool _export_to_cl_image;
- unsigned int _k;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLGEMMMATRIXMULTIPLYRESHAPEDKERNEL_H*/
\ No newline at end of file
+++ /dev/null
-/*
- * Copyright (c) 2019-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMMATRIXMULTIPLYRESHAPEDONLYRHSKERNEL_H
-#define ARM_COMPUTE_CLGEMMMATRIXMULTIPLYRESHAPEDONLYRHSKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-#include "arm_compute/core/KernelDescriptors.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to multiply matrices when only the input matrix RHS (input1) has been reshaped
- *
- * @note The input matrix input1 must be reshaped through @ref CLGEMMReshapeRHSMatrixKernel
- */
-class CLGEMMMatrixMultiplyReshapedOnlyRHSKernel : public ICLKernel
-{
-public:
- /** Default Constructor */
- CLGEMMMatrixMultiplyReshapedOnlyRHSKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMMatrixMultiplyReshapedOnlyRHSKernel(const CLGEMMMatrixMultiplyReshapedOnlyRHSKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMMatrixMultiplyReshapedOnlyRHSKernel &operator=(const CLGEMMMatrixMultiplyReshapedOnlyRHSKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLGEMMMatrixMultiplyReshapedOnlyRHSKernel(CLGEMMMatrixMultiplyReshapedOnlyRHSKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLGEMMMatrixMultiplyReshapedOnlyRHSKernel &operator=(CLGEMMMatrixMultiplyReshapedOnlyRHSKernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @note If rhs_info.export_to_cl_image = true, this OpenCL kernel will fetch the RHS data using the OpenCL read_image built-in function.
- * Reading from the OpenCL image object can increase the performance. However, since the OpenCL image object is created importing the OpenCL buffer,
- * the following conditions are required:
- * -# rhs_info.n0 can only be 4, 8 and 16
- * -# rhs_info.k0 can only be 4, 8 and 16
- * -# Data type can only be F32
- * -# The platform should support the OpenCL cl_khr_image2d_from_buffer extension
- * -# The stride Y for the input1 should satisfy the OpenCL pitch alignment requirement
- * -# input1 width should be less or equal to (CL_DEVICE_IMAGE2D_MAX_WIDTH * 4)
- * -# input1 (height * depth) should be less or equal to CL_DEVICE_IMAGE2D_MAX_HEIGHT
- *
- * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: F16/F32 (only F32 if rhs_info.export_to_cl_image = true).
- * The number of dimensions for the LHS matrix must be less or equal than 4.
- * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3.
- * @param[in] input2 Input tensor containing the bias matrix. Data type supported: same as @p input0.
- * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0
- * @param[in] alpha Weight of the matrix product
- * @param[in] beta Weight of the matrix bias
- * @param[in] lhs_info LHS matrix information used to retrieve the number of rows to be processed by each thread. Only the following values are supported:
- * lhs_info.m0: 1,2,3,4,5,6,7,8
- * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported:
- * rhs_info.k0: 2,3,4,8,16
- * rhs_info.n0: 2,3,4,8,16
- * rhs_info.transpose: true,false
- * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices
- */
- void configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info,
- const GEMMRHSMatrixInfo &rhs_info,
- const GEMMKernelInfo &gemm_info);
- /** Initialise the kernel's input and output.
- *
- * @note If rhs_info.export_to_cl_image = true, this OpenCL kernel will fetch the RHS data using the OpenCL read_image built-in function.
- * Reading from the OpenCL image object can increase the performance. However, since the OpenCL image object is created importing the OpenCL buffer,
- * the following conditions are required:
- * -# rhs_info.n0 can only be 4, 8 and 16
- * -# rhs_info.k0 can only be 4, 8 and 16
- * -# Data type can only be F32
- * -# The platform should support the OpenCL cl_khr_image2d_from_buffer extension
- * -# The stride Y for the input1 should satisfy the OpenCL pitch alignment requirement
- * -# input1 width should be less or equal to (CL_DEVICE_IMAGE2D_MAX_WIDTH * 4)
- * -# input1 (height * depth) should be less or equal to CL_DEVICE_IMAGE2D_MAX_HEIGHT
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: F16/F32 (only F32 if rhs_info.export_to_cl_image = true).
- * The number of dimensions for the LHS matrix must be less or equal than 4.
- * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3.
- * @param[in] input2 Input tensor containing the bias matrix. Data type supported: same as @p input0.
- * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0
- * @param[in] alpha Weight of the matrix product
- * @param[in] beta Weight of the matrix bias
- * @param[in] lhs_info LHS matrix information used to retrieve the number of rows to be processed by each thread. Only the following values are supported:
- * lhs_info.m0: 1,2,3,4,5,6,7,8
- * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported:
- * rhs_info.k0: 2,3,4,8,16
- * rhs_info.n0: 2,3,4,8,16
- * rhs_info.transpose: true,false
- * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta,
- const GEMMLHSMatrixInfo &lhs_info,
- const GEMMRHSMatrixInfo &rhs_info,
- const GEMMKernelInfo &gemm_info);
- /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixMultiplyReshapedOnlyRHSKernel
- *
- * @note If rhs_info.export_to_cl_image = true, this OpenCL kernel will fetch the RHS data using the OpenCL read_image built-in function.
- * Reading from the OpenCL image object can increase the performance. However, since the OpenCL image object is created importing the OpenCL buffer,
- * the following conditions are required:
- * -# rhs_info.n0 can only be 4, 8 and 16
- * -# rhs_info.k0 can only be 4, 8 and 16
- * -# Data type can only be F32
- * -# The platform should support the OpenCL cl_khr_image2d_from_buffer extension
- * -# The stride Y for the input1 should satisfy the OpenCL pitch alignment requirement
- * -# input1 width should be less or equal to (CL_DEVICE_IMAGE2D_MAX_WIDTH * 4)
- * -# input1 (height * depth) should be less or equal to CL_DEVICE_IMAGE2D_MAX_HEIGHT
- *
- * @param[in] input0 Input tensor info for the LHS matrix. Data type supported: F16/F32 (only F32 if rhs_info.export_to_cl_image = true).
- * The number of dimensions for the LHS matrix must be less or equal than 4.
- * @param[in] input1 Input tensor info for the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3.
- * @param[in] input2 Input tensor info containing the bias matrix. Data type supported: same as @p input0.
- * @param[in] output Output tensor info. Data type supported: same as @p input0
- * @param[in] alpha Weight of the matrix product
- * @param[in] beta Weight of the matrix bias
- * @param[in] lhs_info LHS matrix information used to retrieve the number of rows to be processed by each thread. Only the following values are supported:
- * lhs_info.m0: 1,2,3,4,5,6,7,8
- * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported:
- * rhs_info.k0: 2,3,4,8,16
- * rhs_info.n0: 2,3,4,8,16
- * rhs_info.transpose: true,false
- * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info,
- const GEMMRHSMatrixInfo &rhs_info,
- const GEMMKernelInfo &gemm_info);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input0;
- const ICLTensor *_input1;
- const ICLTensor *_input2;
- ICLTensor *_output;
- bool _slide_matrix_b;
- bool _reinterpret_input_as_3d;
- bool _reinterpret_output_as_3d;
- bool _use_dummy_work_items;
- bool _add_bias;
- bool _broadcast_bias;
- bool _export_to_cl_image;
- bool _has_pad_y;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLGEMMMATRIXMULTIPLYRESHAPEDONLYRHSKERNEL_H*/
+++ /dev/null
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMMATRIXVECTORMULTIPLYKERNEL_H
-#define ARM_COMPUTE_CLGEMMMATRIXVECTORMULTIPLYKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the GEMM matrix vector multiply kernel. **/
-class CLGEMMMatrixVectorMultiplyKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLGEMMMatrixVectorMultiplyKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMMatrixVectorMultiplyKernel(const CLGEMMMatrixVectorMultiplyKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMMatrixVectorMultiplyKernel &operator=(const CLGEMMMatrixVectorMultiplyKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLGEMMMatrixVectorMultiplyKernel(CLGEMMMatrixVectorMultiplyKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLGEMMMatrixVectorMultiplyKernel &operator=(CLGEMMMatrixVectorMultiplyKernel &&) = default;
- /** Set the input and output of the kernel.
- *
- * @param[in] input0 The reshaped input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
- * @param[in] input1 The 2D reshaped weights tensor. Data type supported: Same as @p input.
- * @param[out] output The output 2D tensor. Data types supported: Same as @p input, S32 for QASYMM8/QASYMM8_SIGNED.
- */
- void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output);
- /** Set the input and output of the kernel.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input0 The reshaped input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
- * @param[in] input1 The 2D reshaped weights tensor. Data type supported: Same as @p input.
- * @param[out] output The output 2D tensor. Data types supported: Same as @p input, S32 for QASYMM8/QASYMM8_SIGNED.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixVectorMultiplyKernel
- *
- * @param[in] input0 The reshaped input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
- * @param[in] input1 The 2D reshaped weights tensor info. Data type supported: Same as @p input.
- * @param[in] output The output 2D tensor info. Data types supported: Same as @p input, S32 for QASYMM8/QASYMM8_SIGNED.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-private:
- const ICLTensor *_input0;
- const ICLTensor *_input1;
- ICLTensor *_output;
- int _num_rows_read_per_iteration;
- BorderSize _border_size;
-};
-} // arm_compute
-#endif /*ARM_COMPUTE_CLGEMMMATRIXVECTORMULTIPLYKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMRESHAPELHSMATRIXKERNEL_H
-#define ARM_COMPUTE_CLGEMMRESHAPELHSMATRIXKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to reshape the LHS matrix when performing the matrix multiplication.
- * In particular, this function splits the input matrix in blocks of size M0xK0 (defined through GEMMLHSInfo) and
- * stores each one in the output matrix unrolling the values
- */
-class CLGEMMReshapeLHSMatrixKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLGEMMReshapeLHSMatrixKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMReshapeLHSMatrixKernel(const CLGEMMReshapeLHSMatrixKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMReshapeLHSMatrixKernel &operator=(const CLGEMMReshapeLHSMatrixKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLGEMMReshapeLHSMatrixKernel(CLGEMMReshapeLHSMatrixKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLGEMMReshapeLHSMatrixKernel &operator=(CLGEMMReshapeLHSMatrixKernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input tensor. Data types supported: All
- * @param[out] output Output tensor. Data type supported: same as @p input
- * @param[in] lhs_info LHS matrix information to be used for reshaping. This object contains all the necessary
- * information to reshape the input tensor. Only the following values are supported:
- * lhs_info.m0: 2,3,4,5,6,7,8
- * lhs_info.k0: 2,3,4,8,16
- * lhs_info.v0: greater than 0
- * lhs_info.transpose: true, false
- * lhs_info.interleave: true, false
- * @param[in] reinterpret_input_as_3d (Optional) True if the input has to be reinterpreted as 3D tensor
- */
- void configure(const ICLTensor *input, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, bool reinterpret_input_as_3d = false);
- /** Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor. Data types supported: All
- * @param[out] output Output tensor. Data type supported: same as @p input
- * @param[in] lhs_info LHS matrix information to be used for reshaping. This object contains all the necessary
- * information to reshape the input tensor. Only the following values are supported:
- * lhs_info.m0: 2,3,4,5,6,7,8
- * lhs_info.k0: 2,3,4,8,16
- * lhs_info.v0: greater than 0
- * lhs_info.transpose: true, false
- * lhs_info.interleave: true, false
- * @param[in] reinterpret_input_as_3d (Optional) True if the input has to be reinterpreted as 3D tensor
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, bool reinterpret_input_as_3d = false);
- /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMReshapeLHSMatrixKernel
- *
- * @param[in] input Input tensor info. Data types supported: All
- * @param[in] output Output tensor info which stores the interleaved matrix. Data type supported: same as @p input.
- * @param[in] lhs_info LHS matrix information to be used for reshaping. This object contains all the necessary
- * information to reshape the input tensor. Only the following values are supported:
- * lhs_info.m0: 2,3,4,5,6,7,8
- * lhs_info.k0: 2,3,4,8,16
- * lhs_info.v0: greater than 0
- * lhs_info.transpose: true, false
- * lhs_info.interleave: true, false
- * @param[in] reinterpret_input_as_3d True if the input has to be reinterpreted as 3D tensor
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const GEMMLHSMatrixInfo &lhs_info, bool reinterpret_input_as_3d);
-
- // Inherited methods overridden
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
- bool _reinterpret_input_as_3d;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLGEMMRESHAPELHSMATRIXKERNEL_H */
\ No newline at end of file
+++ /dev/null
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGEMMRESHAPERHSMATRIXKERNEL_H
-#define ARM_COMPUTE_CLGEMMRESHAPERHSMATRIXKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to reshape the RHS matrix when performing the matrix multiplication
- * In particular, this kernel splits the input matrix in blocks of size K0xN0 and stores each one in
- * the output matrix unrolling the values */
-class CLGEMMReshapeRHSMatrixKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLGEMMReshapeRHSMatrixKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMReshapeRHSMatrixKernel(const CLGEMMReshapeRHSMatrixKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMReshapeRHSMatrixKernel &operator=(const CLGEMMReshapeRHSMatrixKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLGEMMReshapeRHSMatrixKernel(CLGEMMReshapeRHSMatrixKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLGEMMReshapeRHSMatrixKernel &operator=(CLGEMMReshapeRHSMatrixKernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @note If rhs_info.export_to_cl_image = true, this OpenCL kernel will guarantee the OpenCL pitch alignment for the output tensor,
- * required to create a OpenCL image object from buffer in @ref CLGEMMMatrixMultiplyReshapedKernel and in @ref CLGEMMMatrixMultiplyReshapedOnlyRHSKernel
- * Since the OpenCL image object is created importing the OpenCL buffer, the following conditions are required:
- * -# rhs_info.n0 can only be 4, 8 and 16
- * -# rhs_info.k0 can only be 4, 8 and 16
- * -# Data type can only be F32, F16
- * -# The platform should support the OpenCL cl_khr_image2d_from_buffer extension
- * -# output width should be less or equal to (CL_DEVICE_IMAGE2D_MAX_WIDTH * 4)
- * -# output (height * depth) should be less or equal to CL_DEVICE_IMAGE2D_MAX_HEIGHT
- * -# The output tensor should be only consumed by @ref CLGEMMMatrixMultiplyReshapedKernel or @ref CLGEMMMatrixMultiplyReshapedOnlyRHSKernel
- *
- * @param[in] input Input tensor. Data types supported: All
- * @param[out] output Output tensor. Data type supported: same as @p input
- * @param[in] rhs_info RHS matrix information to be used for reshaping. This object contains all the necessary
- * information to reshape the input tensor. Only the following values are supported:
- * rhs_info.n0: 2,3,4,8,16 (only 4, 8 and 16 if rhs_info.export_to_cl_image == true)
- * rhs_info.k0: 1,2,3,4,8,16 (k0 = 1 only if rhs_info.transpose = false), (only 4, 8 and 16 if rhs_info.export_to_cl_image == true)
- * rhs_info.h0: greater than 0
- * rhs_info.transpose: true, false
- * rhs_info.interleave: true, false
- */
- void configure(const ICLTensor *input, ICLTensor *output, const GEMMRHSMatrixInfo &rhs_info);
- /** Initialise the kernel's input and output.
- *
- * @note If rhs_info.export_to_cl_image = true, this OpenCL kernel will guarantee the OpenCL pitch alignment for the output tensor,
- * required to create a OpenCL image object from buffer in @ref CLGEMMMatrixMultiplyReshapedKernel and in @ref CLGEMMMatrixMultiplyReshapedOnlyRHSKernel
- * Since the OpenCL image object is created importing the OpenCL buffer, the following conditions are required:
- * -# rhs_info.n0 can only be 4, 8 and 16
- * -# rhs_info.k0 can only be 4, 8 and 16
- * -# Data type can only be F32, F16
- * -# The platform should support the OpenCL cl_khr_image2d_from_buffer extension
- * -# output width should be less or equal to (CL_DEVICE_IMAGE2D_MAX_WIDTH * 4)
- * -# output (height * depth) should be less or equal to CL_DEVICE_IMAGE2D_MAX_HEIGHT
- * -# The output tensor should be only consumed by @ref CLGEMMMatrixMultiplyReshapedKernel or @ref CLGEMMMatrixMultiplyReshapedOnlyRHSKernel
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor. Data types supported: All
- * @param[out] output Output tensor. Data type supported: same as @p input
- * @param[in] rhs_info RHS matrix information to be used for reshaping. This object contains all the necessary
- * information to reshape the input tensor. Only the following values are supported:
- * rhs_info.n0: 2,3,4,8,16 (only 4, 8 and 16 if rhs_info.export_to_cl_image == true)
- * rhs_info.k0: 1,2,3,4,8,16 (k0 = 1 only if rhs_info.transpose = false), (only 4, 8 and 16 if rhs_info.export_to_cl_image == true)
- * rhs_info.h0: greater than 0
- * rhs_info.transpose: true, false
- * rhs_info.interleave: true, false
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const GEMMRHSMatrixInfo &rhs_info);
- /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMReshapeRHSMatrixKernel
- *
- * @note If rhs_info.export_to_cl_image = true, this OpenCL kernel will guarantee the OpenCL pitch alignment for the output tensor,
- * required to create a OpenCL image object from buffer in @ref CLGEMMMatrixMultiplyReshapedKernel and in @ref CLGEMMMatrixMultiplyReshapedOnlyRHSKernel
- * Since the OpenCL image object is created importing the OpenCL buffer, the following conditions are required:
- * -# rhs_info.n0 can only be 4, 8 and 16
- * -# rhs_info.k0 can only be 4, 8 and 16
- * -# Data type can only be F32, F16
- * -# The platform should support the OpenCL cl_khr_image2d_from_buffer extension
- * -# output width should be less or equal to (CL_DEVICE_IMAGE2D_MAX_WIDTH * 4)
- * -# output (height * depth) should be less or equal to CL_DEVICE_IMAGE2D_MAX_HEIGHT
- * -# The output tensor should be only consumed by @ref CLGEMMMatrixMultiplyReshapedKernel or @ref CLGEMMMatrixMultiplyReshapedOnlyRHSKernel
- *
- * @param[in] input Input tensor info. Data types supported: All
- * @param[in] output Output tensor info which stores the interleaved matrix. Data type supported: same as @p input.
- * @param[in] rhs_info RHS matrix information to be used for reshaping. This object contains all the necessary
- * information to reshape the input tensor. Only the following values are supported:
- * rhs_info.n0: 2,3,4,8,16 (only 4, 8 and 16 if rhs_info.export_to_cl_image == true)
- * rhs_info.k0: 1,2,3,4,8,16 (k0 = 1 only if rhs_info.transpose = false),(only 4, 8 and 16 if rhs_info.export_to_cl_image == true)
- * rhs_info.h0: greater than 0
- * rhs_info.transpose: true, false
- * rhs_info.interleave: true, false
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const GEMMRHSMatrixInfo &rhs_info);
-
- // Inherited methods overridden
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLGEMMRESHAPERHSMATRIXKERNEL_H */
\ No newline at end of file
+++ /dev/null
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGATHERKERNEL_H
-#define ARM_COMPUTE_CLGATHERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the kernel to perform tensor reshaping */
-class CLGatherKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLGatherKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGatherKernel(const CLGatherKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGatherKernel &operator=(const CLGatherKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLGatherKernel(CLGatherKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLGatherKernel &operator=(CLGatherKernel &&) = default;
- /** Default destructor */
- ~CLGatherKernel() = default;
- /** Initialise the kernel's inputs and outputs
- *
- * @param[in] input Source tensor. Supported tensor rank: up to 4. Data type supported: All.
- * @param[in] indices Indices tensor. Supported tensor rank: up to 1. Must be one of the following types: U32/S32. Each value must be in range [0, input.shape[@p axis])
- * @param[out] output Destination tensor. Data type supported: Same as @p input
- * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative values wrap around. Defaults to 0
- */
- void configure(const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, int axis = 0);
- /** Initialise the kernel's inputs and outputs
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Supported tensor rank: up to 4. Data type supported: All.
- * @param[in] indices Indices tensor. Supported tensor rank: up to 1. Must be one of the following types: U32/S32. Each value must be in range [0, input.shape[@p axis])
- * @param[out] output Destination tensor. Data type supported: Same as @p input
- * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative values wrap around. Defaults to 0
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, int axis = 0);
-
- /** Static function to check if given info will lead to a valid configuration of @ref CLGatherKernel
- *
- * @param[in] input Source tensor info. Supported tensor rank: up to 4. Data type supported: All.
- * @param[in] indices Indices tensor info. Supported tensor rank: up to 4. Must be one of the following types: U32/S32. Each value must be in range [0, input.shape[@p axis])
- * @param[in] output Destination tensor info. Data type supported: Same as @p input
- * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative values wrap around. Defaults to 0
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, int axis = 0);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input; /**< Source tensor */
- const ICLTensor *_indices; /**< Indices tensor */
- ICLTensor *_output; /**< Destination tensor */
- int _axis; /**< Axis index */
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLGATHERKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGAUSSIAN3X3KERNEL_H
-#define ARM_COMPUTE_CLGAUSSIAN3X3KERNEL_H
-
-#include "arm_compute/core/CL/ICLSimple2DKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the Gaussian 3x3 filter kernel.
- *
- */
-class CLGaussian3x3Kernel : public ICLSimple2DKernel
-{
-public:
- /** Initialise the kernel's input and output.
- *
- * @param[in] input An input tensor. Data types supported: U8
- * @param[out] output The output tensor. Data types supported: U8.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
- /** Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input An input tensor. Data types supported: U8
- * @param[out] output The output tensor. Data types supported: U8.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined);
-
- // Inherited methods overridden:
- BorderSize border_size() const override;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLGAUSSIAN3X3KERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGAUSSIAN5X5KERNEL_H
-#define ARM_COMPUTE_CLGAUSSIAN5X5KERNEL_H
-
-#include "arm_compute/core/CL/kernels/CLConvolutionKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the kernel to run the horizontal pass of 5x5 Gaussian filter on a tensor. */
-class CLGaussian5x5HorKernel : public CLSeparableConvolution5x5HorKernel
-{
-public:
- /** Initialise the kernel's source, destination and border.
- *
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor. Data types supported: S16.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
- /** Initialise the kernel's source, destination and border.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor. Data types supported: S16.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined);
-
-private:
- //Make the configure method of the parent class private
- using CLSeparableConvolution5x5HorKernel::configure;
-};
-
-/** Interface for the kernel to run the vertical pass of 5x5 Gaussian filter on a tensor. */
-class CLGaussian5x5VertKernel : public CLSeparableConvolution5x5VertKernel
-{
-public:
- /** Initialise the kernel's source, destination and border.
- *
- * @param[in] input Input tensor(output of horizontal pass). Data types supported: S16.
- * @param[out] output Destination tensor. Data types supported: U8.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
- /** Initialise the kernel's source, destination and border.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor(output of horizontal pass). Data types supported: S16.
- * @param[out] output Destination tensor. Data types supported: U8.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined);
-
-private:
- //Make the configure method of the parent class private
- using CLSeparableConvolution5x5VertKernel::configure;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLGAUSSIAN5X5KERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGAUSSIANPYRAMIDKERNEL_H
-#define ARM_COMPUTE_CLGAUSSIANPYRAMIDKERNEL_H
-
-#include "arm_compute/core/CL/ICLSimpleKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to perform a Gaussian filter and half scaling across width (horizontal pass) */
-class CLGaussianPyramidHorKernel : public ICLSimpleKernel
-{
-public:
- /** Default constructor */
- CLGaussianPyramidHorKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGaussianPyramidHorKernel(const CLGaussianPyramidHorKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGaussianPyramidHorKernel &operator=(const CLGaussianPyramidHorKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLGaussianPyramidHorKernel(CLGaussianPyramidHorKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLGaussianPyramidHorKernel &operator=(CLGaussianPyramidHorKernel &&) = default;
- /** Default destructor */
- ~CLGaussianPyramidHorKernel() = default;
-
- /** Initialise the kernel's source, destination and border mode.
- *
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor. Output should have half the input width. Data types supported: U16.
- */
- void configure(const ICLTensor *input, ICLTensor *output);
- /** Initialise the kernel's source, destination and border mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor. Output should have half the input width. Data types supported: U16.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-private:
- int _l2_load_offset;
-};
-
-/** OpenCL kernel to perform a Gaussian filter and half scaling across height (vertical pass) */
-class CLGaussianPyramidVertKernel : public ICLSimpleKernel
-{
-public:
- /** Default constructor */
- CLGaussianPyramidVertKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGaussianPyramidVertKernel(const CLGaussianPyramidVertKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGaussianPyramidVertKernel &operator=(const CLGaussianPyramidVertKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLGaussianPyramidVertKernel(CLGaussianPyramidVertKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLGaussianPyramidVertKernel &operator=(CLGaussianPyramidVertKernel &&) = default;
- /** Default destructor */
- ~CLGaussianPyramidVertKernel() = default;
-
- /** Initialise the kernel's source, destination and border mode.
- *
- * @param[in] input Source tensor. Data types supported: U16.
- * @param[out] output Destination tensor. Output should have half the input height. Data types supported: U8.
- */
- void configure(const ICLTensor *input, ICLTensor *output);
- /** Initialise the kernel's source, destination and border mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U16.
- * @param[out] output Destination tensor. Output should have half the input height. Data types supported: U8.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-private:
- int _t2_load_offset;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLGAUSSIANPYRAMIDKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2019-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLGENERATEPROPOSALSLAYERKERNEL_H
-#define ARM_COMPUTE_CLGENERATEPROPOSALSLAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for Compute All Anchors kernel */
-class CLComputeAllAnchorsKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLComputeAllAnchorsKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLComputeAllAnchorsKernel(const CLComputeAllAnchorsKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLComputeAllAnchorsKernel &operator=(const CLComputeAllAnchorsKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLComputeAllAnchorsKernel(CLComputeAllAnchorsKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLComputeAllAnchorsKernel &operator=(CLComputeAllAnchorsKernel &&) = default;
- /** Default destructor */
- ~CLComputeAllAnchorsKernel() = default;
-
- /** Set the input and output tensors.
- *
- * @param[in] anchors Source tensor. Original set of anchors of size (4, A), where A is the number of anchors. Data types supported: QSYMM16/F16/F32
- * @param[out] all_anchors Destination tensor. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. Data types supported: Same as @p input
- * @param[in] info Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo
- *
- */
- void configure(const ICLTensor *anchors, ICLTensor *all_anchors, const ComputeAnchorsInfo &info);
- /** Set the input and output tensors.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] anchors Source tensor. Original set of anchors of size (4, A), where A is the number of anchors. Data types supported: QSYMM16/F16/F32
- * @param[out] all_anchors Destination tensor. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. Data types supported: Same as @p input
- * @param[in] info Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo
- *
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *anchors, ICLTensor *all_anchors, const ComputeAnchorsInfo &info);
-
- /** Static function to check if given info will lead to a valid configuration of @ref CLComputeAllAnchorsKernel
- *
- * @param[in] anchors Source tensor info. Original set of anchors of size (4, A), where A is the number of anchors. Data types supported: QSYMM16/F16/F32
- * @param[in] all_anchors Destination tensor info. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. Data types supported: Same as @p input
- * @param[in] info Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo
- *
- * @return a Status
- */
- static Status validate(const ITensorInfo *anchors, const ITensorInfo *all_anchors, const ComputeAnchorsInfo &info);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_anchors;
- ICLTensor *_all_anchors;
-};
-} // arm_compute
-#endif // ARM_COMPUTE_CLGENERATEPROSPOSALSLAYERKERNEL_H
+++ /dev/null
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLHOGDESCRIPTORKERNEL_H
-#define ARM_COMPUTE_CLHOGDESCRIPTORKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/IHOG.h"
-#include "arm_compute/core/Size2D.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** OpenCL kernel to perform HOG Orientation Binning */
-class CLHOGOrientationBinningKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLHOGOrientationBinningKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLHOGOrientationBinningKernel(const CLHOGOrientationBinningKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLHOGOrientationBinningKernel &operator=(const CLHOGOrientationBinningKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLHOGOrientationBinningKernel(CLHOGOrientationBinningKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLHOGOrientationBinningKernel &operator=(CLHOGOrientationBinningKernel &&) = default;
- /** Default destructor */
- ~CLHOGOrientationBinningKernel() = default;
-
- /** Initialise the kernel's inputs, output and HOG's metadata
- *
- * @param[in] input_magnitude Input tensor which stores the magnitude of the gradient for each pixel. Data type supported: S16.
- * @param[in] input_phase Input tensor which stores the phase of the gradient for each pixel. Data type supported: U8
- * @param[out] output Output tensor which stores the local HOG for each cell. DataType supported: F32. Number of channels supported: equal to the number of histogram bins per cell
- * @param[in] hog_info HOG's metadata
- */
- void configure(const ICLTensor *input_magnitude, const ICLTensor *input_phase, ICLTensor *output, const HOGInfo *hog_info);
- /** Initialise the kernel's inputs, output and HOG's metadata
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input_magnitude Input tensor which stores the magnitude of the gradient for each pixel. Data type supported: S16.
- * @param[in] input_phase Input tensor which stores the phase of the gradient for each pixel. Data type supported: U8
- * @param[out] output Output tensor which stores the local HOG for each cell. DataType supported: F32. Number of channels supported: equal to the number of histogram bins per cell
- * @param[in] hog_info HOG's metadata
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input_magnitude, const ICLTensor *input_phase, ICLTensor *output, const HOGInfo *hog_info);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input_magnitude;
- const ICLTensor *_input_phase;
- ICLTensor *_output;
- Size2D _cell_size;
-};
-
-/** OpenCL kernel to perform HOG block normalization */
-class CLHOGBlockNormalizationKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLHOGBlockNormalizationKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLHOGBlockNormalizationKernel(const CLHOGBlockNormalizationKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLHOGBlockNormalizationKernel &operator=(const CLHOGBlockNormalizationKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLHOGBlockNormalizationKernel(CLHOGBlockNormalizationKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLHOGBlockNormalizationKernel &operator=(CLHOGBlockNormalizationKernel &&) = default;
- /** Default destructor */
- ~CLHOGBlockNormalizationKernel() = default;
-
- /** Initialise the kernel's input, output and HOG's metadata
- *
- * @param[in] input Input tensor which stores the local HOG for each cell. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per cell
- * @param[out] output Output tensor which stores the normalised blocks. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block
- * @param[in] hog_info HOG's metadata
- */
- void configure(const ICLTensor *input, ICLTensor *output, const HOGInfo *hog_info);
- /** Initialise the kernel's input, output and HOG's metadata
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor which stores the local HOG for each cell. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per cell
- * @param[out] output Output tensor which stores the normalised blocks. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block
- * @param[in] hog_info HOG's metadata
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const HOGInfo *hog_info);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
- Size2D _num_cells_per_block_stride;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLHOGDESCRIPTORKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLHOGDETECTORKERNEL_H
-#define ARM_COMPUTE_CLHOGDETECTORKERNEL_H
-
-#include "arm_compute/core/CL/ICLArray.h"
-#include "arm_compute/core/CL/ICLHOG.h"
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/CL/OpenCL.h"
-
-namespace cl
-{
-class Buffer;
-}
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to perform HOG detector kernel using linear SVM */
-class CLHOGDetectorKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLHOGDetectorKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLHOGDetectorKernel(const CLHOGDetectorKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLHOGDetectorKernel &operator=(const CLHOGDetectorKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLHOGDetectorKernel(CLHOGDetectorKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLHOGDetectorKernel &operator=(CLHOGDetectorKernel &&) = default;
- /** Default destructor */
- ~CLHOGDetectorKernel() = default;
-
- /** Initialise the kernel's input, HOG data-object, detection window, the stride of the detection window, the threshold and index of the object to detect
- *
- * @param[in] input Input tensor which stores the HOG descriptor obtained with @ref CLHOGOrientationBinningKernel. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block
- * @param[in] hog HOG data object used by @ref CLHOGOrientationBinningKernel and @ref CLHOGBlockNormalizationKernel
- * @param[out] detection_windows Array of @ref DetectionWindow. This array stores all the detected objects
- * @param[in] num_detection_windows Number of detected objects
- * @param[in] detection_window_stride Distance in pixels between 2 consecutive detection windows in x and y directions.
- * It must be multiple of the hog->info()->block_stride()
- * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane
- * @param[in] idx_class (Optional) Index of the class used for evaluating which class the detection window belongs to
- */
- void configure(const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, cl::Buffer *num_detection_windows, const Size2D &detection_window_stride, float threshold = 0.0f,
- uint16_t idx_class = 0);
- /** Initialise the kernel's input, HOG data-object, detection window, the stride of the detection window, the threshold and index of the object to detect
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor which stores the HOG descriptor obtained with @ref CLHOGOrientationBinningKernel. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block
- * @param[in] hog HOG data object used by @ref CLHOGOrientationBinningKernel and @ref CLHOGBlockNormalizationKernel
- * @param[out] detection_windows Array of @ref DetectionWindow. This array stores all the detected objects
- * @param[in] num_detection_windows Number of detected objects
- * @param[in] detection_window_stride Distance in pixels between 2 consecutive detection windows in x and y directions.
- * It must be multiple of the hog->info()->block_stride()
- * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane
- * @param[in] idx_class (Optional) Index of the class used for evaluating which class the detection window belongs to
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, cl::Buffer *num_detection_windows,
- const Size2D &detection_window_stride, float threshold = 0.0f,
- uint16_t idx_class = 0);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue);
-
-private:
- const ICLTensor *_input;
- ICLDetectionWindowArray *_detection_windows;
- cl::Buffer *_num_detection_windows;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLHOGDETECTORKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLHARRISCORNERSKERNEL_H
-#define ARM_COMPUTE_CLHARRISCORNERSKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ICLTensor;
-using ICLImage = ICLTensor;
-
-/** Interface for the harris score kernel.
- *
- * @note The implementation supports 3, 5, and 7 for the block_size.
- */
-class CLHarrisScoreKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLHarrisScoreKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLHarrisScoreKernel(const CLHarrisScoreKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLHarrisScoreKernel &operator=(const CLHarrisScoreKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLHarrisScoreKernel(CLHarrisScoreKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLHarrisScoreKernel &operator=(CLHarrisScoreKernel &&) = default;
- /** Default destructor */
- ~CLHarrisScoreKernel() = default;
-
- /** Setup the kernel parameters
- *
- * @param[in] input1 Source image (gradient X). Data types supported S16, S32. (Must be the same as input2)
- * @param[in] input2 Source image (gradient Y). Data types supported S16, S32. (Must be the same as input1)
- * @param[out] output Destination image (harris score). Data types supported F32
- * @param[in] block_size The block window size used to compute the Harris Corner score. Supports: 3, 5 and 7
- * @param[in] norm_factor Normalization factor to use accordingly with the gradient size (Must be different from 0)
- * @param[in] strength_thresh Minimum threshold with which to eliminate Harris Corner scores (computed using the normalized Sobel kernel).
- * @param[in] sensitivity Sensitivity threshold k from the Harris-Stephens equation.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLImage *input1, const ICLImage *input2, ICLImage *output,
- int32_t block_size, float norm_factor, float strength_thresh, float sensitivity,
- bool border_undefined);
- /** Setup the kernel parameters
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input1 Source image (gradient X). Data types supported S16, S32. (Must be the same as input2)
- * @param[in] input2 Source image (gradient Y). Data types supported S16, S32. (Must be the same as input1)
- * @param[out] output Destination image (harris score). Data types supported F32
- * @param[in] block_size The block window size used to compute the Harris Corner score. Supports: 3, 5 and 7
- * @param[in] norm_factor Normalization factor to use accordingly with the gradient size (Must be different from 0)
- * @param[in] strength_thresh Minimum threshold with which to eliminate Harris Corner scores (computed using the normalized Sobel kernel).
- * @param[in] sensitivity Sensitivity threshold k from the Harris-Stephens equation.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLImage *input1, const ICLImage *input2, ICLImage *output,
- int32_t block_size, float norm_factor, float strength_thresh, float sensitivity,
- bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-protected:
- const ICLImage *_input1; /**< Source image - Gx component */
- const ICLImage *_input2; /**< Source image - Gy component */
- ICLImage *_output; /**< Source image - Harris score */
- float _sensitivity; /**< Sensitivity value */
- float _strength_thresh; /**< Threshold value */
- float _norm_factor; /**< Normalization factor */
- BorderSize _border_size; /**< Border size */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLHARRISCORNERSKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2019-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef ARM_COMPUTE_CLHEIGHTCONCATENATELAYERKERNEL_H
-#define ARM_COMPUTE_CLHEIGHTCONCATENATELAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-/** Interface for the height concatenate kernel.
- * The input tensor will be concatenated into the output tensor.
- */
-class CLHeightConcatenateLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLHeightConcatenateLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLHeightConcatenateLayerKernel(const CLHeightConcatenateLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLHeightConcatenateLayerKernel &operator=(const CLHeightConcatenateLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLHeightConcatenateLayerKernel(CLHeightConcatenateLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLHeightConcatenateLayerKernel &operator=(CLHeightConcatenateLayerKernel &&) = default;
- /** Default destructor */
- ~CLHeightConcatenateLayerKernel() = default;
- /** Initialise the kernel's inputs and output
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor. Data types supported: All.
- * @param[in] height_offset The starting offset on the Y axis for the output tensor.
- * @param[out] output Output tensor. Data types supported: Same as @p input.
- *
- */
- void configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int height_offset, ITensorInfo *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLHeightConcatenateLayerKernel
- *
- * @param[in] input Input tensor info. Data types supported: All.
- * @param[in] height_offset The starting offset on the Y axis for the output tensor.
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, unsigned int height_offset, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
-
-private:
- unsigned int _height_offset;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLHEIGHTCONCATENATELAYERKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLHISTOGRAMKERNEL_H
-#define ARM_COMPUTE_CLHISTOGRAMKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLDistribution1D;
-class ICLTensor;
-using ICLImage = ICLTensor;
-
-/** Interface to run the histogram kernel. This kernel processes the part of image with width can be divided by 16.
- * If the image width is not a multiple of 16, remaining pixels have to be processed with the @ref CLHistogramBorderKernel
- */
-class CLHistogramKernel : public ICLKernel
-{
-public:
- /** Constructor */
- CLHistogramKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLHistogramKernel(const CLHistogramKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLHistogramKernel &operator=(const CLHistogramKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLHistogramKernel(CLHistogramKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLHistogramKernel &operator=(CLHistogramKernel &&) = default;
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] input Source image. Data types supported: U8.
- * @param[out] output Destination distribution.
- */
- void configure(const ICLImage *input, ICLDistribution1D *output);
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source image. Data types supported: U8.
- * @param[out] output Destination distribution.
- */
- void configure(const CLCompileContext &compile_context, const ICLImage *input, ICLDistribution1D *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLImage *_input;
- ICLDistribution1D *_output;
-};
-
-/** Interface to run the histogram kernel to handle the leftover part of image
- *
- */
-class CLHistogramBorderKernel : public ICLKernel
-{
-public:
- /** Constructor */
- CLHistogramBorderKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLHistogramBorderKernel(const CLHistogramBorderKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLHistogramBorderKernel &operator=(const CLHistogramBorderKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLHistogramBorderKernel(CLHistogramBorderKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLHistogramBorderKernel &operator=(CLHistogramBorderKernel &&) = default;
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] input Source image. Data types supported: U8.
- * @param[out] output Destination distribution.
- */
- void configure(const ICLImage *input, ICLDistribution1D *output);
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source image. Data types supported: U8.
- * @param[out] output Destination distribution.
- */
- void configure(const CLCompileContext &compile_context, const ICLImage *input, ICLDistribution1D *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLImage *_input;
- ICLDistribution1D *_output;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLHISTOGRAMKERNEL_H*/
+++ /dev/null
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLIM2COLKERNEL_H
-#define ARM_COMPUTE_CLIM2COLKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Size2D.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the im2col reshape kernel.
- *
- * Rearranges image blocks into columns. It is used to strip out each convolution block to a single column.
- * It is used to transform a convolution to a plain matrix multiplication.
- *
- * For example taking into account the image below and assuming 3x3 image blocks with stride of 1 we have:
- * @f[
- * \left( \begin{array}{cccc}
- * a00 & a01 & a02 & a03 \\
- * a10 & a11 & a12 & a13 \\
- * a20 & a21 & a22 & a23 \\
- * a30 & a31 & a32 & a33 \\
- * \end{array} \right)
- * =
- * \left( \begin{array}{ccccccccc}
- * a00 & a01 & a02 & a10 & a11 & a12 & a20 & a21 & a22 \\
- * a01 & a02 & a03 & a11 & a12 & a13 & a21 & a22 & a23 \\
- * a10 & a11 & a12 & a20 & a21 & a22 & a30 & a31 & a32 \\
- * a11 & a12 & a13 & a21 & a22 & a23 & a31 & a32 & a33 \\
- * \end{array} \right)
- * @f]
- */
-class CLIm2ColKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLIm2ColKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLIm2ColKernel(const CLIm2ColKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLIm2ColKernel &operator=(const CLIm2ColKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLIm2ColKernel(CLIm2ColKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLIm2ColKernel &operator=(CLIm2ColKernel &&) = default;
- /** Set the input and output of the kernel.
- *
- * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
- * @param[out] output The output tensor. First 2 lower dimensions represent a transform of each 3D input,
- * while every dimension above represents a batch. Data types supported: Same as @p input
- * @param[in] kernel_dims The kernel dimensions (width and height).
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- * @param[in] has_bias In case biases are provided expands the matrix with 1.
- * This is valid only for non-quantized inputs.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution.
- * Number of groups other than 1 is only supported for NCHW data layout.
- * Number of groups should be multiple to the number of channels.
- */
- void configure(const ICLTensor *input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation = Size2D(1U, 1U),
- unsigned int num_groups = 1);
- /** Set the input and output of the kernel.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
- * @param[out] output The output tensor. First 2 lower dimensions represent a transform of each 3D input,
- * while every dimension above represents a batch. Data types supported: Same as @p input
- * @param[in] kernel_dims The kernel dimensions (width and height).
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- * @param[in] has_bias In case biases are provided expands the matrix with 1.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias,
- const Size2D &dilation = Size2D(1U, 1U),
- unsigned int num_groups = 1);
- /** Static function to check if given info will lead to a valid configuration of @ref CLIm2ColKernel
- *
- * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
- * @param[in] output The output tensor. First 2 lower dimensions represent a transform of each 3D input,
- * while every dimension above represents a batch. Data types supported: Same as @p input
- * @param[in] kernel_dims The kernel dimensions (width and height).
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- * @param[in] has_bias In case biases are provided expands the matrix with 1.
- * This is valid only for non-quantized inputs.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution.
- * Number of groups other than 1 is only supported for NCHW data layout.
- * Number of groups should be multiple to the number of channels.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation = Size2D(1U, 1U),
- unsigned int num_groups = 1);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-public:
- const ICLTensor *_input;
- ICLTensor *_output;
- DataLayout _data_layout;
- std::pair<unsigned int, unsigned int> _convolved_dims;
- unsigned int _num_elems_processed_per_iteration;
- Size2D _kernel_dims;
- PadStrideInfo _conv_info;
- unsigned int _num_groups;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLIM2COLKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2019-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYERKERNEL_H
-#define ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-#include "arm_compute/core/KernelDescriptors.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ICLTensor;
-
-/** Interface for performing an instance normalization */
-class CLInstanceNormalizationLayerKernel : public ICLKernel
-{
-public:
- /** Constructor */
- CLInstanceNormalizationLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLInstanceNormalizationLayerKernel(const CLInstanceNormalizationLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLInstanceNormalizationLayerKernel &operator=(const CLInstanceNormalizationLayerKernel &) = delete;
- /** Default Move Constructor. */
- CLInstanceNormalizationLayerKernel(CLInstanceNormalizationLayerKernel &&) = default;
- /** Default move assignment operator */
- CLInstanceNormalizationLayerKernel &operator=(CLInstanceNormalizationLayerKernel &&) = default;
- /** Default destructor */
- ~CLInstanceNormalizationLayerKernel() = default;
-
- /** Set the input and output tensors.
- *
- * @param[in, out] input Source tensor. Data types supported: F16/F32. Data layout supported: NCHW, NHWC
- * In case of @p output tensor = nullptr this tensor will store the result of the normalization.
- * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input.
- * @param[in] info Kernel meta-data descriptor
- */
- void configure(ICLTensor *input, ICLTensor *output, const InstanceNormalizationLayerKernelInfo &info);
- /** Set the input and output tensors.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in, out] input Source tensor. Data types supported: F16/F32. Data layout supported: NCHW, NHWC
- * In case of @p output tensor = nullptr this tensor will store the result of the normalization.
- * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input.
- * @param[in] info Kernel meta-data descriptor
- */
- void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const InstanceNormalizationLayerKernelInfo &info);
-
- /** Static function to check if given info will lead to a valid configuration of @ref CLInstanceNormalizationLayer.
- *
- * @param[in] input Source tensor info. Data types supported: F16/F32. Data layout supported: NHWC, NCHW
- * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p input.
- * @param[in] info Kernel meta-data descriptor
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const InstanceNormalizationLayerKernelInfo &info);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- ICLTensor *_input;
- ICLTensor *_output;
- bool _run_in_place;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYERKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLINTEGRALIMAGEKERNEL_H
-#define ARM_COMPUTE_CLINTEGRALIMAGEKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/CL/ICLSimple2DKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface to run the horizontal pass of the integral image kernel. */
-class CLIntegralImageHorKernel : public ICLSimple2DKernel
-{
-public:
- /** Initialise the kernel's input and output.
- *
- * @param[in] input An input tensor. Data types supported: U8
- * @param[out] output Destination tensor, Data types supported: U32.
- */
- void configure(const ICLTensor *input, ICLTensor *output);
- /** Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input An input tensor. Data types supported: U8
- * @param[out] output Destination tensor, Data types supported: U32.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
-};
-
-/** Interface to run the vertical pass of the integral image kernel. */
-class CLIntegralImageVertKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLIntegralImageVertKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLIntegralImageVertKernel(const CLIntegralImageVertKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLIntegralImageVertKernel &operator=(const CLIntegralImageVertKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLIntegralImageVertKernel(CLIntegralImageVertKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLIntegralImageVertKernel &operator=(CLIntegralImageVertKernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @param[in,out] in_out The input/output tensor. Data types supported: U32
- */
- void configure(ICLTensor *in_out);
- /** Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in,out] in_out The input/output tensor. Data types supported: U32
- */
- void configure(const CLCompileContext &compile_context, ICLTensor *in_out);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- ICLTensor *_in_out;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLINTEGRALIMAGEKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLL2NORMALIZELAYERKERNEL_H
-#define ARM_COMPUTE_CLL2NORMALIZELAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for performing a L2 normalize on a given axis given the square sum of it in this axis */
-class CLL2NormalizeLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLL2NormalizeLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLL2NormalizeLayerKernel(const CLL2NormalizeLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLL2NormalizeLayerKernel &operator=(const CLL2NormalizeLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLL2NormalizeLayerKernel(CLL2NormalizeLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLL2NormalizeLayerKernel &operator=(CLL2NormalizeLayerKernel &&) = default;
- /** Default destructor */
- ~CLL2NormalizeLayerKernel() = default;
-
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC.
- * @param[in] sum Sum values tensor. Data types supported: same as @p input.
- * Sum will have the same number of dimensions as input.
- * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input.
- * Output will have the same number of dimensions as input.
- * @param[in] axis Axis along which to reduce. Negative values wrap around. Maximum supported actual reduction axis : 2
- * @param[in] epsilon Lower bound value for the normalization.
- */
- void configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, int axis, float epsilon);
- /** Set the input and output tensors.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC.
- * @param[in] sum Sum values tensor. Data types supported: same as @p input.
- * Sum will have the same number of dimensions as input.
- * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input.
- * Output will have the same number of dimensions as input.
- * @param[in] axis Axis along which to reduce. Negative values wrap around. Maximum supported actual reduction axis : 2
- * @param[in] epsilon Lower bound value for the normalization.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, int axis, float epsilon);
-
- /** Static function to check if given info will lead to a valid configuration of @ref CLL2NormalizeLayerKernel.
- *
- * @param[in] input Source tensor info. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC.
- * @param[in] sum Sum values tensor info. Data types supported: same as @p input.
- * Sum will have the same number of dimensions as input.
- * @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p input.
- * Output will have the same number of dimensions as input.
- * @param[in] axis Axis along which to reduce. Negative values wrap around. Maximum supported actual reduction axis : 2
- * @param[in] epsilon Lower bound value for the normalization.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, int axis, float epsilon);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- const ICLTensor *_sum;
- ICLTensor *_output;
- unsigned int _actual_axis;
- float _epsilon;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLL2NORMALIZELAYERKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLLKTRACKERKERNEL_H
-#define ARM_COMPUTE_CLLKTRACKERKERNEL_H
-
-#include "arm_compute/core/CL/ICLArray.h"
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-#include <cstddef>
-#include <cstdint>
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Internal keypoint structure for Lucas-Kanade Optical Flow */
-struct CLLKInternalKeypoint
-{
- float x{ 0.f }; /**< x coordinate of the keypoint */
- float y{ 0.f }; /**< y coordinate of the keypoint */
- float tracking_status{ 0.f }; /**< the tracking status of the keypoint */
- float dummy{ 0.f }; /**< Dummy field, to make sure the data structure 128-bit align, so that GPU can use vload4 */
-};
-
-/** Structure for storing Spatial Gradient Matrix and the minimum eigenvalue for each keypoint */
-struct CLCoefficientTable
-{
- float A11; /**< iA11 * FLT_SCALE */
- float A12; /**< iA11 * FLT_SCALE */
- float A22; /**< iA11 * FLT_SCALE */
- float min_eig; /**< Minimum eigenvalue */
-};
-
-/** Structure for storing ival, ixval and iyval for each point inside the window */
-struct CLOldValue
-{
- int16_t ival; /**< ival extracts from old image */
- int16_t ixval; /**< ixval extracts from scharr Gx image */
- int16_t iyval; /**< iyval extracts from scharr Gy image */
- int16_t dummy; /**< Dummy field, to make sure the data structure 128-bit align, so that GPU can use vload4 */
-};
-
-/** Interface for OpenCL Array of Internal Key Points. */
-using ICLLKInternalKeypointArray = ICLArray<CLLKInternalKeypoint>;
-/** Interface for OpenCL Array of Coefficient Tables. */
-using ICLCoefficientTableArray = ICLArray<CLCoefficientTable>;
-/** Interface for OpenCL Array of Old Values. */
-using ICLOldValArray = ICLArray<CLOldValue>;
-
-/** Interface to run the initialization step of LKTracker */
-class CLLKTrackerInitKernel : public ICLKernel
-{
-public:
- /** Initialise the kernel input and output
- *
- * @param[in] old_points Pointer to the @ref ICLKeyPointArray storing old key points
- * @param[in] new_points_estimates Pointer to the @ref ICLKeyPointArray storing new estimates key points
- * @param[out] old_points_internal Pointer to the array of internal @ref CLLKInternalKeypoint old points
- * @param[out] new_points_internal Pointer to the array of internal @ref CLLKInternalKeypoint new points
- * @param[in] use_initial_estimate The flag to indicate whether the initial estimated position should be used
- * @param[in] level The pyramid level
- * @param[in] num_levels The number of pyramid levels
- * @param[in] pyramid_scale Scale factor used for generating the pyramid
- */
- void configure(const ICLKeyPointArray *old_points, const ICLKeyPointArray *new_points_estimates,
- ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal,
- bool use_initial_estimate, size_t level, size_t num_levels, float pyramid_scale);
- /** Initialise the kernel input and output
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] old_points Pointer to the @ref ICLKeyPointArray storing old key points
- * @param[in] new_points_estimates Pointer to the @ref ICLKeyPointArray storing new estimates key points
- * @param[out] old_points_internal Pointer to the array of internal @ref CLLKInternalKeypoint old points
- * @param[out] new_points_internal Pointer to the array of internal @ref CLLKInternalKeypoint new points
- * @param[in] use_initial_estimate The flag to indicate whether the initial estimated position should be used
- * @param[in] level The pyramid level
- * @param[in] num_levels The number of pyramid levels
- * @param[in] pyramid_scale Scale factor used for generating the pyramid
- */
- void configure(const CLCompileContext &compile_context, const ICLKeyPointArray *old_points, const ICLKeyPointArray *new_points_estimates,
- ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal,
- bool use_initial_estimate, size_t level, size_t num_levels, float pyramid_scale);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-};
-
-/** Interface to run the finalize step of LKTracker, where it truncates the coordinates stored in new_points array */
-class CLLKTrackerFinalizeKernel : public ICLKernel
-{
-public:
- /** Initialise the kernel input and output
- *
- * @param[in] new_points_internal Pointer to the array of internal @ref CLLKInternalKeypoint new points
- * @param[out] new_points Pointer to the @ref ICLKeyPointArray storing new key points
- */
- void configure(ICLLKInternalKeypointArray *new_points_internal, ICLKeyPointArray *new_points);
- /** Initialise the kernel input and output
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] new_points_internal Pointer to the array of internal @ref CLLKInternalKeypoint new points
- * @param[out] new_points Pointer to the @ref ICLKeyPointArray storing new key points
- */
- void configure(const CLCompileContext &compile_context, ICLLKInternalKeypointArray *new_points_internal, ICLKeyPointArray *new_points);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-};
-
-/** Interface to run the first stage of LKTracker, where A11, A12, A22, min_eig, ival, ixval and iyval are computed */
-class CLLKTrackerStage0Kernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLLKTrackerStage0Kernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLLKTrackerStage0Kernel(const CLLKTrackerStage0Kernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLLKTrackerStage0Kernel &operator=(const CLLKTrackerStage0Kernel &) = delete;
- /** Allow instances of this class to be moved */
- CLLKTrackerStage0Kernel(CLLKTrackerStage0Kernel &&) = default;
- /** Allow instances of this class to be moved */
- CLLKTrackerStage0Kernel &operator=(CLLKTrackerStage0Kernel &&) = default;
- /** Initialise the kernel input and output
- *
- * @param[in] old_input Pointer to the input old tensor. Data types supported: U8
- * @param[in] old_scharr_gx Pointer to the input scharr X tensor. Data types supported: S16
- * @param[in] old_scharr_gy Pointer to the input scharr Y tensor. Data types supported: S16
- * @param[in] old_points_internal Pointer to the array of CLLKInternalKeypoint old points
- * @param[in, out] new_points_internal Pointer to the array of CLLKInternalKeypoint new points
- * @param[out] coeff_table Pointer to the array holding the Spatial Gradient coefficients
- * @param[out] old_ival Pointer to the array holding internal values
- * @param[in] window_dimension The size of the window on which to perform the algorithm
- * @param[in] level The pyramid level
- */
- void configure(const ICLTensor *old_input, const ICLTensor *old_scharr_gx, const ICLTensor *old_scharr_gy,
- ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal,
- ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival,
- size_t window_dimension, size_t level);
- /** Initialise the kernel input and output
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] old_input Pointer to the input old tensor. Data types supported: U8
- * @param[in] old_scharr_gx Pointer to the input scharr X tensor. Data types supported: S16
- * @param[in] old_scharr_gy Pointer to the input scharr Y tensor. Data types supported: S16
- * @param[in] old_points_internal Pointer to the array of CLLKInternalKeypoint old points
- * @param[in, out] new_points_internal Pointer to the array of CLLKInternalKeypoint new points
- * @param[out] coeff_table Pointer to the array holding the Spatial Gradient coefficients
- * @param[out] old_ival Pointer to the array holding internal values
- * @param[in] window_dimension The size of the window on which to perform the algorithm
- * @param[in] level The pyramid level
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *old_input, const ICLTensor *old_scharr_gx, const ICLTensor *old_scharr_gy,
- ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal,
- ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival,
- size_t window_dimension, size_t level);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_old_input;
- const ICLTensor *_old_scharr_gx;
- const ICLTensor *_old_scharr_gy;
-};
-
-/** Interface to run the second stage of LKTracker, where the motion vectors of the given points are computed */
-class CLLKTrackerStage1Kernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLLKTrackerStage1Kernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLLKTrackerStage1Kernel(const CLLKTrackerStage1Kernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLLKTrackerStage1Kernel &operator=(const CLLKTrackerStage1Kernel &) = delete;
- /** Allow instances of this class to be moved */
- CLLKTrackerStage1Kernel(CLLKTrackerStage1Kernel &&) = default;
- /** Allow instances of this class to be moved */
- CLLKTrackerStage1Kernel &operator=(CLLKTrackerStage1Kernel &&) = default;
- /** Initialise the kernel input and output
- *
- * @param[in] new_input Pointer to the input new tensor. Data types supported: U8
- * @param[in, out] new_points_internal Pointer to the array of CLLKInternalKeypoint for new points
- * @param[in] coeff_table Pointer to the array holding the Spatial Gradient coefficients
- * @param[in] old_ival Pointer to the array holding internal values
- * @param[in] termination The criteria to terminate the search of each keypoint.
- * @param[in] epsilon The error for terminating the algorithm
- * @param[in] num_iterations The maximum number of iterations before terminating the algorithm
- * @param[in] window_dimension The size of the window on which to perform the algorithm
- * @param[in] level The pyramid level
- */
- void configure(const ICLTensor *new_input, ICLLKInternalKeypointArray *new_points_internal, ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival,
- Termination termination, float epsilon, size_t num_iterations, size_t window_dimension, size_t level);
- /** Initialise the kernel input and output
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] new_input Pointer to the input new tensor. Data types supported: U8
- * @param[in, out] new_points_internal Pointer to the array of CLLKInternalKeypoint for new points
- * @param[in] coeff_table Pointer to the array holding the Spatial Gradient coefficients
- * @param[in] old_ival Pointer to the array holding internal values
- * @param[in] termination The criteria to terminate the search of each keypoint.
- * @param[in] epsilon The error for terminating the algorithm
- * @param[in] num_iterations The maximum number of iterations before terminating the algorithm
- * @param[in] window_dimension The size of the window on which to perform the algorithm
- * @param[in] level The pyramid level
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *new_input, ICLLKInternalKeypointArray *new_points_internal, ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival,
- Termination termination, float epsilon, size_t num_iterations, size_t window_dimension, size_t level);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_new_input;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLLKTRACKERKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLLOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H
-#define ARM_COMPUTE_CLLOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to multiply each row of first tensor with low 2 dimensions of second tensor.
- *
- * @attention The second input tensor must have at least 2 dimensions (matrix)
- *
- */
-class CLLocallyConnectedMatrixMultiplyKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLLocallyConnectedMatrixMultiplyKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLLocallyConnectedMatrixMultiplyKernel(const CLLocallyConnectedMatrixMultiplyKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLLocallyConnectedMatrixMultiplyKernel &operator=(const CLLocallyConnectedMatrixMultiplyKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLLocallyConnectedMatrixMultiplyKernel(CLLocallyConnectedMatrixMultiplyKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLLocallyConnectedMatrixMultiplyKernel &operator=(CLLocallyConnectedMatrixMultiplyKernel &&) = default;
- /** Initialise the kernel's input, output and alpha
- *
- * @param[in] input0 First input tensor. Data types supported: F32
- * @param[in] input1 Second input tensor. Data type supported: same as @p input0
- * @param[out] output Output tensor to store the result. Data type supported: same as @p input0
- */
- void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output);
- /** Initialise the kernel's input, output and alpha
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input0 First input tensor. Data types supported: F32
- * @param[in] input1 Second input tensor. Data type supported: same as @p input0
- * @param[out] output Output tensor to store the result. Data type supported: same as @p input0
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLLocallyConnectedMatrixMultiplyKernel
- *
- * @param[in] input0 First input tensor info. Data types supported: F32
- * @param[in] input1 Second input tensor info. Data type supported: same as @p input0
- * @param[in] output Output tensor info. Data type supported: same as @p input0
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input0;
- const ICLTensor *_input1;
- ICLTensor *_output;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLLOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLMAGNITUDEPHASEKERNEL_H
-#define ARM_COMPUTE_CLMAGNITUDEPHASEKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Template interface for the kernel to compute magnitude and phase.
- *
- */
-class CLMagnitudePhaseKernel : public ICLKernel
-{
-public:
- /** Default constructor. */
- CLMagnitudePhaseKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLMagnitudePhaseKernel(const CLMagnitudePhaseKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLMagnitudePhaseKernel &operator=(const CLMagnitudePhaseKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLMagnitudePhaseKernel(CLMagnitudePhaseKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLMagnitudePhaseKernel &operator=(CLMagnitudePhaseKernel &&) = default;
- /** Initialise the kernel's input, output.
- *
- * @note At least one of output1 or output2 must be set.
- *
- * @param[in] gx The input gradient X tensor. Data types supported: S16/S32.
- * @param[in] gy The input gradient Y tensor. Data types supported: S16/S32.
- * @param[out] magnitude (Optional) The output tensor - Magnitude. Data types supported: S16/S32.
- * @param[out] phase (Optional) The output tensor - Phase. Data types supported: U8.
- * @param[in] mag_type (Optional) Magnitude calculation type. Default: L2NORM.
- * @param[in] phase_type (Optional) Phase calculation type. Default: SIGNED.
- */
- void configure(const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase,
- MagnitudeType mag_type = MagnitudeType::L2NORM, PhaseType phase_type = PhaseType::SIGNED);
- /** Initialise the kernel's input, output.
- *
- * @note At least one of output1 or output2 must be set.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] gx The input gradient X tensor. Data types supported: S16/S32.
- * @param[in] gy The input gradient Y tensor. Data types supported: S16/S32.
- * @param[out] magnitude (Optional) The output tensor - Magnitude. Data types supported: S16/S32.
- * @param[out] phase (Optional) The output tensor - Phase. Data types supported: U8.
- * @param[in] mag_type (Optional) Magnitude calculation type. Default: L2NORM.
- * @param[in] phase_type (Optional) Phase calculation type. Default: SIGNED.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase,
- MagnitudeType mag_type = MagnitudeType::L2NORM, PhaseType phase_type = PhaseType::SIGNED);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_gx; /**< Input gradient X. */
- const ICLTensor *_gy; /**< Input gradient Y. */
- ICLTensor *_magnitude; /**< Output - Magnitude. */
- ICLTensor *_phase; /**< Output - Phase. */
- bool _run_mag; /**< Calculate magnitude ? */
- bool _run_phase; /**< Calculate phase ? */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLMAGNITUDEPHASEKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLMAXUNPOOLINGLAYERKERNEL_H
-#define ARM_COMPUTE_CLMAXUNPOOLINGLAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the pooling layer kernel */
-class CLMaxUnpoolingLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLMaxUnpoolingLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLMaxUnpoolingLayerKernel(const CLMaxUnpoolingLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLMaxUnpoolingLayerKernel &operator=(const CLMaxUnpoolingLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLMaxUnpoolingLayerKernel(CLMaxUnpoolingLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLMaxUnpoolingLayerKernel &operator=(CLMaxUnpoolingLayerKernel &&) = default;
- /** Default destructor */
- ~CLMaxUnpoolingLayerKernel() = default;
- /** Set the input and output tensors.
- *
- * @note Output shape must be equal to the shape of the original input to pool.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] indices Tensor containing the offset to store the input elements in the output tensor.
- * @ref CLPoolingLayerKernel with indices should precede this function in order to
- * properly reconstruct the output tensor.
- * The tensor shape of this tensor has to be equal to the input tensor shape. Data type supported: U32.
- * @param[out] output Destination tensor. Data types supported: Same as @p input.
- * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, const PoolingLayerInfo &pool_info);
- /** Static function to check if given info will lead to a valid configuration of @ref CLMaxUnpoolingLayerKernel
- *
- * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] output Destination tensor info. Data types supported: Same as @p input.
- * @param[in] indices TensorInfo associated to the tensor containing the offset to store the input elements in the output tensor.
- * @ref CLPoolingLayerKernel with indices should precede this function in order to
- * properly reconstruct the output tensor.
- * The tensor shape of this tensor has to be equal to the input tensor shape. Data type supported: U32.
- * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, const PoolingLayerInfo &pool_info);
-
- // Inherited methods overridden
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
- const ICLTensor *_indices;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLMAXUNPOOLINGLAYERKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLMEANSTDDEVKERNEL_H
-#define ARM_COMPUTE_CLMEANSTDDEVKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace cl
-{
-class Buffer;
-}
-
-namespace arm_compute
-{
-class ICLTensor;
-using ICLImage = ICLTensor;
-
-/** Interface for the kernel to calculate mean and standard deviation of input image pixels. */
-class CLMeanStdDevKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLMeanStdDevKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLMeanStdDevKernel(const CLMeanStdDevKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLMeanStdDevKernel &operator=(const CLMeanStdDevKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLMeanStdDevKernel(CLMeanStdDevKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLMeanStdDevKernel &operator=(CLMeanStdDevKernel &&) = default;
- /** Initialise the kernel's input and outputs.
- *
- * @param[in] input Input image. Data types supported: U8.
- * @param[out] mean Input average pixel value.
- * @param[out] global_sum Keeps global sum of pixel values (Buffer size: 1 cl_ulong).
- * @param[out] stddev (Optional) Output standard deviation of pixel values.
- * @param[out] global_sum_squared (Optional if stddev is not set, required if stddev is set) Keeps global sum of squared pixel values (Buffer size: 1 cl_ulong).
- */
- void configure(const ICLImage *input, float *mean, cl::Buffer *global_sum, float *stddev = nullptr, cl::Buffer *global_sum_squared = nullptr);
- /** Initialise the kernel's input and outputs.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input image. Data types supported: U8.
- * @param[out] mean Input average pixel value.
- * @param[out] global_sum Keeps global sum of pixel values (Buffer size: 1 cl_ulong).
- * @param[out] stddev (Optional) Output standard deviation of pixel values.
- * @param[out] global_sum_squared (Optional if stddev is not set, required if stddev is set) Keeps global sum of squared pixel values (Buffer size: 1 cl_ulong).
- */
- void configure(const CLCompileContext &compile_context, const ICLImage *input, float *mean, cl::Buffer *global_sum, float *stddev = nullptr, cl::Buffer *global_sum_squared = nullptr);
- /** Static function to check if given info will lead to a valid configuration of @ref CLMeanStdDevKernel.
- *
- * @param[in] input Input image info. Data types supported: U8.
- * @param[in] mean Input average pixel value.
- * @param[in] global_sum Keeps global sum of pixel values.
- * @param[in] stddev (Optional) Output standard deviation of pixel values.
- * @param[in] global_sum_squared (Optional if stddev is not set, required if stddev is set) Keeps global sum of squared pixel values.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, float *mean, cl::Buffer *global_sum, float *stddev = nullptr, cl::Buffer *global_sum_squared = nullptr);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
- BorderSize border_size() const override;
-
-private:
- const ICLImage *_input;
- float *_mean;
- float *_stddev;
- cl::Buffer *_global_sum;
- cl::Buffer *_global_sum_squared;
- BorderSize _border_size;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLMEANSTDDEVKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2019-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLMEANSTDDEVNORMALIZATIONKERNEL_H
-#define ARM_COMPUTE_CLMEANSTDDEVNORMALIZATIONKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the kernel to normalize the input 2D tensor across the first dimension with respect to mean and standard deviation of the same dimension. */
-class CLMeanStdDevNormalizationKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLMeanStdDevNormalizationKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLMeanStdDevNormalizationKernel(const CLMeanStdDevNormalizationKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLMeanStdDevNormalizationKernel &operator=(const CLMeanStdDevNormalizationKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLMeanStdDevNormalizationKernel(CLMeanStdDevNormalizationKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLMeanStdDevNormalizationKernel &operator=(CLMeanStdDevNormalizationKernel &&) = default;
- /** Default destructor */
- ~CLMeanStdDevNormalizationKernel() = default;
- /** Initialise the kernel's input and outputs.
- *
- * @note If the output tensor is a nullptr, the normalization will be performed in-place.
- *
- * @param[in, out] input Source tensor with 2 dimensions. In case of @p output tensor = nullptr,
- * this tensor will store the result of the normalization. Data types supported: F16/F32.
- * @param[out] output (Optional) Destination tensor. It can be nullptr in case of in-place computation. Data type supported: same as @p input
- * @param[in] epsilon (Optional) Small float to avoid division by zero in case of zero standard deviation. Defaults to 1e-8.
- */
- void configure(ICLTensor *input, ICLTensor *output = nullptr, float epsilon = 1e-8f);
- /** Initialise the kernel's input and outputs.
- *
- * @note If the output tensor is a nullptr, the normalization will be performed in-place.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in, out] input Source tensor with 2 dimensions. In case of @p output tensor = nullptr,
- * this tensor will store the result of the normalization. Data types supported: F16/F32.
- * @param[out] output (Optional) Destination tensor. It can be nullptr in case of in-place computation. Data type supported: same as @p input
- * @param[in] epsilon (Optional) Small float to avoid division by zero in case of zero standard deviation. Defaults to 1e-8.
- */
- void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output = nullptr, float epsilon = 1e-8f);
- /** Static function to check if given info will lead to a valid configuration of @ref CLMeanStdDevNormalizationKernel
- *
- * @param[in] input Source tensor info with 2 dimensions. In case of @p output tensor info = nullptr,
- * this tensor will store the result of the normalization. Data types supported: F16/F32.
- * @param[in] output (Optional) Destination tensor info. It can be nullptr in case of in-place computation. Data type supported: same as @p input
- * @param[in] epsilon (Optional) Small float to avoid division by zero in case of zero standard deviation. Defaults to 1e-8.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output = nullptr, float epsilon = 1e-8f);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- ICLTensor *_input;
- ICLTensor *_output;
- bool _run_in_place;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLMEANSTDDEVNORMALIZATIONKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLMEDIAN3X3KERNEL_H
-#define ARM_COMPUTE_CLMEDIAN3X3KERNEL_H
-
-#include "arm_compute/core/CL/ICLSimple2DKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the median 3x3 filter kernel.
- *
- */
-class CLMedian3x3Kernel : public ICLSimple2DKernel
-{
-public:
- /** Initialise the kernel's input and output.
- *
- * @param[in] input An input tensor. Data types supported: U8
- * @param[out] output The output tensor. Data types supported: U8.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
- /** Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input An input tensor. Data types supported: U8
- * @param[out] output The output tensor. Data types supported: U8.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined);
-
- // Inherited methods overridden:
- BorderSize border_size() const override;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLMEDIAN3X3KERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLMEMSETKERNEL_H
-#define ARM_COMPUTE_CLMEMSETKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/PixelValue.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for filling the planes of a tensor */
-class CLMemsetKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLMemsetKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLMemsetKernel(const CLMemsetKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLMemsetKernel &operator=(const CLMemsetKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLMemsetKernel(CLMemsetKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLMemsetKernel &operator=(CLMemsetKernel &&) = default;
- /** Default destructor */
- ~CLMemsetKernel() = default;
-
- /** Initialise the kernel's tensor and filling value
- *
- * @param[in,out] tensor Input tensor to fill. Supported data types: All.
- * @param[in] constant_value The value used to fill the planes of the tensor
- * @param[in] window Window to be used in case setting only part of a tensor. Default is nullptr.
- */
- void configure(ICLTensor *tensor, const PixelValue &constant_value, Window *window = nullptr);
- /** Initialise the kernel's tensor and filling value
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in,out] tensor Input tensor to fill. Supported data types: All.
- * @param[in] constant_value The value used to fill the planes of the tensor
- * @param[in] window Window to be used in case setting only part of a tensor. Default is nullptr.
- */
- void configure(const CLCompileContext &compile_context, ICLTensor *tensor, const PixelValue &constant_value, Window *window = nullptr);
- /** Static function to check if given info will lead to a valid configuration of @ref CLMemsetKernel
- *
- * @param[in] tensor Source tensor info. Data types supported: All.
- * @param[in] constant_value The value used to fill the planes of the tensor
- * @param[in] window Window to be used in case setting only part of a tensor. Default is nullptr.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *tensor, const PixelValue &constant_value, Window *window = nullptr);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- ICLTensor *_tensor;
- Window _full_window;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLMEMSETRKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLMINMAXLAYERKERNEL_H
-#define ARM_COMPUTE_CLMINMAXLAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the kernel to perform min max search on a 3D tensor.
- */
-class CLMinMaxLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLMinMaxLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLMinMaxLayerKernel(const CLMinMaxLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLMinMaxLayerKernel &operator=(const CLMinMaxLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLMinMaxLayerKernel(CLMinMaxLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLMinMaxLayerKernel &operator=(CLMinMaxLayerKernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input tensor with at least 3 dimensions. The dimensions over the third will be interpreted as batches.Data types supported: F32.
- * @param[out] output Output tensor with shape [2, batches, ...] which stores the minimum and maximum values for each 3D input tensor.
- * The dimensions over the second must match the batched dimensions of the input tensor. Data types supported: F32.
- */
- void configure(const ICLTensor *input, ICLTensor *output);
- /** Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor with at least 3 dimensions. The dimensions over the third will be interpreted as batches.Data types supported: F32.
- * @param[out] output Output tensor with shape [2, batches, ...] which stores the minimum and maximum values for each 3D input tensor.
- * The dimensions over the second must match the batched dimensions of the input tensor. Data types supported: F32.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLMinMaxLayerKernel
- *
- * @param[in] input Input tensor info. Data types supported: F32.
- * @param[in] output Output tensor info with shape [2, batches, ...] which stores the minimum and maximum values for each 3D input tensor.
- * The dimensions over the second must match the batched dimensions of the input tensor. Data types supported: F32.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-
- /** Resets global minimum and maximum
- *
- * @param[in,out] queue Command queue on which to map and unmap the min_max tensor
- */
- void reset(cl::CommandQueue &queue);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLMINMAXLAYERKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLMINMAXLOCATIONKERNEL_H
-#define ARM_COMPUTE_CLMINMAXLOCATIONKERNEL_H
-
-#include "arm_compute/core/CL/ICLArray.h"
-#include "arm_compute/core/CL/ICLKernel.h"
-
-#include <array>
-
-namespace arm_compute
-{
-class ICLTensor;
-using ICLImage = ICLTensor;
-
-/** Interface for the kernel to perform min max search on an image.
- */
-class CLMinMaxKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLMinMaxKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLMinMaxKernel(const CLMinMaxKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLMinMaxKernel &operator=(const CLMinMaxKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLMinMaxKernel(CLMinMaxKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLMinMaxKernel &operator=(CLMinMaxKernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input Image. Data types supported: U8/S16/F32.
- * @param[out] min_max Buffer of 2 elements to store the min value at position 0 and the max value at position 1. Data type supported: S32 if input type is U8/S16, F32 if input type is F32.
- */
- void configure(const ICLImage *input, cl::Buffer *min_max);
- /** Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input Image. Data types supported: U8/S16/F32.
- * @param[out] min_max Buffer of 2 elements to store the min value at position 0 and the max value at position 1. Data type supported: S32 if input type is U8/S16, F32 if input type is F32.
- */
- void configure(const CLCompileContext &compile_context, const ICLImage *input, cl::Buffer *min_max);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input; /**< Input image. */
- cl::Buffer *_min_max; /**< Minimum/maximum value. */
- std::array<int, 2> _data_type_max_min; /**< Maximum and minimum data type value respectively. */
-};
-
-/** Interface for the kernel to find min max locations of an image.
- */
-class CLMinMaxLocationKernel : public ICLKernel
-{
-public:
- /** Constructor */
- CLMinMaxLocationKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLMinMaxLocationKernel(const CLMinMaxLocationKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLMinMaxLocationKernel &operator=(const CLMinMaxLocationKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLMinMaxLocationKernel(CLMinMaxLocationKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLMinMaxLocationKernel &operator=(CLMinMaxLocationKernel &&) = default;
- /** Initialise the kernel's input and outputs.
- *
- * @note When locations of min and max occurrences are requested, the reported number of locations is limited to the given array size.
- *
- * @param[in] input Input image. Data types supported: U8/S16/F32.
- * @param[out] min_max Buffer of 2 elements to store the min value at position 0 and the max value at position 1. Data type supported: S32 if input type is U8/S16, F32 if input type is F32.
- * @param[out] min_max_count Buffer of 2 elements to store the min value occurrences at position 0 and the max value occurrences at position 1. Data type supported: S32
- * @param[out] min_loc (Optional) Array of Coordinates2D used to store minimum value locations.
- * @param[out] max_loc (Optional) Array of Coordinates2D used to store maximum value locations.
- */
- void configure(const ICLImage *input, cl::Buffer *min_max, cl::Buffer *min_max_count,
- ICLCoordinates2DArray *min_loc = nullptr, ICLCoordinates2DArray *max_loc = nullptr);
- /** Initialise the kernel's input and outputs.
- *
- * @note When locations of min and max occurrences are requested, the reported number of locations is limited to the given array size.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input image. Data types supported: U8/S16/F32.
- * @param[out] min_max Buffer of 2 elements to store the min value at position 0 and the max value at position 1. Data type supported: S32 if input type is U8/S16, F32 if input type is F32.
- * @param[out] min_max_count Buffer of 2 elements to store the min value occurrences at position 0 and the max value occurrences at position 1. Data type supported: S32
- * @param[out] min_loc (Optional) Array of Coordinates2D used to store minimum value locations.
- * @param[out] max_loc (Optional) Array of Coordinates2D used to store maximum value locations.
- */
- void configure(const CLCompileContext &compile_context, const ICLImage *input, cl::Buffer *min_max, cl::Buffer *min_max_count,
- ICLCoordinates2DArray *min_loc = nullptr, ICLCoordinates2DArray *max_loc = nullptr);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLImage *_input; /**< Input image. */
- cl::Buffer *_min_max_count; /**< Minimum/maximum value occurrences. */
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLMINMAXLOCATIONKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLNONLINEARFILTERKERNEL_H
-#define ARM_COMPUTE_CLNONLINEARFILTERKERNEL_H
-
-#include "arm_compute/core/CL/ICLSimple2DKernel.h"
-#include "arm_compute/core/Types.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the kernel to apply a non-linear filter */
-class CLNonLinearFilterKernel : public ICLSimple2DKernel
-{
-public:
- /** Default constructor */
- CLNonLinearFilterKernel();
- /** Set the source, destination and border mode of the kernel
- *
- * @param[in] input Source tensor. Data types supported: U8
- * @param[out] output Destination tensor. Data types supported: U8
- * @param[in] function Non linear function to perform
- * @param[in] mask_size Mask size. Supported sizes: 3, 5
- * @param[in] pattern Mask pattern
- * @param[in] mask The given mask. Will be used only if pattern is specified to PATTERN_OTHER
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *input, ICLTensor *output, NonLinearFilterFunction function,
- unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask,
- bool border_undefined);
- /** Set the source, destination and border mode of the kernel
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8
- * @param[out] output Destination tensor. Data types supported: U8
- * @param[in] function Non linear function to perform
- * @param[in] mask_size Mask size. Supported sizes: 3, 5
- * @param[in] pattern Mask pattern
- * @param[in] mask The given mask. Will be used only if pattern is specified to PATTERN_OTHER
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, NonLinearFilterFunction function,
- unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask,
- bool border_undefined);
-
- // Inherited methods overridden:
- BorderSize border_size() const override;
-
-private:
- BorderSize _border_size; /**< Border size */
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLNONLINEARFILTERKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLNONMAXIMASUPPRESSION3x3KERNEL_H
-#define ARM_COMPUTE_CLNONMAXIMASUPPRESSION3x3KERNEL_H
-
-#include "arm_compute/core/CL/ICLSimple2DKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface to perform Non-Maxima suppression over a 3x3 window using OpenCL
- *
- * @note Used by @ref CLFastCorners and @ref CLHarrisCorners
- */
-class CLNonMaximaSuppression3x3Kernel : public ICLSimple2DKernel
-{
-public:
- /** Initialise the kernel's sources, destinations and border mode.
- *
- * @param[in] input Source tensor. Data types supported: U8, F32. (Must be the same as the output tensor)
- * @param[out] output Destination tensor. Data types supported: U8, F32. (Must be the same as the input tensor)
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
- /** Initialise the kernel's sources, destinations and border mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8, F32. (Must be the same as the output tensor)
- * @param[out] output Destination tensor. Data types supported: U8, F32. (Must be the same as the input tensor)
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined);
-
- // Inherited methods overridden:
- BorderSize border_size() const override;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLNONMAXIMASUPPRESSION3x3KERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLNORMALIZATIONLAYERKERNEL_H
-#define ARM_COMPUTE_CLNORMALIZATIONLAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the normalization layer kernel.
- */
-class CLNormalizationLayerKernel : public ICLKernel
-{
-public:
- /** Constructor */
- CLNormalizationLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLNormalizationLayerKernel(const CLNormalizationLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLNormalizationLayerKernel &operator=(const CLNormalizationLayerKernel &) = delete;
- /** Default Move Constructor. */
- CLNormalizationLayerKernel(CLNormalizationLayerKernel &&) = default;
- /** Default move assignment operator */
- CLNormalizationLayerKernel &operator=(CLNormalizationLayerKernel &&) = default;
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM],
- * and an optional 4th dimension for batch of inputs. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC.
- * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data types supported: same as @p input.
- * Data layouts supported: same as @p input.
- * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters.
- */
- void configure(const ICLTensor *input, ICLTensor *output, NormalizationLayerInfo norm_info);
- /** Set the input and output tensors.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM],
- * and an optional 4th dimension for batch of inputs. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC.
- * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data types supported: same as @p input.
- * Data layouts supported: same as @p input.
- * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, NormalizationLayerInfo norm_info);
- /** Static function to check if given info will lead to a valid configuration of @ref CLNormalizationLayerKernel
- *
- * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM],
- * and an optional 4th dimension for batch of inputs. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC.
- * @param[in] output Destination tensor. Output will have the same number of dimensions as input. Data types supported: same as @p input.
- * Data layouts supported: same as @p input.
- * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, NormalizationLayerInfo norm_info);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
- BorderSize _border_size;
- bool _is_norm_across_width;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLNORMALIZATIONLAYERKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLNORMALIZEPLANARYUVLAYERKERNEL_H
-#define ARM_COMPUTE_CLNORMALIZEPLANARYUVLAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the NormalizePlanarYUV layer kernel. */
-class CLNormalizePlanarYUVLayerKernel : public ICLKernel
-{
-public:
- /** Constructor */
- CLNormalizePlanarYUVLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLNormalizePlanarYUVLayerKernel(const CLNormalizePlanarYUVLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLNormalizePlanarYUVLayerKernel &operator=(const CLNormalizePlanarYUVLayerKernel &) = delete;
- /** Default Move Constructor. */
- CLNormalizePlanarYUVLayerKernel(CLNormalizePlanarYUVLayerKernel &&) = default;
- /** Default move assignment operator */
- CLNormalizePlanarYUVLayerKernel &operator=(CLNormalizePlanarYUVLayerKernel &&) = default;
- /** Default destructor */
- ~CLNormalizePlanarYUVLayerKernel() = default;
-
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, channels].
- * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[out] output Destination tensor. Data type supported: same as @p input
- * @param[in] mean Mean values tensor. 1 dimension with size equal to the number of input channels. Data types supported: same as @p input
- * @param[in] std Standard deviation values tensor. 1 dimension with size equal to the number of input channels.
- * Data types supported: same as @p input
- */
- void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *std);
- /** Set the input and output tensors.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, channels].
- * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[out] output Destination tensor. Data type supported: same as @p input
- * @param[in] mean Mean values tensor. 1 dimension with size equal to the number of input channels. Data types supported: same as @p input
- * @param[in] std Standard deviation values tensor. 1 dimension with size equal to the number of input channels.
- * Data types supported: same as @p input
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *std);
- /** Static function to check if given info will lead to a valid configuration of @ref CLNormalizePlanarYUVLayerKernel
- *
- * @param[in] input Source tensor info. 3 lower dimensions represent a single input with dimensions [width, height, channels].
- * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[out] output Destination tensor info. Data type supported: same as @p input
- * @param[in] mean Mean values tensor info. 1 dimension with size equal to the number of input channels. Data types supported: same as @p input
- * @param[in] std Standard deviation values tensor info. 1 dimension with size equal to the number of input channels.
- * Data types supported: same as @p input
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *mean, const ITensorInfo *std);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
- const ICLTensor *_mean;
- const ICLTensor *_std;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLNORMALIZEPLANARYUVLAYERKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2019-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLPADLAYERKERNEL_H
-#define ARM_COMPUTE_CLPADLAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the PadLayer function. */
-class CLPadLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLPadLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLPadLayerKernel(const CLPadLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLPadLayerKernel &operator=(const CLPadLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLPadLayerKernel(CLPadLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLPadLayerKernel &operator=(CLPadLayerKernel &&) = default;
- /** Default destructor */
- ~CLPadLayerKernel() = default;
- /** Set the input and output tensor.
- *
- * @param[in] input Source tensor. Data types supported: All.
- * @param[out] output Output tensor. Data type supported: same as @p input
- * @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i]
- * specifies the front and the end padding in the i-th dimension.
- * @param[in] constant_value (Optional) Constant value to be used for the padding.
- * @param[in] mode (Optional) Controls whether the padding should be filled with @p constant_value using CONSTANT,
- * or reflect the input, either including the border values (SYMMETRIC) or not (REFLECT).
- */
- void configure(const ICLTensor *input, ICLTensor *output, const PaddingList &padding, PixelValue constant_value = PixelValue(), PaddingMode mode = PaddingMode::CONSTANT);
- /** Set the input and output tensor.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: All.
- * @param[out] output Output tensor. Data type supported: same as @p input
- * @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i]
- * specifies the front and the end padding in the i-th dimension.
- * @param[in] constant_value (Optional) Constant value to be used for the padding.
- * @param[in] mode (Optional) Controls whether the padding should be filled with @p constant_value using CONSTANT,
- * or reflect the input, either including the border values (SYMMETRIC) or not (REFLECT).
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PaddingList &padding, PixelValue constant_value = PixelValue(),
- PaddingMode mode = PaddingMode::CONSTANT);
- /** Static function to check if given info will lead to a valid configuration of @ref CLPadLayerKernel
- *
- * @param[in] input Source tensor info. Data types supported: All.
- * @param[in] output Output tensor info. Data type supported: same as @p input
- * @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i]
- * specifies the front and the end padding in the i-th dimension.
- * @param[in] constant_value (Optional) Constant value to be used for the padding.
- * @param[in] mode (Optional) Controls whether the padding should be filled with @p constant_value using CONSTANT,
- * or reflect the input, either including the border values (SYMMETRIC) or not (REFLECT).
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PaddingList &padding, PixelValue constant_value = PixelValue(), PaddingMode mode = PaddingMode::CONSTANT);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
- int _input_start_x;
- int _input_start_y;
- bool _4d_enabled;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLPADLAYERKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLPERMUTEKERNEL_H
-#define ARM_COMPUTE_CLPERMUTEKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to perform tensor permutation.
- *
- * Permutes given a permutation vector
- */
-class CLPermuteKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLPermuteKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLPermuteKernel(const CLPermuteKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLPermuteKernel &operator=(const CLPermuteKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLPermuteKernel(CLPermuteKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLPermuteKernel &operator=(CLPermuteKernel &&) = default;
- /** Set the input and output of the kernel.
- *
- * @note Arbitrary permutation vectors are supported with rank not greater than 4
- *
- * @param[in] input The input tensor to permute. Data types supported: All.
- * @param[in] output The output tensor. Data types supported: Same as @p input
- * @param[in] perm Permutation vector
- */
- void configure(const ICLTensor *input, ICLTensor *output, const PermutationVector &perm);
- /** Set the input and output of the kernel.
- *
- * @note Arbitrary permutation vectors are supported with rank not greater than 4
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input The input tensor to permute. Data types supported: All.
- * @param[in] output The output tensor. Data types supported: Same as @p input
- * @param[in] perm Permutation vector
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PermutationVector &perm);
- /** Static function to check if given info will lead to a valid configuration of @ref CLPermuteKernel
- *
- * @note Arbitrary permutation vectors are supported with rank not greater than 4
- *
- * @param[in] input First tensor input info. Data types supported: All.
- * @param[in] output Output tensor info. Data types supported: same as @p input.
- * @param[in] perm Permutation vector
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PermutationVector &perm);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
- PermutationVector _perm;
-};
-} // arm_compute
-#endif /*ARM_COMPUTE_CLPERMUTEKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLPIXELWISEMULTIPLICATIONKERNEL_H
-#define ARM_COMPUTE_CLPIXELWISEMULTIPLICATIONKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ICLTensor;
-
-/** Interface for the pixelwise multiplication kernel. */
-class CLPixelWiseMultiplicationKernel : public ICLKernel
-{
-public:
- /** Default constructor.*/
- CLPixelWiseMultiplicationKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLPixelWiseMultiplicationKernel(const CLPixelWiseMultiplicationKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLPixelWiseMultiplicationKernel &operator=(const CLPixelWiseMultiplicationKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLPixelWiseMultiplicationKernel(CLPixelWiseMultiplicationKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLPixelWiseMultiplicationKernel &operator=(CLPixelWiseMultiplicationKernel &&) = default;
- /** Initialise the kernel's input, output and border mode.
- *
- * Valid configurations (Input1,Input2) -> Output :
- *
- * - (U8,U8) -> U8
- * - (U8,U8) -> S16
- * - (U8,S16) -> S16
- * - (S16,U8) -> S16
- * - (S16,S16) -> S16
- * - (F16,F16) -> F16
- * - (F32,F32) -> F32
- * - (QASYMM8,QASYMM8) -> QASYMM8
- * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
- * - (QSYMM16,QSYMM16) -> QSYMM16
- * - (QSYMM16,QSYMM16) -> S32
- *
- * @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
- * @param[in] input2 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
- * @param[out] output The output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
- * @param[in] scale Scale to apply after multiplication.
- * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
- * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate
- * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- */
- void configure(ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, float scale,
- ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
- /** Initialise the kernel's input, output and border mode.
- *
- * Valid configurations (Input1,Input2) -> Output :
- *
- * - (U8,U8) -> U8
- * - (U8,U8) -> S16
- * - (U8,S16) -> S16
- * - (S16,U8) -> S16
- * - (S16,S16) -> S16
- * - (F16,F16) -> F16
- * - (F32,F32) -> F32
- * - (QASYMM8,QASYMM8) -> QASYMM8
- * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
- * - (QSYMM16,QSYMM16) -> QSYMM16
- * - (QSYMM16,QSYMM16) -> S32
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
- * @param[in] input2 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
- * @param[out] output The output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
- * @param[in] scale Scale to apply after multiplication.
- * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
- * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate
- * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- */
- void configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, float scale,
- ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
- /** Static function to check if given info will lead to a valid configuration of @ref CLPixelWiseMultiplicationKernel
- *
- * Valid configurations (Input1,Input2) -> Output :
- *
- * - (U8,U8) -> U8
- * - (U8,U8) -> S16
- * - (U8,S16) -> S16
- * - (S16,U8) -> S16
- * - (S16,S16) -> S16
- * - (F16,F16) -> F16
- * - (F32,F32) -> F32
- * - (QASYMM8,QASYMM8) -> QASYMM8
- * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
- * - (QSYMM16,QSYMM16) -> QSYMM16
- * - (QSYMM16,QSYMM16) -> S32
- *
- * @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
- * @param[in] input2 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
- * @param[in] output The output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
- * @param[in] scale Scale to apply after multiplication.
- * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
- * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate
- * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float scale,
- ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
-
- // Inherited methods overridden:
- void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-private:
- const ITensorInfo *_input1;
- const ITensorInfo *_input2;
- ITensorInfo *_output;
-};
-
-/** Interface for the complex pixelwise multiplication kernel. */
-class CLComplexPixelWiseMultiplicationKernel : public ICLKernel
-{
-public:
- /** Default constructor.*/
- CLComplexPixelWiseMultiplicationKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLComplexPixelWiseMultiplicationKernel(const CLComplexPixelWiseMultiplicationKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLComplexPixelWiseMultiplicationKernel &operator=(const CLComplexPixelWiseMultiplicationKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLComplexPixelWiseMultiplicationKernel(CLComplexPixelWiseMultiplicationKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLComplexPixelWiseMultiplicationKernel &operator=(CLComplexPixelWiseMultiplicationKernel &&) = default;
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] input1 An input tensor info. Data types supported: F32. Number of channels supported: 2.
- * @param[in] input2 An input tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
- * @param[out] output The output tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- */
- void configure(ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
- /** Initialise the kernel's input, output and border mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input1 An input tensor info. Data types supported: F32. Number of channels supported: 2.
- * @param[in] input2 An input tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
- * @param[out] output The output tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- */
- void configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
- /** Static function to check if given info will lead to a valid configuration of @ref CLComplexPixelWiseMultiplicationKernel
- *
- * @param[in] input1 An input tensor info. Data types supported: F32. Number of channels supported: 2.
- * @param[in] input2 An input tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
- * @param[in] output The output tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
-
- // Inherited methods overridden:
- void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-private:
- const ITensorInfo *_input1;
- const ITensorInfo *_input2;
- ITensorInfo *_output;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLPIXELWISEMULTIPLICATIONKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLPOOLINGLAYERKERNEL_H
-#define ARM_COMPUTE_CLPOOLINGLAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-#include "arm_compute/core/Error.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the pooling layer kernel */
-class CLPoolingLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLPoolingLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLPoolingLayerKernel(const CLPoolingLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLPoolingLayerKernel &operator=(const CLPoolingLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLPoolingLayerKernel(CLPoolingLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLPoolingLayerKernel &operator=(CLPoolingLayerKernel &&) = default;
- /** Default destructor */
- ~CLPoolingLayerKernel() = default;
-
- /** Set the input and output tensors.
- *
- *
- * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[out] output Destination tensor. Data types supported: Same as @p input.
- * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
- * @param[out] indices (optional) The indices of the maximal values. Data type supported: U32.
- */
- void configure(const ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info, ICLTensor *indices = nullptr);
- /** Set the input and output tensors.
- *
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[out] output Destination tensor. Data types supported: Same as @p input.
- * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
- * @param[out] indices (optional) The indices of the maximal values. Data type supported: U32.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info, ICLTensor *indices = nullptr);
- /** Static function to check if given info will lead to a valid configuration of @ref CLPoolingLayerKernel
- *
- * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] output Destination tensor info. Data types supported: Same as @p input.
- * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
- * @param[in] indices (optional) The indices of the maximal values. Data type supported: U32.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, const ITensorInfo *indices = nullptr);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-public:
- const ICLTensor *_input;
- ICLTensor *_output;
- ICLTensor *_indices;
- PoolingLayerInfo _pool_info;
- DataLayout _data_layout;
- BorderSize _border_size;
- unsigned int _num_elems_processed_per_iteration;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLPOOLINGLAYERKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLPRIORBOXLAYERKERNEL_H
-#define ARM_COMPUTE_CLPRIORBOXLAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the PriorBox layer kernel. */
-class CLPriorBoxLayerKernel : public ICLKernel
-{
-public:
- /** Constructor */
- CLPriorBoxLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLPriorBoxLayerKernel(const CLPriorBoxLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLPriorBoxLayerKernel &operator=(const CLPriorBoxLayerKernel &) = delete;
- /** Default Move Constructor. */
- CLPriorBoxLayerKernel(CLPriorBoxLayerKernel &&) = default;
- /** Default move assignment operator */
- CLPriorBoxLayerKernel &operator=(CLPriorBoxLayerKernel &&) = default;
- /** Default destructor */
- ~CLPriorBoxLayerKernel() = default;
-
- /** Set the input and output tensors.
- *
- * @param[in] input1 First source tensor. Data types supported: F32. Data layouts supported: NCHW/NHWC.
- * @param[in] input2 Second source tensor. Data types and layouts supported: same as @p input1
- * @param[out] output Destination tensor. Output dimensions are [W * H * num_priors * 4, 2]. Data types and layouts supported: same as @p input1
- * @param[in] info Prior box layer info.
- * @param[in] min Minimum prior box values
- * @param[in] max Maximum prior box values
- * @param[in] aspect_ratios Aspect ratio values
- */
- void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const PriorBoxLayerInfo &info, cl::Buffer *min, cl::Buffer *max, cl::Buffer *aspect_ratios);
- /** Set the input and output tensors.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input1 First source tensor. Data types supported: F32. Data layouts supported: NCHW/NHWC.
- * @param[in] input2 Second source tensor. Data types and layouts supported: same as @p input1
- * @param[out] output Destination tensor. Output dimensions are [W * H * num_priors * 4, 2]. Data types and layouts supported: same as @p input1
- * @param[in] info Prior box layer info.
- * @param[in] min Minimum prior box values
- * @param[in] max Maximum prior box values
- * @param[in] aspect_ratios Aspect ratio values
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const PriorBoxLayerInfo &info, cl::Buffer *min, cl::Buffer *max,
- cl::Buffer *aspect_ratios);
- /** Static function to check if given info will lead to a valid configuration of @ref CLPriorBoxLayerKernel
- *
- * @param[in] input1 First source tensor info. Data types supported: F32. Data layouts supported: NCHW/NHWC.
- * @param[in] input2 Second source tensor info. Data types and layouts supported: same as @p input1
- * @param[in] output Destination tensor info. Output dimensions are [W * H * num_priors * 4, 2]. Data type supported: same as @p input1
- * @param[in] info Prior box layer info.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const PriorBoxLayerInfo &info);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input1;
- const ICLTensor *_input2;
- ICLTensor *_output;
- PriorBoxLayerInfo _info;
- int _num_priors;
- cl::Buffer *_min;
- cl::Buffer *_max;
- cl::Buffer *_aspect_ratios;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLPRIORBOXLAYERKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLQLSTMLAYERVNORMALIZATIONKERNEL_H
-#define ARM_COMPUTE_CLQLSTMLAYERVNORMALIZATIONKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the kernel to do layer normalization. */
-class CLQLSTMLayerNormalizationKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLQLSTMLayerNormalizationKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLQLSTMLayerNormalizationKernel(const CLQLSTMLayerNormalizationKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLQLSTMLayerNormalizationKernel &operator=(const CLQLSTMLayerNormalizationKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLQLSTMLayerNormalizationKernel(CLQLSTMLayerNormalizationKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLQLSTMLayerNormalizationKernel &operator=(CLQLSTMLayerNormalizationKernel &&) = default;
- /** Default destructor */
- ~CLQLSTMLayerNormalizationKernel() = default;
- /** Initialise the kernel's input and outputs.
- *
- * @param[in] input Source tensor with 2 dimensions. Data types supported: QSYMM16.
- * @param[out] output Destination tensor. Data type supported: same as @p input
- * @param[in] weight Weight tensor. Data types supported: Same as @p input.
- * @param[in] bias Bias tensor. Data types supported: S32.
- *
- */
- void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *weight, const ICLTensor *bias);
- /** Initialise the kernel's input and outputs.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor with 2 dimensions. Data types supported: QSYMM16.
- * @param[out] output Destination tensor. Data type supported: same as @p input
- * @param[in] weight Weight tensor. Data types supported: Same as @p input.
- * @param[in] bias Bias tensor. Data types supported: S32.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *weight, const ICLTensor *bias);
- /** Static function to check if given info will lead to a valid configuration of @ref CLQLSTMLayerNormalizationKernel
- *
- * @param[in] input Source tensor info with 2 dimensions. Data types supported: QSYMM16.
- * @param[in] output Destination info tensor. Data type supported: same as @p input
- * @param[in] weight Weight info tensor. Data types supported: Same as @p input.
- * @param[in] bias Bias tensor info. Data types supported: S32.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *weight, const ITensorInfo *bias);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- const ICLTensor *_weight;
- const ICLTensor *_bias;
- ICLTensor *_output;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLQLSTMLAYERVNORMALIZATIONKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLQUANTIZATIONLAYERKERNEL_H
-#define ARM_COMPUTE_CLQUANTIZATIONLAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the quantization layer kernel.
- *
- * @note The implementation supports only 3D input tensors.
- */
-class CLQuantizationLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLQuantizationLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLQuantizationLayerKernel(const CLQuantizationLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLQuantizationLayerKernel &operator=(const CLQuantizationLayerKernel &) = delete;
- /** Default Move Constructor. */
- CLQuantizationLayerKernel(CLQuantizationLayerKernel &&) = default;
- /** Default move assignment operator */
- CLQuantizationLayerKernel &operator=(CLQuantizationLayerKernel &&) = default;
- /** Default destructor */
- ~CLQuantizationLayerKernel() = default;
- /** Set the input, output.
- *
- * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16.
- * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16.
- *
- * @note Output auto initialization is not supported by this kernel
- */
- void configure(const ICLTensor *input, ICLTensor *output);
- /** Set the input, output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16.
- * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16.
- *
- * @note Output auto initialization is not supported by this kernel
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLQuantizationLayerKernel
- *
- * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16.
- * @param[in] output Destination tensor info with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLQUANTIZATIONLAYERKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLROIALIGNLAYERKERNEL_H
-#define ARM_COMPUTE_CLROIALIGNLAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLArray.h"
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the RoIAlign kernel.
- */
-class CLROIAlignLayerKernel : public ICLKernel
-{
-public:
- /** Constructor */
- CLROIAlignLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLROIAlignLayerKernel(const CLROIAlignLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLROIAlignLayerKernel &operator=(const CLROIAlignLayerKernel &) = delete;
- /** Default Move Constructor. */
- CLROIAlignLayerKernel(CLROIAlignLayerKernel &&) = default;
- /** Default move assignment operator. */
- CLROIAlignLayerKernel &operator=(CLROIAlignLayerKernel &&) = default;
- /** Default destructor */
- ~CLROIAlignLayerKernel() = default;
-
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner
- * as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ].
- * Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8/QASYMM8_SIGNED, otherwise same as @p input
- * @param[out] output Destination tensor. Data types supported: Same as @p input.
- * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo.
- *
- * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled
- * width and pooled height.
- * @note The z dimensions of @p output tensor and @p input tensor must be the same.
- * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array.
- */
- void configure(const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info);
- /** Set the input and output tensors.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner
- * as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ].
- * Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8/QASYMM8_SIGNED, otherwise same as @p input
- * @param[out] output Destination tensor. Data types supported: Same as @p input.
- * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo.
- *
- * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled
- * width and pooled height.
- * @note The z dimensions of @p output tensor and @p input tensor must be the same.
- * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info);
- /** Static function to check if given info will lead to a valid configuration of @ref CLROIAlignLayerKernel
- *
- * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
- * @param[in] rois ROIs tensor info. Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8/QASYMM8_SIGNED,
- * otherwise same as @p input
- * @param[in] output Destination tensor info. Data types supported: Same as @p input.
- * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo.
- *
- * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled
- * width and pooled height.
- * @note The z dimensions of @p output tensor and @p input tensor must be the same.
- * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array.
- *
- * @return a Status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *rois, ITensorInfo *output, const ROIPoolingLayerInfo &pool_info);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue);
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
- const ICLTensor *_rois;
- ROIPoolingLayerInfo _pool_info;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLROIALIGNLAYERKERNEL_H*/
+++ /dev/null
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLROIPOOLINGLAYERKERNEL_H
-#define ARM_COMPUTE_CLROIPOOLINGLAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-#include "arm_compute/core/CL/ICLArray.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the ROI pooling layer kernel */
-class CLROIPoolingLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLROIPoolingLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLROIPoolingLayerKernel(const CLROIPoolingLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLROIPoolingLayerKernel &operator=(const CLROIPoolingLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLROIPoolingLayerKernel(CLROIPoolingLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLROIPoolingLayerKernel &operator=(CLROIPoolingLayerKernel &&) = default;
- /** Default destructor */
- ~CLROIPoolingLayerKernel() = default;
-
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. Data types supported: F16/F32.
- * @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner
- * as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ]. Data types supported: U16
- * @param[out] output Destination tensor. Data types supported: Same as @p input.
- * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo.
- *
- * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled
- * width and pooled height.
- * @note The z dimensions of @p output tensor and @p input tensor must be the same.
- * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array.
- */
- void configure(const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info);
- /** Set the input and output tensors.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: F16/F32.
- * @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner
- * as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ]. Data types supported: U16
- * @param[out] output Destination tensor. Data types supported: Same as @p input.
- * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo.
- *
- * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled
- * width and pooled height.
- * @note The z dimensions of @p output tensor and @p input tensor must be the same.
- * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- const ICLTensor *_rois;
- ICLTensor *_output;
- ROIPoolingLayerInfo _pool_info;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLROIPOOLINGLAYERKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLRANGEKERNEL_H
-#define ARM_COMPUTE_CLRANGEKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Kernel class for Range
- *
- * range generates a 1-D tensor containing a sequence of numbers that begins at 'start' and extends by increments
- * of 'step' up to but not including 'end'.
- */
-class CLRangeKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLRangeKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLRangeKernel(const CLRangeKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLRangeKernel &operator=(const CLRangeKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLRangeKernel(CLRangeKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLRangeKernel &operator=(CLRangeKernel &&) = default;
- /** Default destructor */
- ~CLRangeKernel() = default;
- /** Initialize the kernel's output tensor, start, end and step of the sequence.
- *
- * @param[out] output Output tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
- * @param[in] start The starting value of the sequence.
- * @param[in] end The ending (not including) value of the sequence.
- * @param[in] step The gap between each pair of values in the sequence.
- */
- void configure(ICLTensor *output, float start, float end, float step);
- /** Initialize the kernel's output tensor, start, end and step of the sequence.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[out] output Output tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
- * @param[in] start The starting value of the sequence.
- * @param[in] end The ending (not including) value of the sequence.
- * @param[in] step The gap between each pair of values in the sequence.
- */
- void configure(const CLCompileContext &compile_context, ICLTensor *output, float start, float end, float step);
- /** Static function to check if given info will lead to a valid configuration of @ref CLRangeKernel
- *
- * @param[in] output Output tensor info. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
- * @param[in] start The starting value of the sequence.
- * @param[in] end The ending (not including) value of the sequence.
- * @param[in] step The gap between each pair of values in the sequence.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *output, float start, float end, float step);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- float _start; /**< Start of sequence */
- float _end; /**< End of sequence */
- float _step; /**< Increment/step value */
- ICLTensor *_output; /**< Destination tensor */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLRANGEKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLREDUCTIONOPERATIONKERNEL_H
-#define ARM_COMPUTE_CLREDUCTIONOPERATIONKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the reduction operation kernel
- */
-class CLReductionOperationKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLReductionOperationKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLReductionOperationKernel(const CLReductionOperationKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLReductionOperationKernel &operator=(const CLReductionOperationKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLReductionOperationKernel(CLReductionOperationKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLReductionOperationKernel &operator=(CLReductionOperationKernel &&) = default;
- /** Default destructor */
- ~CLReductionOperationKernel() = default;
-
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/S32/F16/F32.
- * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input.
- * Output will have the same number of dimensions as input.
- * @param[in] axis Axis along which to reduce. Supported reduction axis : 0,1,2,3
- * @param[in] op Reduction operation to perform. Operations supported: MEAN_SUM, PROD, SUM_SQUARE, SUM, MIN, MAX
- * @param[in] width (Optional) In case of x-axis we also need to provide the width of the input image.
- */
- void configure(const ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op, unsigned int width = 0);
- /** Set the input and output tensors.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/S32/F16/F32.
- * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input.
- * Output will have the same number of dimensions as input.
- * @param[in] axis Axis along which to reduce. Supported reduction axis : 0,1,2,3
- * @param[in] op Reduction operation to perform. Operations supported: MEAN_SUM, PROD, SUM_SQUARE, SUM, MIN, MAX
- * @param[in] width (Optional) In case of x-axis we also need to provide the width of the input image.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op, unsigned int width = 0);
-
- /** Static function to check if given info will lead to a valid configuration of @ref CLReductionOperationKernel.
- *
- * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/S32/F16/F32.
- * @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p input.
- * Output will have the same number of dimensions as input.
- * @param[in] axis Axis along which to reduce. Supported reduction axis : 0,1,2,3
- * @param[in] op Reduction operation to perform. Operations supported: MEAN_SUM, PROD, SUM_SQUARE, SUM, MIN, MAX
- * @param[in] width (Optional) In case of x-axis we also need to provide the width of the input image.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op, unsigned int width = 0);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
- unsigned int _reduction_axis;
- ReductionOperation _op;
- BorderSize _border_size;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLREDUCTIONOPERATIONKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLREMAPKERNEL_H
-#define ARM_COMPUTE_CLREMAPKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to perform a remap on a tensor */
-class CLRemapKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLRemapKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLRemapKernel(const CLRemapKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLRemapKernel &operator=(const CLRemapKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLRemapKernel(CLRemapKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLRemapKernel &operator=(CLRemapKernel &&) = default;
- /** Initialize the kernel's input, output and border mode.
- *
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[in] map_x Map for X coordinates. Data types supported: F32.
- * @param[in] map_y Map for Y coordinates. Data types supported: F32.
- * @param[out] output Destination tensor. Data types supported: U8. All but the lowest two dimensions must be the same size as in the input tensor, i.e. remapping is only performed within the XY-plane.
- * @param[in] policy The interpolation type.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, InterpolationPolicy policy, bool border_undefined);
- /** Initialize the kernel's input, output and border mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[in] map_x Map for X coordinates. Data types supported: F32.
- * @param[in] map_y Map for Y coordinates. Data types supported: F32.
- * @param[out] output Destination tensor. Data types supported: U8. All but the lowest two dimensions must be the same size as in the input tensor, i.e. remapping is only performed within the XY-plane.
- * @param[in] policy The interpolation type.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, InterpolationPolicy policy, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
- const ICLTensor *_map_x;
- const ICLTensor *_map_y;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLREMAPKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLREORGLAYERKERNEL_H
-#define ARM_COMPUTE_CLREORGLAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to perform a reorg layer */
-class CLReorgLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLReorgLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers). */
- CLReorgLayerKernel(const CLReorgLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers). */
- CLReorgLayerKernel &operator=(const CLReorgLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLReorgLayerKernel(CLReorgLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLReorgLayerKernel &operator=(CLReorgLayerKernel &&) = default;
- /** Initialize the kernel's input, output.
- *
- * @param[in] input Source tensor. Data types supported: All.
- * @param[out] output Destination tensor with tensor shape:
- * [width_input / stride, height_input / stride, channels_input * stride * stride, batch_size]. This means the output has
- * the same number of input elements. Data types supported: same as @p input.
- * @param[in] stride Stride value to use for reorganizing the values in the output tensor.
- * It defines the spatial distance between 2 consecutive pixels in the x and y direction
- */
- void configure(const ICLTensor *input, ICLTensor *output, int32_t stride);
- /** Initialize the kernel's input, output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: All.
- * @param[out] output Destination tensor with tensor shape:
- * [width_input / stride, height_input / stride, channels_input * stride * stride, batch_size]. This means the output has
- * the same number of input elements. Data types supported: same as @p input.
- * @param[in] stride Stride value to use for reorganizing the values in the output tensor.
- * It defines the spatial distance between 2 consecutive pixels in the x and y direction
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t stride);
- /** Static function to check if given info will lead to a valid configuration of @ref CLReorgLayerKernel
- *
- * @param[in] input Source tensor. Data types supported: All.
- * @param[in] output Destination tensor with tensor shape:
- * [width_input / stride, height_input / stride, channels_input * stride * stride, batch_size]. This means the output has
- * the same number of input elements. Data types supported: same as @p input. Data types supported: same as @p input.
- * @param[in] stride Stride value to use for reorganizing the values in the output tensor
- * It defines the spatial distance between 2 consecutive pixels in the x and y direction
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t stride);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLREORGLAYERKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLRESHAPELAYERKERNEL_H
-#define ARM_COMPUTE_CLRESHAPELAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the kernel to perform tensor reshaping */
-class CLReshapeLayerKernel : public ICLKernel
-{
-public:
- /** Set the input and output of the kernel
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor info. Data type supported: All.
- * @param[out] output Destination tensor info. Data type supported: Same as @p input
- */
- void configure(const CLCompileContext &compile_context, const ITensorInfo *input, ITensorInfo *output);
-
- /** Static function to check if given info will lead to a valid configuration of @ref CLReshapeLayerKernel
- *
- * @param[in] input Source tensor info. Data type supported: All
- * @param[in] output Destination tensor info. Data type supported: Same as @p input
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLRESHAPELAYERKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLREVERSEKERNEL_H
-#define ARM_COMPUTE_CLREVERSEKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the reverse kernel */
-class CLReverseKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLReverseKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLReverseKernel(const CLReverseKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLReverseKernel &operator=(const CLReverseKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLReverseKernel(CLReverseKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLReverseKernel &operator=(CLReverseKernel &&) = default;
- /** Default destructor */
- ~CLReverseKernel() = default;
- /** Initialise the kernel's inputis and output
- *
- * @param[in] input Input tensor. Data types supported: All.
- * @param[out] output Output tensor. Data type supported: Same as @p input
- * @param[in] axis Axis tensor. Contains the indices of the dimensions to reverse. Data type supported: U32
- */
- void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *axis);
- /** Initialise the kernel's inputis and output
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor. Data types supported: All.
- * @param[out] output Output tensor. Data type supported: Same as @p input
- * @param[in] axis Axis tensor. Contains the indices of the dimensions to reverse. Data type supported: U32
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *axis);
-
- /** Static function to check if given info will lead to a valid configuration of @ref CLReverseKernel
- *
- * @param[in] input Input tensor info. Data types supported: All.
- * @param[in] output Output tensor info. Data type supported: Same as @p input
- * @param[in] axis Axis tensor info. Contains the indices of the dimensions to reverse. Data type supported: U32
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *axis);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-public:
- const ICLTensor *_input;
- ICLTensor *_output;
- const ICLTensor *_axis;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLREVERSEKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLSCALEKERNEL_H
-#define ARM_COMPUTE_CLSCALEKERNEL_H
-
-#include "arm_compute/core/CL/ICLSimple2DKernel.h"
-#include "arm_compute/core/KernelDescriptors.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the scale kernel */
-class CLScaleKernel : public ICLSimple2DKernel
-{
-public:
- /** Initialise the kernel's inputs, output and interpolation policy
- *
- * @param[in] input Source tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32
- * @param[out] output Destination tensor. Data types supported: Same as @p input
- * All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
- * @param[in] info @ref ScaleKernelInfo Kernel descriptor to be used to configure.
- */
- void configure(const ICLTensor *input, ICLTensor *output, const ScaleKernelInfo &info);
- /** Initialise the kernel's inputs, output and interpolation policy
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32
- * @param[out] output Destination tensor. Data types supported: Same as @p input
- * All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
- * @param[in] info @ref ScaleKernelInfo Kernel descriptor to be used to configure.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ScaleKernelInfo &info);
-
- /** Static function to check if given info will lead to a valid configuration of @ref CLScaleKernel
- *
- * @param[in] input Source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32
- * @param[in] output Destination tensor info. Data types supported: Same as @p input
- * All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
- * @param[in] info @ref ScaleKernelInfo Kernel descriptor to be used to validate
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ScaleKernelInfo &info);
- /** Input tensor accessor.
- *
- * @return Pointer to input tensor.
- */
- const ICLTensor *input() const;
- /** Output tensor accessor.
- *
- * @return Pointer to output tensor.
- */
- const ICLTensor *output() const;
-
- // Inherited methods overridden:
- BorderSize border_size() const override;
- void run(const Window &window, cl::CommandQueue &queue) override;
-
- // Getter for interpolation policy
- InterpolationPolicy get_interpolation_policy() const
- {
- return _interpolation_policy;
- }
-
-private:
- InterpolationPolicy _interpolation_policy = InterpolationPolicy::BILINEAR;
- DataLayout _data_layout = DataLayout::UNKNOWN;
- bool _align_corners = false;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLSCALEKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLSCHARR3X3KERNEL_H
-#define ARM_COMPUTE_CLSCHARR3X3KERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the kernel to run a 3x3 Scharr filter on a tensor.
- *
- * @f[
- * \mathbf{G}_x=\begin{vmatrix}
- * -3 & 0 & +3\\
- * -10& 0 & +10\\
- * -3 & 0 & +3
- * \end{vmatrix}
- * @f]
- * @f[
- * \mathbf{G}_y=\begin{vmatrix}
- * -3 & -10 & -3\\
- * 0 & 0 & 0\\
- * +3 & +10 & +3
- * \end{vmatrix}
- * @f]
- */
-class CLScharr3x3Kernel : public ICLKernel
-{
-public:
- /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */
- CLScharr3x3Kernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLScharr3x3Kernel(const CLScharr3x3Kernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLScharr3x3Kernel &operator=(const CLScharr3x3Kernel &) = delete;
- /** Allow instances of this class to be moved */
- CLScharr3x3Kernel(CLScharr3x3Kernel &&) = default;
- /** Allow instances of this class to be moved */
- CLScharr3x3Kernel &operator=(CLScharr3x3Kernel &&) = default;
- /** Initialise the kernel's source, destination and border.
- *
- * @note At least one of output_x or output_y must be set.
- *
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16.
- * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
- /** Initialise the kernel's source, destination and border.
- *
- * @note At least one of output_x or output_y must be set.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16.
- * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-private:
- bool _run_scharr_x; /**< Do we need to run Scharr X ? */
- bool _run_scharr_y; /**< Do we need to run Scharr Y ? */
- const ICLTensor *_input; /**< Input image */
- ICLTensor *_output_x; /**< Output image for scharr X */
- ICLTensor *_output_y; /**< Output image for scharr Y */
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLSCHARR3X3KERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLSELECTKERNEL_H
-#define ARM_COMPUTE_CLSELECTKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ICLTensor;
-
-/** OpenCL interface for executing the select kernel
- *
- * Select is computed by:
- * @f[ output(i) = condition(i) ? x(i) : y(i) @f]
- **/
-class CLSelectKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLSelectKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLSelectKernel(const CLSelectKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLSelectKernel &operator=(const CLSelectKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLSelectKernel(CLSelectKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLSelectKernel &operator=(CLSelectKernel &&) = default;
- /** Default destructor */
- ~CLSelectKernel() = default;
- /** Initialise the kernel's inputs and output.
- *
- * @param[in] c Condition input tensor. Data types supported: U8.
- * @param[in] x First input tensor. Data types supported: All.
- * @param[out] y Second input tensor. Data types supported: Same as @p x
- * @param[in] output Output tensor. Data types supported: Same as @p x.
- */
- void configure(const ICLTensor *c, const ICLTensor *x, const ICLTensor *y, ICLTensor *output);
- /** Initialise the kernel's inputs and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] c Condition input tensor. Data types supported: U8.
- * @param[in] x First input tensor. Data types supported: All.
- * @param[out] y Second input tensor. Data types supported: Same as @p x
- * @param[in] output Output tensor. Data types supported: Same as @p x.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *c, const ICLTensor *x, const ICLTensor *y, ICLTensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLSelectKernel
- *
- * @param[in] c Condition input tensor. Data types supported: U8.
- * @param[in] x First input tensor. Data types supported: All.
- * @param[in] y Second input tensor. Data types supported: Same as @p x
- * @param[in] output Output tensor. Data types supported: Same as @p x.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *c, const ITensorInfo *x, const ITensorInfo *y, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_c; /**< Condition tensor */
- const ICLTensor *_x; /**< Source tensor 1 */
- const ICLTensor *_y; /**< Source tensor 2 */
- ICLTensor *_output; /**< Destination tensor */
- bool _has_same_rank; /**< Flag that indicates if condition tensor and other inputs have the same rank */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLWHEREKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLSOBEL3X3KERNEL_H
-#define ARM_COMPUTE_CLSOBEL3X3KERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the kernel to run a 3x3 Sobel filter on a tensor. */
-class CLSobel3x3Kernel : public ICLKernel
-{
-public:
- /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */
- CLSobel3x3Kernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLSobel3x3Kernel(const CLSobel3x3Kernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLSobel3x3Kernel &operator=(const CLSobel3x3Kernel &) = delete;
- /** Allow instances of this class to be moved */
- CLSobel3x3Kernel(CLSobel3x3Kernel &&) = default;
- /** Allow instances of this class to be moved */
- CLSobel3x3Kernel &operator=(CLSobel3x3Kernel &&) = default;
- /** Default destructor */
- ~CLSobel3x3Kernel() = default;
- /** Initialise the kernel's source, destination and border.
- *
- * @note At least one of output_x or output_y must be set.
- *
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16.
- * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
- /** Initialise the kernel's source, destination and border.
- *
- * @note At least one of output_x or output_y must be set.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16.
- * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-private:
- const ICLTensor *_input; /**< Input tensor */
- ICLTensor *_output_x; /**< Output tensor for Sobel X */
- ICLTensor *_output_y; /**< Output tensor for Sobel Y */
- bool _run_sobel_x; /**< Do we need to run Sobel X ? */
- bool _run_sobel_y; /**< Do we need to run Sobel Y ? */
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLSOBEL3X3KERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLSOBEL5X5KERNEL_H
-#define ARM_COMPUTE_CLSOBEL5X5KERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the kernel to run the horizontal pass of 5x5 Sobel filter on a tensor. */
-class CLSobel5x5HorKernel : public ICLKernel
-{
-public:
- /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */
- CLSobel5x5HorKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLSobel5x5HorKernel(const CLSobel5x5HorKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLSobel5x5HorKernel &operator=(const CLSobel5x5HorKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLSobel5x5HorKernel(CLSobel5x5HorKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLSobel5x5HorKernel &operator=(CLSobel5x5HorKernel &&) = default;
- /** Default destructor */
- ~CLSobel5x5HorKernel() = default;
-
- /** Initialise the kernel's source, destination and border.
- *
- * @note At least one of output_x or output_y must be set.
- *
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16.
- * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
- /** Initialise the kernel's source, destination and border.
- *
- * @note At least one of output_x or output_y must be set.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16.
- * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-private:
- const ICLTensor *_input; /**< Input tensor */
- ICLTensor *_output_x; /**< X output of horizontal pass */
- ICLTensor *_output_y; /**< Y output of horizontal pass */
- bool _run_sobel_x; /**< Do we need to run Sobel X ? */
- bool _run_sobel_y; /**< Do we need to run Sobel Y ? */
- BorderSize _border_size; /**< Border size */
-};
-
-/** Interface for the kernel to run the vertical pass of 5x5 Sobel filter on a tensor. */
-class CLSobel5x5VertKernel : public ICLKernel
-{
-public:
- /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */
- CLSobel5x5VertKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLSobel5x5VertKernel(const CLSobel5x5VertKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLSobel5x5VertKernel &operator=(const CLSobel5x5VertKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLSobel5x5VertKernel(CLSobel5x5VertKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLSobel5x5VertKernel &operator=(CLSobel5x5VertKernel &&) = default;
- /** Default destructor */
- ~CLSobel5x5VertKernel() = default;
-
- /** Initialise the kernel's source, destination and border.
- *
- * @note At least one of output_x or output_y must be set and the corresponding input.
- *
- * @param[in] input_x (Optional) Input for X (X output of horizontal pass). Data types supported: S16.
- * @param[in] input_y (Optional) Input for Y (Y output of horizontal pass). Data types supported: S16.
- * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16.
- * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
- /** Initialise the kernel's source, destination and border.
- *
- * @note At least one of output_x or output_y must be set and the corresponding input.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input_x (Optional) Input for X (X output of horizontal pass). Data types supported: S16.
- * @param[in] input_y (Optional) Input for Y (Y output of horizontal pass). Data types supported: S16.
- * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16.
- * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-private:
- const ICLTensor *_input_x; /**< X input (X output of the horizontal pass) */
- const ICLTensor *_input_y; /**< Y input (Y output of the horizontal pass) */
- ICLTensor *_output_x; /**< X output of sobel */
- ICLTensor *_output_y; /**< Y output of sobel */
- bool _run_sobel_x; /**< Do we need to run sobel X? */
- bool _run_sobel_y; /**< Do we need to run sobel Y? */
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLSOBEL5X5KERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLSOBEL7X7KERNEL_H
-#define ARM_COMPUTE_CLSOBEL7X7KERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the kernel to run the horizontal pass of 7x7 Sobel filter on a tensor. */
-class CLSobel7x7HorKernel : public ICLKernel
-{
-public:
- /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */
- CLSobel7x7HorKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLSobel7x7HorKernel(const CLSobel7x7HorKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLSobel7x7HorKernel &operator=(const CLSobel7x7HorKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLSobel7x7HorKernel(CLSobel7x7HorKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLSobel7x7HorKernel &operator=(CLSobel7x7HorKernel &&) = default;
- /** Default destructor */
- ~CLSobel7x7HorKernel() = default;
-
- /** Initialise the kernel's source, destination and border.
- *
- * @note At least one of output_x or output_y must be set.
- *
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S32.
- * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S32.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
- /** Initialise the kernel's source, destination and border.
- *
- * @note At least one of output_x or output_y must be set.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S32.
- * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S32.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-private:
- const ICLTensor *_input; /**< Input tensor */
- ICLTensor *_output_x; /**< X output of horizontal pass */
- ICLTensor *_output_y; /**< Y output of horizontal pass */
- bool _run_sobel_x; /**< Do we need to run Sobel X ? */
- bool _run_sobel_y; /**< Do we need to run Sobel Y ? */
- BorderSize _border_size; /**< Border size */
-};
-
-/** Interface for the kernel to run the vertical pass of 7x7 Sobel filter on a tensor. */
-class CLSobel7x7VertKernel : public ICLKernel
-{
-public:
- /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */
- CLSobel7x7VertKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLSobel7x7VertKernel(const CLSobel7x7VertKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLSobel7x7VertKernel &operator=(const CLSobel7x7VertKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLSobel7x7VertKernel(CLSobel7x7VertKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLSobel7x7VertKernel &operator=(CLSobel7x7VertKernel &&) = default;
- /** Default destructor */
- ~CLSobel7x7VertKernel() = default;
-
- /** Initialise the kernel's source, destination and border.
- *
- * @note At least one of output_x or output_y must be set and the corresponding input.
- *
- * @param[in] input_x (Optional) Input for X (X output of horizontal pass). Data types supported: S32.
- * @param[in] input_y (Optional) Input for Y (Y output of horizontal pass). Data types supported: S32.
- * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S32.
- * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S32.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
- /** Initialise the kernel's source, destination and border.
- *
- * @note At least one of output_x or output_y must be set and the corresponding input.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input_x (Optional) Input for X (X output of horizontal pass). Data types supported: S32.
- * @param[in] input_y (Optional) Input for Y (Y output of horizontal pass). Data types supported: S32.
- * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S32.
- * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S32.
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-private:
- const ICLTensor *_input_x; /**< X input (X output of the horizontal pass) */
- const ICLTensor *_input_y; /**< Y input (Y output of the horizontal pass) */
- ICLTensor *_output_x; /**< X output of sobel */
- ICLTensor *_output_y; /**< Y output of sobel */
- bool _run_sobel_x; /**< Do we need to run sobel X? */
- bool _run_sobel_y; /**< Do we need to run sobel Y? */
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLSOBEL7X7KERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLSOFTMAXLAYERKERNEL_H
-#define ARM_COMPUTE_CLSOFTMAXLAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLSimple3DKernel.h"
-#include "arm_compute/core/KernelDescriptors.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for max, shifting, exponentiating and summing the logits */
-class CLLogits1DMaxShiftExpSumKernel : public ICLKernel
-{
-public:
- /** Info for whether a parallel reduction will be run and the vector size of the execution. */
- using ParallelReductionInfo = std::tuple<bool, unsigned int>;
-
-public:
- /** Default constructor */
- CLLogits1DMaxShiftExpSumKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLLogits1DMaxShiftExpSumKernel(const CLLogits1DMaxShiftExpSumKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLLogits1DMaxShiftExpSumKernel &operator=(const CLLogits1DMaxShiftExpSumKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLLogits1DMaxShiftExpSumKernel(CLLogits1DMaxShiftExpSumKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLLogits1DMaxShiftExpSumKernel &operator=(CLLogits1DMaxShiftExpSumKernel &&) = default;
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
- * @param[in,out] max Max values tensor. Data types supported: same as @p input
- * @param[out] output Destination tensor. Data types supported: same as @p input
- * @param[out] sum Sum of 1D logits tensor. Data types supported: same as @p input
- * @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo.
- */
- void configure(const ICLTensor *input, ICLTensor *max, ICLTensor *output, ICLTensor *sum, const SoftmaxKernelInfo &info);
- /** Set the input and output tensors.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
- * @param[in,out] max Max values tensor. Data types supported: same as @p input
- * @param[out] output Destination tensor. Data types supported: same as @p input
- * @param[out] sum Sum of 1D logits tensor. Data types supported: same as @p input
- * @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *max, ICLTensor *output, ICLTensor *sum, const SoftmaxKernelInfo &info);
- /** Static function to check if given info will lead to a valid configuration of @ref CLLogits1DMaxShiftExpSumKernel
- *
- * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
- * @param[in] max Max values tensor. Data types supported: same as @p input
- * @param[in] output Destination tensor. Data types supported: same as @p input
- * @param[in] sum Sum of 1D logits tensor. Data types supported: same as @p input
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *max, const ITensorInfo *output, const ITensorInfo *sum);
- /** Checks if the given size is eligible for parallel reduction
- *
- * @note Serial reduction is launched for width < (_grid_size * _serial_vector_size).
- * @note Parallel reduction is launched for width >= (_grid_size * _serial_vector_size) and vector_size is forced to 4.
- *
- * @param[in] size Size to check
- *
- * @return A two-element tuple where the first element is a boolean specifying if a parallel reduction will be run,
- * while the second element is the vector size of the execution.
- */
- static ParallelReductionInfo is_parallel_reduction(size_t size);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_max;
- ICLTensor *_output;
- ICLTensor *_sum;
-
-private:
- static const unsigned int _grid_size;
- static const unsigned int _serial_vector_size;
- static const unsigned int _parallel_vector_size;
-};
-/** Interface for calculating the final step of the Softmax Layer where each logit value is multiplied by the inverse of the sum of the logits. */
-class CLLogits1DNormKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLLogits1DNormKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLLogits1DNormKernel(const CLLogits1DNormKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLLogits1DNormKernel &operator=(const CLLogits1DNormKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLLogits1DNormKernel(CLLogits1DNormKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLLogits1DNormKernel &operator=(CLLogits1DNormKernel &&) = default;
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. Data types supported: S32/F16/F32. If this kernel is used for log softmax, only F32/F16 is supported.
- * @param[in] sum Sum tensor. Dimensions should be dim(input)-1. Data types supported: same as @p input
- * @param[out] output Destination tensor. Data types supported: QASYMM8/QASYMM8_SIGNED for S32 @p input, or same as @p input
- * @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo.
- */
- void configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, const SoftmaxKernelInfo &info);
- /** Set the input and output tensors.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: S32/F16/F32. If this kernel is used for log softmax, only F32/F16 is supported.
- * @param[in] sum Sum tensor. Dimensions should be dim(input)-1. Data types supported: same as @p input
- * @param[out] output Destination tensor. Data types supported: QASYMM8/QASYMM8_SIGNED for S32 @p input, or same as @p input
- * @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, const SoftmaxKernelInfo &info);
- /** Static function to check if given info will lead to a valid configuration of @ref CLLogits1DNormKernel
- *
- * @param[in] input Source tensor. Data types supported: S32/F16/F32. If this kernel is used for log softmax, only F32/F16 is supported.
- * @param[in] sum Sum tensor. Dimensions should be dim(input)-1. Data types supported: same as @p input
- * @param[in] output Destination tensor. Data types supported: QASYMM8 for S32 @p input, or same as @p input
- * @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, const SoftmaxKernelInfo &info);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- const ICLTensor *_sum;
- ICLTensor *_output;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLSOFTMAXLAYERKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLSPACETOBATCHLAYERKERNEL_H
-#define ARM_COMPUTE_CLSPACETOBATCHLAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the space to batch kernel */
-class CLSpaceToBatchLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLSpaceToBatchLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLSpaceToBatchLayerKernel(const CLSpaceToBatchLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLSpaceToBatchLayerKernel &operator=(const CLSpaceToBatchLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLSpaceToBatchLayerKernel(CLSpaceToBatchLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLSpaceToBatchLayerKernel &operator=(CLSpaceToBatchLayerKernel &&) = default;
- /** Default destructor */
- ~CLSpaceToBatchLayerKernel() = default;
- /** Initialise the kernel's inputs and output.
- *
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
- * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32
- * @param[in] paddings 2-D tensor with shape [2, M]. Data types supported: S32
- * @param[out] output Tensor output. Data types supported: same as @p input
- */
- void configure(const ICLTensor *input, const ICLTensor *block_shape, const ICLTensor *paddings, ICLTensor *output);
- /** Initialise the kernel's inputs and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
- * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32
- * @param[in] paddings 2-D tensor with shape [2, M]. Data types supported: S32
- * @param[out] output Tensor output. Data types supported: same as @p input
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *block_shape, const ICLTensor *paddings, ICLTensor *output);
- /** Initialise the kernel's input and output. (Static block shape and paddings)
- *
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
- * @param[in] block_shape_x Block shape x value.
- * @param[in] block_shape_y Block shape y value.
- * @param[in] padding_left The left padding of the output tensor.
- * @param[in] padding_right The right padding of the output tensor.
- * @param[out] output Tensor output. Data types supported: same as @p input
- */
- void configure(const ICLTensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, ICLTensor *output);
- /** Initialise the kernel's input and output. (Static block shape and paddings)
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
- * @param[in] block_shape_x Block shape x value.
- * @param[in] block_shape_y Block shape y value.
- * @param[in] padding_left The left padding of the output tensor.
- * @param[in] padding_right The right padding of the output tensor.
- * @param[out] output Tensor output. Data types supported: same as @p input
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, ICLTensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLSpaceToBatchLayerKernel
- *
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
- * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32
- * @param[in] paddings 2-D tensor with shape [2, M]. Data types supported: S32
- * @param[in] output Tensor output. Data types supported: same as @p input
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *paddings, const ITensorInfo *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLSpaceToBatchLayerKernel (Static block shape and paddings)
- *
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
- * @param[in] block_shape_x Block shape x value.
- * @param[in] block_shape_y Block shape y value.
- * @param[in] padding_left The left padding of the output tensor.
- * @param[in] padding_right The right padding of the output tensor.
- * @param[in] output Tensor output. Data types supported: same as @p input
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input; /**< Source tensor */
- const ICLTensor *_block_shape; /**< Block shape tensor */
- const ICLTensor *_paddings; /**< Paddings tensor */
- ICLTensor *_output; /**< Destination tensor */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLSPACETOBATCHLAYERKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2019-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLSPACETODEPTHLAYERKERNEL_H
-#define ARM_COMPUTE_CLSPACETODEPTHLAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the space to depth kernel */
-class CLSpaceToDepthLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLSpaceToDepthLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLSpaceToDepthLayerKernel(const CLSpaceToDepthLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLSpaceToDepthLayerKernel &operator=(const CLSpaceToDepthLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLSpaceToDepthLayerKernel(CLSpaceToDepthLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLSpaceToDepthLayerKernel &operator=(CLSpaceToDepthLayerKernel &&) = default;
- /** Default destructor */
- ~CLSpaceToDepthLayerKernel() = default;
- /** Initialise the kernel's inputs and output.
- *
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
- * @param[out] output Tensor output. Data types supported: same as @p input
- * @param[in] block_shape Block shape value.
- */
- void configure(const ICLTensor *input, ICLTensor *output, int32_t block_shape);
- /** Initialise the kernel's inputs and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
- * @param[out] output Tensor output. Data types supported: same as @p input
- * @param[in] block_shape Block shape value.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t block_shape);
- /** Static function to check if given info will lead to a valid configuration of @ref CLSpaceToDepthLayerKernel.
- *
- * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All.
- * @param[in] output Tensor output info. Data types supported: same as @p input
- * @param[in] block_shape Block shape value.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input; /**< Source tensor */
- ICLTensor *_output; /**< Destination tensor */
- int32_t _block_shape; /**< Block shape */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLSPACETODEPTHLAYERKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef ARM_COMPUTE_CLSTACKLAYERKERNEL_H
-#define ARM_COMPUTE_CLSTACKLAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to stacks a rank-R tensor into one with rank-(R+1) along the axis dimension.*/
-class CLStackLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLStackLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLStackLayerKernel(const CLStackLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLStackLayerKernel &operator=(const CLStackLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLStackLayerKernel(CLStackLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLStackLayerKernel &operator=(CLStackLayerKernel &&) = default;
- /** Default destructor */
- ~CLStackLayerKernel() = default;
- /** Initialise the kernel's inputs and output
- *
- * @note Supported input tensor rank: up to 4
- *
- * @param[in] input Input tensor. Data types supported: All.
- * @param[in] axis The dimension to stack the tensors along. It must be smaller than the number of input dimensions.
- * @param[in] idx_input Index of the input tensor in the list of tensors to stack.
- * All tensors in the list must have the same shape
- * @param[in] num_tensors Number of tensors to stack
- * @param[out] output Output tensor. Data types supported: Same as @p input.
- *
- */
- void configure(const ICLTensor *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, ICLTensor *output);
- /** Initialise the kernel's inputs and output
- *
- * @note Supported input tensor rank: up to 4
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor. Data types supported: All.
- * @param[in] axis The dimension to stack the tensors along. It must be smaller than the number of input dimensions.
- * @param[in] idx_input Index of the input tensor in the list of tensors to stack.
- * All tensors in the list must have the same shape
- * @param[in] num_tensors Number of tensors to stack
- * @param[out] output Output tensor. Data types supported: Same as @p input.
- *
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, ICLTensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLStackLayerKernel
- *
- * @note Supported input tensor rank: up to 4
- *
- * @param[in] input Input tensor info. Data types supported: All.
- * @param[in] axis The dimension to stack the tensors along. It must be smaller than the number of input dimensions.
- * @param[in] idx_input Index of the input tensor in the list of tensors to stack
- * All tensors in the list must have the same shape
- * @param[in] num_tensors Number of tensors to stack
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLSTACKLAYERKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CL_STRIDED_SLICE_KERNEL_H
-#define ARM_COMPUTE_CL_STRIDED_SLICE_KERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-/** Interface for the kernel to perform tensor strided slicing */
-class CLStridedSliceKernel : public ICLKernel
-{
-public:
- /** Configure kernel
- *
- * @note Supported tensor rank: up to 4
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor info. Data type supported: All.
- * @param[out] output Destination tensor info. Data type supported: Same as @p input
- * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input).
- * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input).
- * @param[in] strides The strides of the dimensions of the input tensor to be sliced. The length must be of rank(input).
- * @param[in] begin_mask If the ith bit of begin_mask is set, starts[i] is ignored and the fullest possible range in that dimension is used instead.
- * @param[in] end_mask If the ith bit of end_mask is set, ends[i] is ignored and the fullest possible range in that dimension is used instead.
- * @param[in] shrink_axis_mask If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1.
- * A slice of size 1 starting from starts[i] in the dimension must be preserved.
- */
- void configure(const CLCompileContext &compile_context, const ITensorInfo *input, ITensorInfo *output,
- const Coordinates &starts, const Coordinates &ends, const BiStrides &strides,
- int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask);
-
- /** Static function to check if given info will lead to a valid configuration of @ref CLStridedSliceKernel
- *
- * @note Supported tensor rank: up to 4
- *
- * @param[in] input Source tensor. Data type supported: All.
- * @param[in] output Destination tensor. Data type supported: Same as @p input
- * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input).
- * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input).
- * @param[in] strides The strides of the dimensions of the input tensor to be sliced. The length must be of rank(input).
- * @param[in] begin_mask If the ith bit of begin_mask is set, starts[i] is ignored and the fullest possible range in that dimension is used instead.
- * @param[in] end_mask If the ith bit of end_mask is set, ends[i] is ignored and the fullest possible range in that dimension is used instead.
- * @param[in] shrink_axis_mask If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1.
- * A slice of size 1 starting from starts[i] in the dimension must be preserved.
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output,
- const Coordinates &starts, const Coordinates &ends, const BiStrides &strides,
- int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask);
-
- // Inherited methods overridden:
- void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CL_STRIDED_SLICE_KERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLTABLELOOKUPKERNEL_H
-#define ARM_COMPUTE_CLTABLELOOKUPKERNEL_H
-
-#include "arm_compute/core/CL/ICLSimple2DKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-class ICLLut;
-
-/** Interface for the kernel to perform table lookup calculations. */
-class CLTableLookupKernel : public ICLSimple2DKernel
-{
-public:
- /** Initialise the kernel's input, lut and output.
- *
- * @param[in] input An input tensor. Data types supported: U8, S16.
- * @param[in] lut The input LUT. Data types supported: U8, S16.
- * @param[out] output The output tensor. Data types supported: U8, S16.
- */
- void configure(const ICLTensor *input, const ICLLut *lut, ICLTensor *output);
- /** Initialise the kernel's input, lut and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input An input tensor. Data types supported: U8, S16.
- * @param[in] lut The input LUT. Data types supported: U8, S16.
- * @param[out] output The output tensor. Data types supported: U8, S16.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLLut *lut, ICLTensor *output);
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLTABLELOOKUPKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLTHRESHOLDKERNEL_H
-#define ARM_COMPUTE_CLTHRESHOLDKERNEL_H
-
-#include "arm_compute/core/CL/ICLSimple2DKernel.h"
-#include "arm_compute/core/KernelDescriptors.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ICLTensor;
-
-/** Interface for the thresholding kernel. */
-class CLThresholdKernel : public ICLSimple2DKernel
-{
-public:
- /**Initialise the kernel's input, output and threshold parameters.
- *
- * @param[in] input An input tensor. Data types supported: U8
- * @param[out] output The output tensor. Data types supported: U8.
- * @param[in] info Threshold descriptor
- */
- void configure(const ICLTensor *input, ICLTensor *output, const ThresholdKernelInfo &info);
- /**Initialise the kernel's input, output and threshold parameters.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input An input tensor. Data types supported: U8
- * @param[out] output The output tensor. Data types supported: U8.
- * @param[in] info Threshold descriptor
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ThresholdKernelInfo &info);
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NETHRESHOLDKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLTILEKERNEL_H
-#define ARM_COMPUTE_CLTILEKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to perform a Tile operation */
-class CLTileKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLTileKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLTileKernel(const CLTileKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLTileKernel &operator=(const CLTileKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLTileKernel(CLTileKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLTileKernel &operator=(CLTileKernel &&) = default;
- /** Default destructor */
- ~CLTileKernel() = default;
- /** Set the source, destination of the kernel
- *
- * @param[in] input Source tensor. Data type supported: All.
- * @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension.
- * Cannot have more than 4 elements (tiling in dimensions greater than 4 is not supported).
- * @param[out] output Destination tensor. Same as @p input
- *
- */
- void configure(const ICLTensor *input, ICLTensor *output, const Multiples &multiples);
- /** Set the source, destination of the kernel
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data type supported: All.
- * @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension.
- * Cannot have more than 4 elements (tiling in dimensions greater than 4 is not supported).
- * @param[out] output Destination tensor. Same as @p input
- *
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Multiples &multiples);
- /** Static function to check if given info will lead to a valid configuration of @ref CLTileKernel
- *
- * @param[in] input Source tensor info. Data type supported: All.
- * @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension.
- * Cannot have more than 4 elements (tiling in dimensions greater than 4 is not supported).
- * @param[in] output Destination tensor info. Same as @p input
- *
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Multiples &multiples);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLTILEKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLTRANSPOSEKERNEL_H
-#define ARM_COMPUTE_CLTRANSPOSEKERNEL_H
-
-#include "arm_compute/core/CL/ICLSimple2DKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel which transposes the elements of a matrix.
- *
- * [width, height, batch] -> [height, width, batch]
- *
- */
-class CLTransposeKernel : public ICLSimple2DKernel
-{
-public:
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input tensor. Data types supported: All.
- * @param[out] output Output tensor. Data type supported: Same as @p input
- */
- void configure(const ICLTensor *input, ICLTensor *output);
- /** Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor. Data types supported: All.
- * @param[out] output Output tensor. Data type supported: Same as @p input
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLTransposeKernel
- *
- * @param[in] input Input tensor. Data types supported: All.
- * @param[in] output Output tensor. Data type supported: Same as @p input
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLTRANSPOSEKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLUPSAMPLELAYERKERNEL_H
-#define ARM_COMPUTE_CLUPSAMPLELAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the UpsampleLayer kernel on OpenCL. */
-class CLUpsampleLayerKernel : public ICLKernel
-{
-public:
- /** Constructor */
- CLUpsampleLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLUpsampleLayerKernel(const CLUpsampleLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLUpsampleLayerKernel &operator=(const CLUpsampleLayerKernel &) = delete;
- /** Default Move Constructor. */
- CLUpsampleLayerKernel(CLUpsampleLayerKernel &&) = default;
- /** Default move assignment operator */
- CLUpsampleLayerKernel &operator=(CLUpsampleLayerKernel &&) = default;
- /** Default destructor */
- ~CLUpsampleLayerKernel() = default;
-
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Source tensor. Data types supported: All.
- * @param[out] output Destination tensor. Data types supported: same as @p input.
- * @param[in] info Contains stride information described in @ref Size2D.
- * @param[in] upsampling_policy Defines the policy to fill the intermediate pixels.
- */
- void configure(const ICLTensor *input, ICLTensor *output, const Size2D &info, const InterpolationPolicy upsampling_policy);
- /** Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: All.
- * @param[out] output Destination tensor. Data types supported: same as @p input.
- * @param[in] info Contains stride information described in @ref Size2D.
- * @param[in] upsampling_policy Defines the policy to fill the intermediate pixels.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Size2D &info, const InterpolationPolicy upsampling_policy);
- /** Static function to check if given info will lead to a valid configuration of @ref CLUpsampleLayerKernel
- *
- * @param[in] input Source tensor info. Data types supported: All.
- * @param[in] output Destination tensor info. Data types supported: same as @p input.
- * @param[in] info Contains stride information described in @ref Size2D.
- * @param[in] upsampling_policy Defines the policy to fill the intermediate pixels.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &info, const InterpolationPolicy upsampling_policy);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
- Size2D _info;
- DataLayout _data_layout;
- unsigned int _num_elems_processed_per_iteration_input_x;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLUPSAMPLELAYERKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLWARPAFFINEKERNEL_H
-#define ARM_COMPUTE_CLWARPAFFINEKERNEL_H
-
-#include "arm_compute/core/CL/ICLSimple2DKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the warp affine kernel.*/
-class CLWarpAffineKernel : public ICLSimple2DKernel
-{
-public:
- /** Initialize the function's source, destination, interpolation policy and border_mode.
- *
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor, Data types supported: U8.
- * @param[in] matrix The perspective matrix. Must be 2x3 of type float
- * The matrix argument requires 9 values, the last 3 values are ignored.
- * @param[in] policy The interpolation type.
- */
- void configure(const ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy);
- /** Initialize the function's source, destination, interpolation policy and border_mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor, Data types supported: U8.
- * @param[in] matrix The perspective matrix. Must be 2x3 of type float
- * The matrix argument requires 9 values, the last 3 values are ignored.
- * @param[in] policy The interpolation type.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy);
-
- // Inherited methods overridden:
- BorderSize border_size() const override;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLWARPAFFINEKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLWARPERSPECTIVEKERNEL_H
-#define ARM_COMPUTE_CLWARPERSPECTIVEKERNEL_H
-
-#include "arm_compute/core/CL/ICLSimple2DKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-/** Interface for the warp perspective kernel.*/
-class CLWarpPerspectiveKernel : public ICLSimple2DKernel
-{
-public:
- /** Initialize the function's source, destination, interpolation policy and border_mode.
- *
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor, Data types supported: U8.
- * @param[in] matrix The perspective matrix. Must be 3x3 of type float.
- * @param[in] policy The interpolation type.
- */
- void configure(const ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy);
- /** Initialize the function's source, destination, interpolation policy and border_mode.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. Data types supported: U8.
- * @param[out] output Destination tensor, Data types supported: U8.
- * @param[in] matrix The perspective matrix. Must be 3x3 of type float.
- * @param[in] policy The interpolation type.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy);
-
- // Inherited methods overridden:
- BorderSize border_size() const override;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLWARPERSPECTIVEKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLWEIGHTSRESHAPEKERNEL_H
-#define ARM_COMPUTE_CLWEIGHTSRESHAPEKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-/** OpenCL kernel to perform reshaping on the weights used by convolution and locally connected layer
- *
- * Rearranges each 3-dimensional kernel to a single row leading to a matrix with linearized kernels.
- * In combination with the @ref CLIm2ColKernel can transform a convolution to a matrix multiplication.
- *
- * For example assuming a 3D weight kernel of 3x3 dimensions and depth of 2 we have:
- * @f[
- * \left( \begin{array}{ccc}
- * a000 & a001 & a002 \\
- * a010 & a011 & a012 \\
- * a020 & a021 & a022 \\
- * \end{array} \right)
- * \left( \begin{array}{ccc}
- * a100 & a101 & a102 \\
- * a110 & a111 & a112 \\
- * a120 & a121 & a122 \\
- * \end{array} \right)
- * \rightarrow
- * \left( \begin{array}{ccccccccc}
- * a000 & a001 & a002 & a010 & a011 & a012 & a020 & a021 & a022 & a100 & a101 & a102 & a110 & a111 & a112 & a120 & a121 & a122 \\
- * \end{array} \right)
- * @f]
- */
-class CLWeightsReshapeKernel : public ICLKernel
-{
-public:
- /** Constructor.*/
- CLWeightsReshapeKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLWeightsReshapeKernel(const CLWeightsReshapeKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLWeightsReshapeKernel &operator=(const CLWeightsReshapeKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLWeightsReshapeKernel(CLWeightsReshapeKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLWeightsReshapeKernel &operator=(CLWeightsReshapeKernel &&) = default;
- /** Default destructor */
- ~CLWeightsReshapeKernel() = default;
- /** Set the input and output of the kernel.
- *
- * @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared,
- * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared. Data types supported: All
- * @param[in] biases The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with
- * dimensions [OFM, num_patches] if unshared. Data types supported: F16/F32, for quantized types this must be nullptr.
- * @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types.
- * @param[out] output The output tensor. Should be a 2D Tensor if there are no groups and the weights are not shared; a 3D Tensor otherwise.
- * Data types supported: Same as @p input
- * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout
- * Number of groups greater than one are only supported for NCHW data layout, and the number of weights must be a multiple of it.
- */
- void configure(const ICLTensor *input, const ICLTensor *biases, ICLTensor *output, unsigned int num_groups = 1);
- /** Set the input and output of the kernel.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared,
- * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared. Data types supported: All
- * @param[in] biases The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with
- * dimensions [OFM, num_patches] if unshared. Data types supported: F16/F32, for quantized types this must be nullptr.
- * @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types.
- * @param[out] output The output tensor. Should be a 2D Tensor if there are no groups and the weights are not shared; a 3D Tensor otherwise.
- * Data types supported: Same as @p input
- * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout
- * Number of groups greater than one are only supported for NCHW data layout, and the number of weights must be a multiple of it.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *biases, ICLTensor *output, unsigned int num_groups = 1);
- /** Static function to check if given info will lead to a valid configuration of @ref CLWeightsReshapeKernel
- *
- * @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared,
- * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared. Data types supported: All
- * @param[in] biases The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with
- * dimensions [OFM, num_patches] if unshared. Data types supported: F16/F32, for quantized types this must be nullptr.
- * @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types.
- * @param[in] output The output tensor. Should be a 2D Tensor if there are no groups and the weights are not shared; a 3D Tensor otherwise.
- * Data types supported: Same as @p input
- * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout
- * Number of groups greater than one are only supported for NCHW data layout, and the number of weights must be a multiple of it.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *biases, const ITensorInfo *output, unsigned int num_groups = 1);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- const ICLTensor *_biases;
- ICLTensor *_output;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLWEIGHTSRESHAPEKERNEL_H */
\ No newline at end of file
+++ /dev/null
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef ARM_COMPUTE_CLWIDTHCONCATENATE_2TENSORS_KERNEL_H
-#define ARM_COMPUTE_CLWIDTHCONCATENATE_2TENSORS_KERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-/** Interface for the width concatenate kernel of 2 tensors.
- * The input1 and input2 tensors will be concatenated into the output tensor.
- */
-class CLWidthConcatenate2TensorsKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLWidthConcatenate2TensorsKernel() = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLWidthConcatenate2TensorsKernel(const CLWidthConcatenate2TensorsKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLWidthConcatenate2TensorsKernel &operator=(const CLWidthConcatenate2TensorsKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLWidthConcatenate2TensorsKernel(CLWidthConcatenate2TensorsKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLWidthConcatenate2TensorsKernel &operator=(CLWidthConcatenate2TensorsKernel &&) = default;
- /** Default destructor */
- ~CLWidthConcatenate2TensorsKernel() = default;
- /** Initialise the kernel's input1s and output
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input1 First input tensor. Data types supported: All.
- * @param[in] input2 Second input tensor. Data types supported: same as @p input1
- * @param[out] output Output tensor. Data types supported: Same as @p input1.
- */
- void configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLWidthConcatenate2TensorsKernel
- *
- * @param[in] input1 First tensor info. Data types supported: All.
- * @param[in] input2 Second tensor info. Data types supported: same as @p input1
- * @param[in] output Output tensor info. Data types supported: Same as @p input1.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLWIDTHCONCATENATE_2TENSORS_KERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef ARM_COMPUTE_CLWIDTHCONCATENATE_4TENSORS_KERNEL_H
-#define ARM_COMPUTE_CLWIDTHCONCATENATE_4TENSORS_KERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-/** Interface for the width concatenate kernel of 4 tensors.
- * All input tensors will be concatenated into the output tensor.
- */
-class CLWidthConcatenate4TensorsKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLWidthConcatenate4TensorsKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLWidthConcatenate4TensorsKernel(const CLWidthConcatenate4TensorsKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLWidthConcatenate4TensorsKernel &operator=(const CLWidthConcatenate4TensorsKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLWidthConcatenate4TensorsKernel(CLWidthConcatenate4TensorsKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLWidthConcatenate4TensorsKernel &operator=(CLWidthConcatenate4TensorsKernel &&) = default;
- /** Default destructor */
- ~CLWidthConcatenate4TensorsKernel() = default;
- /** Initialise the kernel's input1s and output
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input1 First input tensor. Data types supported: All.
- * @param[in] input2 Second input tensor. Data types supported: same as @p input1
- * @param[in] input3 Third input tensor. Data types supported: same as @p input1
- * @param[in] input4 Fourth input tensor. Data types supported: same as @p input1
- * @param[out] output Output tensor. Data types supported: Same as @p input1.
- */
- void configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *input3, ITensorInfo *input4, ITensorInfo *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLWidthConcatenate4TensorsKernel
- *
- * @param[in] input1 First tensor info. Data types supported: All.
- * @param[in] input2 Second tensor info. Data types supported: same as @p input1
- * @param[in] input3 Third tensor info. Data types supported: same as @p input1
- * @param[in] input4 Fourth tensor info. Data types supported: same as @p input1
- * @param[in] output Output tensor info. Data types supported: Same as @p input1.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *input3, const ITensorInfo *input4, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLWIDTHCONCATENATE_4TENSORS_KERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef ARM_COMPUTE_CLWIDTHCONCATENATELAYERKERNEL_H
-#define ARM_COMPUTE_CLWIDTHCONCATENATELAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-/** Interface for the width concatenate kernel.
- * The input tensor will be concatenated into the output tensor.
- */
-class CLWidthConcatenateLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLWidthConcatenateLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLWidthConcatenateLayerKernel(const CLWidthConcatenateLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLWidthConcatenateLayerKernel &operator=(const CLWidthConcatenateLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLWidthConcatenateLayerKernel(CLWidthConcatenateLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLWidthConcatenateLayerKernel &operator=(CLWidthConcatenateLayerKernel &&) = default;
- /** Default destructor */
- ~CLWidthConcatenateLayerKernel() = default;
- /** Initialise the kernel's inputs and output
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor. Data types supported: All.
- * @param[in] width_offset The offset on the X axis.
- * @param[in,out] output Output tensor. Data types supported: Same as @p input.
- *
- */
- void configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int width_offset, ITensorInfo *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLWidthConcatenateLayerKernel
- *
- * @param[in] input Input tensor info. Data types supported: All.
- * @param[in] width_offset The offset on the X axis.
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, unsigned int width_offset, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_CLWIDTHCONCATENATELAYERKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLWINOGRADFILTERTRANSFORMKERNEL_H
-#define ARM_COMPUTE_CLWINOGRADFILTERTRANSFORMKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the Winograd filter transform kernel. */
-class CLWinogradFilterTransformKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLWinogradFilterTransformKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLWinogradFilterTransformKernel(const CLWinogradFilterTransformKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLWinogradFilterTransformKernel &operator=(const CLWinogradFilterTransformKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLWinogradFilterTransformKernel(CLWinogradFilterTransformKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLWinogradFilterTransformKernel &operator=(CLWinogradFilterTransformKernel &&) = default;
- /** Default destructor */
- ~CLWinogradFilterTransformKernel() = default;
- /** Set the input and output tensor.
- *
- * @note Winograd filter transform supports the following configurations for NCWH data layout
- * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3),
- * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
- * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
- *
- * @note Winograd filter transform supports the following configurations for NHWC data layout
- * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
- * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
- *
- * Strides: only unit strides
- *
- * @param[in] input Source tensor. The input is a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] (NCHW data layout) or [IFM, kernel_x, kernel_y, OFM] (NHWC data layout). Data types supported: F16/F32.
- * @param[out] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_filter_transform_shape. Data types supported: Same as @p input
- * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo
- */
- void configure(const ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info);
- /** Set the input and output tensor.
- *
- * @note Winograd filter transform supports the following configurations for NCWH data layout
- * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3),
- * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
- * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
- *
- * @note Winograd filter transform supports the following configurations for NHWC data layout
- * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
- * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
- *
- * Strides: only unit strides
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. The input is a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] (NCHW data layout) or [IFM, kernel_x, kernel_y, OFM] (NHWC data layout). Data types supported: F16/F32.
- * @param[out] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_filter_transform_shape. Data types supported: Same as @p input
- * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info);
- /** Static function to check if given info will lead to a valid configuration of @ref CLWinogradFilterTransformKernel
- *
- * @note Winograd filter transform supports the following configurations for NCWH data layout
- * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3),
- * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
- * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
- *
- * @note Winograd filter transform supports the following configurations for NHWC data layout
- * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
- * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
- *
- * Strides: only unit strides
- *
- * @param[in] input Source tensor. The input is a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] (NCHW data layout) or [IFM, kernel_x, kernel_y, OFM] (NHWC data layout). Data types supported: F16/F32.
- * @param[out] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_filter_transform_shape. Data types supported: Same as @p input
- * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLWINOGRADFILTERTRANSFORMKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLWINOGRADINPUTTRANSFORMKERNEL_H
-#define ARM_COMPUTE_CLWINOGRADINPUTTRANSFORMKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to perform Winograd input transform.*/
-class CLWinogradInputTransformKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLWinogradInputTransformKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLWinogradInputTransformKernel(const CLWinogradInputTransformKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLWinogradInputTransformKernel &operator=(const CLWinogradInputTransformKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLWinogradInputTransformKernel(CLWinogradInputTransformKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLWinogradInputTransformKernel &operator=(CLWinogradInputTransformKernel &&) = default;
- /** Set the input and output of the kernel.
- *
- * @note Winograd input transform supports the following configurations for NCWH data layout
- * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3),
- * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
- * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
- *
- * @note Winograd input transform supports the following configurations for NHWC data layout
- * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
- * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
- *
- * Strides: only unit strides
- *
- * @param[in] input The input tensor to transform. Data types supported: F16/F32
- * @param[in] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_input_transform_shape. Data types supported: Same as @p input
- * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo.
- */
- void configure(const ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info);
- /** Set the input and output of the kernel.
- *
- * @note Winograd input transform supports the following configurations for NCWH data layout
- * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3),
- * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
- * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
- *
- * @note Winograd input transform supports the following configurations for NHWC data layout
- * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
- * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
- *
- * Strides: only unit strides
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input The input tensor to transform. Data types supported: F16/F32
- * @param[in] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_input_transform_shape. Data types supported: Same as @p input
- * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info);
- /** Static function to check if given info will lead to a valid configuration of @ref CLWinogradInputTransformKernel
- *
- * @note Winograd input transform supports the following configurations for NCWH data layout
- * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3),
- * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
- * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
- *
- * @note Winograd input transform supports the following configurations for NHWC data layout
- * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
- * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
- *
- * Strides: only unit strides
- *
- * @param[in] input The input tensor to transform. Data types supported: F16/F32
- * @param[in] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_input_transform_shape. Data types supported: Same as @p input
- * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-private:
- using WinogradKey = std::pair<std::pair<int, int>, std::pair<int, int>>;
-
- BorderSize _border_size;
- const ICLTensor *_input;
- ICLTensor *_output;
- DataLayout _data_layout;
- int _num_tiles_x;
- int _num_tiles_y;
- unsigned int _step_z;
-};
-} // arm_compute
-#endif /*ARM_COMPUTE_CLWINOGRADINPUTTRANSFORMKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLWINOGRADOUTPUTTRANSFORMKERNEL_H
-#define ARM_COMPUTE_CLWINOGRADOUTPUTTRANSFORMKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the Winograd output transform kernel. */
-class CLWinogradOutputTransformKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLWinogradOutputTransformKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLWinogradOutputTransformKernel(const CLWinogradOutputTransformKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLWinogradOutputTransformKernel &operator=(const CLWinogradOutputTransformKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLWinogradOutputTransformKernel(CLWinogradOutputTransformKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLWinogradOutputTransformKernel &operator=(CLWinogradOutputTransformKernel &&) = default;
- /** Default destructor */
- ~CLWinogradOutputTransformKernel() = default;
- /** Set the input and output tensor.
- *
- * @note Winograd output transform supports the following configurations for NCWH data layout
- * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3),
- * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
- * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
- *
- * @note Winograd output transform supports the following configurations for NHWC data layout
- * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
- * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
- *
- * Strides: only unit strides
- *
- * @param[in] input Source tensor with shape [C, N, K, batches]. Data types supported: F16/F32.
- * @param[in] bias Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. It can be a nullptr. Data type supported: as @p input
- * @param[out] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_output_transform_shape. Data types supported: Same as @p input
- * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- */
- void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const WinogradInfo &winograd_info, const ActivationLayerInfo &act_info = ActivationLayerInfo());
- /** Set the input and output tensor.
- *
- * @note Winograd output transform supports the following configurations for NCWH data layout
- * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3),
- * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
- * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
- *
- * @note Winograd output transform supports the following configurations for NHWC data layout
- * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
- * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
- *
- * Strides: only unit strides
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor with shape [C, N, K, batches]. Data types supported: F16/F32.
- * @param[in] bias Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. It can be a nullptr. Data type supported: as @p input
- * @param[out] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_output_transform_shape. Data types supported: Same as @p input
- * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const WinogradInfo &winograd_info,
- const ActivationLayerInfo &act_info = ActivationLayerInfo());
-
- /** Static function to check if given info will lead to a valid configuration of @ref CLWinogradOutputTransformKernel
- *
- * @note Winograd output transform supports the following configurations for NCWH data layout
- * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3),
- * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
- * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
- *
- * @note Winograd output transform supports the following configurations for NHWC data layout
- * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
- * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
- *
- * Strides: only unit strides
- *
- * @param[in] input Source tensor with shape [C, N, K, batches]. Data types supported: F16/F32.
- * @param[in] bias Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. It can be a nullptr. Data type supported: as @p input
- * @param[out] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_output_transform_shape. Data types supported: Same as @p input
- * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation @ref ActivationLayerInfo. Only RELU, BOUNDED_RELU, LU_BOUNDED_RELU, LEAKY_RELU and SOFT_RELU supported.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const WinogradInfo &winograd_info, const ActivationLayerInfo &act_info = ActivationLayerInfo());
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- using WinogradKey = std::pair<std::pair<int, int>, std::pair<int, int>>;
-
- const ICLTensor *_input;
- const ICLTensor *_bias;
- ICLTensor *_output;
- bool _is_nhwc;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLWINOGRADOUTPUTTRANSFORMKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLYOLOLAYERKERNEL_H
-#define ARM_COMPUTE_CLYOLOLAYERKERNEL_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the YOLO layer kernel that performs partial activation.
- * For each box, activate only:
- * - x and y position (channel 0 and 1 of each box)
- * - objectiveness (channel 4 of each box)
- * - classes (channel 5 to (classes - 5) of each box)
- */
-class CLYOLOLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLYOLOLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLYOLOLayerKernel(const CLYOLOLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLYOLOLayerKernel &operator=(const CLYOLOLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLYOLOLayerKernel(CLYOLOLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLYOLOLayerKernel &operator=(CLYOLOLayerKernel &&) = default;
- /** Default destructor */
- ~CLYOLOLayerKernel() = default;
- /** Set the input and output tensor.
- *
- * @note If the output tensor is a nullptr, the activation function will be performed in-place
- *
- * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result
- * of the activation function. Data types supported: F16/F32.
- * @param[out] output Destination tensor. Data type supported: same as @p input
- * @param[in] act_info Activation layer information.
- * @param[in] num_classes Number of classes to activate (must be submultiple of @p input channels)
- */
- void configure(ICLTensor *input, ICLTensor *output, const ActivationLayerInfo &act_info, int32_t num_classes);
- /** Set the input and output tensor.
- *
- * @note If the output tensor is a nullptr, the activation function will be performed in-place
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result
- * of the activation function. Data types supported: F16/F32.
- * @param[out] output Destination tensor. Data type supported: same as @p input
- * @param[in] act_info Activation layer information.
- * @param[in] num_classes Number of classes to activate (must be submultiple of @p input channels)
- */
- void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const ActivationLayerInfo &act_info, int32_t num_classes);
- /** Static function to check if given info will lead to a valid configuration of @ref CLYOLOLayerKernel
- *
- * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result
- * of the activation function. Data types supported: F16/F32.
- * @param[in] output Destination tensor info. Data type supported: same as @p input
- * @param[in] act_info Activation layer information.
- * @param[in] num_classes Number of classes to activate (must be submultiple of @p input channels)
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info, int32_t num_classes);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- ICLTensor *_input;
- ICLTensor *_output;
- bool _run_in_place;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLYOLOLAYERKERNEL_H */
+++ /dev/null
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_ICLDEPTHWISECONVOLUTIONKERNEL3x3_H
-#define ARM_COMPUTE_ICLDEPTHWISECONVOLUTIONKERNEL3x3_H
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the kernel to run a 3x3 depthwise convolution on a tensor.
- */
-class ICLDepthwiseConvolutionLayer3x3Kernel : public ICLKernel
-{
-public:
- /** Default constructor */
- ICLDepthwiseConvolutionLayer3x3Kernel()
- : _border_size(0), _input(), _output(), _weights(), _biases(), _conv_stride_y(1), _output_multipliers(), _output_shifts(), _is_quantized(false)
- {
- }
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- ICLDepthwiseConvolutionLayer3x3Kernel(const ICLDepthwiseConvolutionLayer3x3Kernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- ICLDepthwiseConvolutionLayer3x3Kernel &operator=(const ICLDepthwiseConvolutionLayer3x3Kernel &) = delete;
- /** Default Move Constructor. */
- ICLDepthwiseConvolutionLayer3x3Kernel(ICLDepthwiseConvolutionLayer3x3Kernel &&) = default;
- /** Default move assignment operator */
- ICLDepthwiseConvolutionLayer3x3Kernel &operator=(ICLDepthwiseConvolutionLayer3x3Kernel &&) = default;
- /** Initialize the function's source, destination, conv and border_size.
- *
- * @param[in] input Source tensor. DataType supported: QASYMM8/F16/F32.
- * @param[in] weights Weights tensor. A 3D tensor with dimensions [3, 3, IFM].
- * Data type supported: Same as @p input, QASYMM8/QSYMM8_PER_CHANNEL when input is QASYMM8.
- * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input, S32 when input is QASYMM8.
- * @param[out] output Destination tensor. Data type supported: Same as @p input.
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported for QASYMM8.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization,
- * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
- * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization,
- * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
- */
- virtual void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
- unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U),
- const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) = 0;
- /** Initialize the function's source, destination, conv and border_size.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor. DataType supported: QASYMM8/F16/F32.
- * @param[in] weights Weights tensor. A 3D tensor with dimensions [3, 3, IFM].
- * Data type supported: Same as @p input, QASYMM8/QSYMM8_PER_CHANNEL when input is QASYMM8.
- * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
- * Data type supported: Same as @p input, S32 when input is QASYMM8.
- * @param[out] output Destination tensor. Data type supported: Same as @p input.
- * @param[in] conv_info Padding and stride information to use for the convolution.
- * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported for QASYMM8.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization,
- * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
- * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization,
- * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
- */
- virtual void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
- unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U),
- const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) = 0;
-
-protected:
- BorderSize _border_size;
- const ICLTensor *_input;
- ICLTensor *_output;
- const ICLTensor *_weights;
- const ICLTensor *_biases;
- unsigned int _conv_stride_y;
- const ICLTensor *_output_multipliers;
- const ICLTensor *_output_shifts;
- bool _is_quantized;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_ICLDEPTHWISECONVOLUTIONKERNEL3x3_H */
#ifndef ARM_COMPUTE_ICLOPERATOR_H
#define ARM_COMPUTE_ICLOPERATOR_H
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "arm_compute/core/Types.h"
+
#include "arm_compute/runtime/IOperator.h"
#include "arm_compute/runtime/IRuntimeContext.h"
#include "arm_compute/runtime/Types.h"
namespace arm_compute
{
+class ICLKernel;
namespace experimental
{
/** Basic interface for functions which have a single async CL kernel */
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#ifndef ARM_COMPUTE_ICLSIMPLEFUNCTION_H
#define ARM_COMPUTE_ICLSIMPLEFUNCTION_H
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
#include "arm_compute/runtime/IFunction.h"
#include <memory>
{
// Forward declarations
class CLRuntimeContext;
+class CLFillBorderKernel;
+class ICLKernel;
/** Basic interface for functions which have a single OpenCL kernel */
class ICLSimpleFunction : public IFunction
ICLSimpleFunction &operator=(const ICLSimpleFunction &) = delete;
/** Default move assignment operator */
ICLSimpleFunction &operator=(ICLSimpleFunction &&) = default;
+ /** Default destructor */
+ ~ICLSimpleFunction();
// Inherited methods overridden:
void run() override final;
protected:
- std::unique_ptr<ICLKernel> _kernel; /**< Kernel to run */
- CLFillBorderKernel _border_handler; /**< Kernel to handle borders */
- CLRuntimeContext *_ctx; /**< Context to use */
+ std::unique_ptr<ICLKernel> _kernel; /**< Kernel to run */
+ std::unique_ptr<CLFillBorderKernel> _border_handler; /**< Kernel to handle borders */
+ CLRuntimeContext *_ctx; /**< Context to use */
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_ICLSIMPLEFUNCTION_H */
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to run @ref CLAbsoluteDifferenceKernel
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to run @ref CLAccumulateKernel */
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLActivationLayerKernel
*
#ifndef ARM_COMPUTE_CLARGMINMAXLAYER_H
#define ARM_COMPUTE_CLARGMINMAXLAYER_H
-#include "arm_compute/core/CL/kernels/CLArgMinMaxLayerKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLReshapeLayer.h"
{
class ITensorInfo;
class ICLTensor;
+class CLArgMinMaxLayerKernel;
/** Function to calculate the index of the minimum or maximum values in a
* tensor based on an axis.
* @param[in] memory_manager (Optional) Memory manager.
*/
CLArgMinMaxLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied */
+ CLArgMinMaxLayer(const CLArgMinMaxLayer &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLArgMinMaxLayer &operator=(const CLArgMinMaxLayer &) = delete;
+ /** Prevent instances of this class from being moved */
+ CLArgMinMaxLayer(CLArgMinMaxLayer &&) = delete;
+ /** Prevent instances of this class from being moved */
+ CLArgMinMaxLayer &operator=(CLArgMinMaxLayer &&) = delete;
+ /** Default destructor */
+ ~CLArgMinMaxLayer();
/** Set the input and output tensors.
*
* @param[in] input Input source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/S32/F16/F32.
void run() override;
private:
- MemoryGroup _memory_group;
- std::vector<CLTensor> _results_vector;
- CLTensor _not_reshaped_output;
- std::vector<CLArgMinMaxLayerKernel> _reduction_kernels_vector;
- CLReshapeLayer _reshape;
- unsigned int _num_of_stages;
- unsigned int _reduction_axis;
+ MemoryGroup _memory_group;
+ std::vector<CLTensor> _results_vector;
+ CLTensor _not_reshaped_output;
+ std::vector<std::unique_ptr<CLArgMinMaxLayerKernel>> _reduction_kernels_vector;
+ CLReshapeLayer _reshape;
+ unsigned int _num_of_stages;
+ unsigned int _reduction_axis;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLARGMINMAXLAYER_H */
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h"
#include "arm_compute/core/Types.h"
+#include <memory>
+
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
+class CLBatchNormalizationLayerKernel;
/** Basic function to run @ref CLNormalizationLayerKernel and simulate a batch normalization layer.
*
public:
/** Default constructor */
CLBatchNormalizationLayer();
+ /** Prevent instances of this class from being copied */
+ CLBatchNormalizationLayer(const CLBatchNormalizationLayer &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLBatchNormalizationLayer &operator=(const CLBatchNormalizationLayer &) = delete;
+ /** Prevent instances of this class from being moved */
+ CLBatchNormalizationLayer(CLBatchNormalizationLayer &&) = delete;
+ /** Prevent instances of this class from being moved */
+ CLBatchNormalizationLayer &operator=(CLBatchNormalizationLayer &&) = delete;
+ /** Default destructor */
+ ~CLBatchNormalizationLayer();
/** Set the input and output tensors.
*
* @note If the output tensor is a nullptr or is equal to the input, the batch normalization function will be performed in-place
void run() override;
private:
- CLBatchNormalizationLayerKernel _norm_kernel; /**< BatchNormalization layer kernel to run */
+ std::unique_ptr<CLBatchNormalizationLayerKernel> _norm_kernel; /**< BatchNormalization layer kernel to run */
};
-}
+} // namespace arm_compute
#endif /* ARM_COMPUTE_CLBATCHNORMALIZATIONLAYER_H */
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/CL/kernels/CLBatchToSpaceLayerKernel.h"
#include "arm_compute/core/Types.h"
+#include <memory>
+
namespace arm_compute
{
+class CLCompileContext;
+class ITensorInfo;
+class CLBatchToSpaceLayerKernel;
class ICLTensor;
/** Basic function to run @ref CLBatchToSpaceLayerKernel. */
public:
/** Default constructor */
CLBatchToSpaceLayer();
+ /** Prevent instances of this class from being copied */
+ CLBatchToSpaceLayer(const CLBatchToSpaceLayer &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLBatchToSpaceLayer &operator=(const CLBatchToSpaceLayer &) = delete;
+ /** Prevent instances of this class from being moved */
+ CLBatchToSpaceLayer(CLBatchToSpaceLayer &&) = delete;
+ /** Prevent instances of this class from being moved */
+ CLBatchToSpaceLayer &operator=(CLBatchToSpaceLayer &&) = delete;
+ /** Default destructor */
+ ~CLBatchToSpaceLayer();
/** Set the input and output tensors.
*
* @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
void run() override;
private:
- CLBatchToSpaceLayerKernel _batch_to_space_kernel; /**< CLBatchToSpaceLayerKernel to run */
+ std::unique_ptr<CLBatchToSpaceLayerKernel> _batch_to_space_kernel; /**< CLBatchToSpaceLayerKernel to run */
};
-}
+} // namespace arm_compute
#endif /* ARM_COMPUTE_CLBATCHTOSPACELAYER_H */
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to run @ref CLBitwiseAndKernel.
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to run @ref CLBitwiseNotKernel.
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to run @ref CLBitwiseOrKernel.
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to run @ref CLBitwiseXorKernel.
#ifndef ARM_COMPUTE_CLBOUNDINGBOXTRANSOFORM_H
#define ARM_COMPUTE_CLBOUNDINGBOXTRANSOFORM_H
-#include "arm_compute/core/CL/kernels/CLBoundingBoxTransformKernel.h"
+#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
namespace arm_compute
{
+class CLCompileContext;
+class CLBoundingBoxTransformKernel;
+class BoundingBoxTransformInfo;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLBoundingBoxTransformKernel.
*
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to execute box filter 3x3. This function calls the following OpenCL kernels:
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/CL/kernels/CLCannyEdgeKernel.h"
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/IMemoryManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
namespace arm_compute
{
+class CLCompileContext;
+class CLFillBorderKernel;
+class CLGradientKernel;
+class CLEdgeNonMaxSuppressionKernel;
+class CLEdgeTraceKernel;
class ICLTensor;
/** Basic function to execute canny edge on OpenCL. This function calls the following OpenCL kernels and functions:
CLCannyEdge(const CLCannyEdge &) = delete;
/** Prevent instances of this class from being copied (As this class contains pointers) */
CLCannyEdge &operator=(const CLCannyEdge &) = delete;
+ /** Default destructor */
+ ~CLCannyEdge();
/** Initialise the function's source, destination, thresholds, gradient size, normalization type and border mode.
*
* @param[in,out] input Source tensor. Data types supported: U8. (Written to only for border_mode != UNDEFINED)
virtual void run() override;
private:
- MemoryGroup _memory_group; /**< Function's memory group */
- std::unique_ptr<IFunction> _sobel; /**< Pointer to Sobel kernel. */
- CLGradientKernel _gradient; /**< Gradient kernel. */
- CLFillBorderKernel _border_mag_gradient; /**< Fill border on magnitude tensor kernel */
- CLEdgeNonMaxSuppressionKernel _non_max_suppr; /**< Non-Maxima suppression kernel. */
- CLEdgeTraceKernel _edge_trace; /**< Edge tracing kernel. */
- CLImage _gx; /**< Source tensor - Gx component. */
- CLImage _gy; /**< Source tensor - Gy component. */
- CLImage _mag; /**< Source tensor - Magnitude. */
- CLImage _phase; /**< Source tensor - Phase. */
- CLImage _nonmax; /**< Source tensor - Non-Maxima suppressed. */
- CLImage _visited, _recorded, _l1_list_counter, _l1_stack; /**< Temporary tensors */
- ICLTensor *_output; /**< Output tensor provided by the user. */
+ MemoryGroup _memory_group; /**< Function's memory group */
+ std::unique_ptr<IFunction> _sobel; /**< Pointer to Sobel kernel. */
+ std::unique_ptr<CLGradientKernel> _gradient; /**< Gradient kernel. */
+ std::unique_ptr<CLFillBorderKernel> _border_mag_gradient; /**< Fill border on magnitude tensor kernel */
+ std::unique_ptr<CLEdgeNonMaxSuppressionKernel> _non_max_suppr; /**< Non-Maxima suppression kernel. */
+ std::unique_ptr<CLEdgeTraceKernel> _edge_trace; /**< Edge tracing kernel. */
+ CLImage _gx; /**< Source tensor - Gx component. */
+ CLImage _gy; /**< Source tensor - Gy component. */
+ CLImage _mag; /**< Source tensor - Magnitude. */
+ CLImage _phase; /**< Source tensor - Phase. */
+ CLImage _nonmax; /**< Source tensor - Non-Maxima suppressed. */
+ CLImage _visited, _recorded, _l1_list_counter, _l1_stack; /**< Temporary tensors */
+ ICLTensor *_output; /**< Output tensor provided by the user. */
};
-}
+} // namespace arm_compute
#endif /* ARM_COMPUTE_CLCANNYEDGE_H */
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLDepthConvertLayerKernel. */
class CLCast : public ICLSimpleFunction
namespace arm_compute
{
+class CLCompileContext;
class ICLMultiImage;
class ICLTensor;
using ICLImage = ICLTensor;
namespace arm_compute
{
+class CLCompileContext;
class ICLMultiImage;
class ICLTensor;
using ICLImage = ICLTensor;
#ifndef ARM_COMPUTE_CLCHANNELSHUFFLELAYER_H
#define ARM_COMPUTE_CLCHANNELSHUFFLELAYER_H
+#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLChannelShuffleLayerKernel
*
namespace arm_compute
{
+class CLCompileContext;
class ICLMultiImage;
class ICLTensor;
using ICLImage = ICLTensor;
namespace arm_compute
{
// Forward declarations
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLComparisonKernel */
class CLComparison : public ICLSimpleFunction
#ifndef ARM_COMPUTE_CLCOMPUTEALLANCHORS_H
#define ARM_COMPUTE_CLCOMPUTEALLANCHORS_H
-#include "arm_compute/core/CL/kernels/CLGenerateProposalsLayerKernel.h"
+#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
+class ComputeAnchorsInfo;
/** Basic function to run @ref CLComputeAllAnchorsKernel.
*
#include "arm_compute/runtime/CL/ICLOperator.h"
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/Types.h"
#include <memory>
namespace arm_compute
{
// Forward declarations
+class CLCompileContext;
class ICLTensor;
+class ICLKernel;
class ITensorInfo;
class Status;
#ifndef ARM_COMPUTE_CLCONVERTFULLYCONNECTEDWEIGHTS_H
#define ARM_COMPUTE_CLCONVERTFULLYCONNECTEDWEIGHTS_H
-#include "arm_compute/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
#include "arm_compute/runtime/ITransformWeights.h"
namespace arm_compute
{
+class CLCompileContext;
+class CLConvertFullyConnectedWeightsKernel;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLConvertFullyConnectedWeightsKernel. */
class CLConvertFullyConnectedWeights : public ICLSimpleFunction
#ifndef ARM_COMPUTE_CLCONVOLUTION_H
#define ARM_COMPUTE_CLCONVOLUTION_H
-#include "arm_compute/core/CL/kernels/CLConvolutionKernel.h"
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
namespace arm_compute
{
+template <unsigned int matrix_size>
+class CLConvolutionKernel;
+template <unsigned int matrix_size>
+class CLSeparableConvolutionHorKernel;
+template <unsigned int matrix_size>
+class CLSeparableConvolutionVertKernel;
+class CLFillBorderKernel;
class ICLTensor;
/** Basic function to execute convolution of size 3x3. This function calls the following OpenCL kernels:
public:
/** Default constructor */
CLConvolutionSquare(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLConvolutionSquare(const CLConvolutionSquare &) = delete;
+ /** Default move constructor */
+ CLConvolutionSquare(CLConvolutionSquare &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLConvolutionSquare &operator=(const CLConvolutionSquare &) = delete;
+ /** Default move assignment operator */
+ CLConvolutionSquare &operator=(CLConvolutionSquare &&) = default;
+ /** Default destructor */
+ ~CLConvolutionSquare();
/** Initialize the function's source, destination, conv and border_mode.
*
* @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
void run() override;
private:
- MemoryGroup _memory_group; /**< Function's memory group */
- CLTensor _tmp; /**< temporary buffer for output of horizontal pass */
- bool _is_separable; /**< true if the convolution can be separated */
- CLSeparableConvolutionHorKernel<matrix_size> _kernel_hor; /**< kernel for horizontal pass of separated convolution */
- CLSeparableConvolutionVertKernel<matrix_size> _kernel_vert; /**< kernel for vertical pass of separated convolution */
- CLConvolutionKernel<matrix_size> _kernel; /**< kernel for non-separated convolution **/
- CLFillBorderKernel _border_handler; /**< kernel for border handling */
+ MemoryGroup _memory_group; /**< Function's memory group */
+ CLTensor _tmp; /**< temporary buffer for output of horizontal pass */
+ bool _is_separable; /**< true if the convolution can be separated */
+ std::unique_ptr<CLSeparableConvolutionHorKernel<matrix_size>> _kernel_hor; /**< kernel for horizontal pass of separated convolution */
+ std::unique_ptr<CLSeparableConvolutionVertKernel<matrix_size>> _kernel_vert; /**< kernel for vertical pass of separated convolution */
+ std::unique_ptr<CLConvolutionKernel<matrix_size>> _kernel; /**< kernel for non-separated convolution **/
+ std::unique_ptr<CLFillBorderKernel> _border_handler; /**< kernel for border handling */
};
/** Basic function to run 5x5 convolution. */
public:
/** Default constructor */
CLConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Default destructor */
+ ~CLConvolutionLayer();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLConvolutionLayer(const CLConvolutionLayer &) = delete;
+ /** Default move constructor */
+ CLConvolutionLayer(CLConvolutionLayer &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLConvolutionLayer &operator=(const CLConvolutionLayer &) = delete;
+ /** Default move assignment operator */
+ CLConvolutionLayer &operator=(CLConvolutionLayer &&) = default;
/** Set the input and output tensors.
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
class CLCopy : public ICLSimpleFunction
{
#define ARM_COMPUTE_CL_CROP_RESIZE_H
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLCopyKernel.h"
-#include "arm_compute/core/CL/kernels/CLCropKernel.h"
-#include "arm_compute/core/CL/kernels/CLMemsetKernel.h"
+
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLScale.h"
namespace arm_compute
{
// Forward Declarations
+class CLCompileContext;
+class CLCopyKernel;
+class CLCropKernel;
class ITensor;
+class ITensorInfo;
/** Function to perform cropping and resizing */
class CLCropResize : public IFunction
/** Allow instances of this class to be moved */
CLCropResize &operator=(CLCropResize &&) = default;
/** Default destructor */
- virtual ~CLCropResize() = default;
+ ~CLCropResize();
/** Configure kernel
*
#ifndef ARM_COMPUTE_CLDECONVOLUTIONLAYERUPSAMPLE_H
#define ARM_COMPUTE_CLDECONVOLUTIONLAYERUPSAMPLE_H
-#include "arm_compute/runtime/IFunction.h"
-
-#include "arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h"
-#include "arm_compute/core/CL/kernels/CLMemsetKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <memory>
namespace arm_compute
{
// Forward declarations
+class CLDeconvolutionLayerUpsampleKernel;
+class CLCompileContext;
+class CLMemsetKernel;
class ICLTensor;
+class ITensorInfo;
/** Basic function to execute deconvolution upsample on OpenCL. This function calls the following OpenCL kernels and functions:
*
/** Allow instances of this class to be moved */
CLDeconvolutionLayerUpsample &operator=(CLDeconvolutionLayerUpsample &&) = default;
/** Default destructor */
- virtual ~CLDeconvolutionLayerUpsample() = default;
+ ~CLDeconvolutionLayerUpsample();
/** Initialize the function's source, destination, interpolation type and border_mode.
*
void run() override;
private:
- CLDeconvolutionLayerUpsampleKernel _upsample;
- CLMemsetKernel _memset;
- ICLTensor *_output;
+ std::unique_ptr<CLDeconvolutionLayerUpsampleKernel> _upsample;
+ std::unique_ptr<CLMemsetKernel> _memset;
+ ICLTensor *_output;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLDECONVOLUTIONLAYERUPSAMPLE_H */
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLDepthConvertLayerKernel. */
class CLDepthConvertLayer : public ICLSimpleFunction
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLDepthToSpaceLayerKernel. */
class CLDepthToSpaceLayer : public ICLSimpleFunction
#ifndef ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H
#define ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H
-#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h"
-#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h"
-#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h"
-#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h"
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
-#include "arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLPermute.h"
namespace arm_compute
{
+class CLCompileContext;
+class CLFillBorderKernel;
+class CLDepthwiseConvolutionLayerNativeKernel;
+class CLDepthwiseConvolutionLayerReshapeWeightsKernel;
+class ICLDepthwiseConvolutionLayer3x3Kernel;
class ICLTensor;
/** Function to execute a depthwise convolution
CLDepthwiseConvolutionLayer &operator=(const CLDepthwiseConvolutionLayer &) = delete;
/** Default move assignment operator */
CLDepthwiseConvolutionLayer &operator=(CLDepthwiseConvolutionLayer &&) = default;
+ /** Default destructor */
+ ~CLDepthwiseConvolutionLayer();
/** Initialize the function's source, destination, weights and convolution information.
*
* @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/FP16/FP32. Data layout supported: NHWC, NCHW
};
private:
- MemoryGroup _memory_group;
- std::unique_ptr<ICLDepthwiseConvolutionLayer3x3Kernel> _kernel;
- CLFillBorderKernel _border_handler;
- CLPermute _permute_input_to_nchw;
- CLPermute _permute_weights_to_nchw;
- CLPermute _permute_output_to_nhwc;
- CLDepthwiseConvolutionLayerReshapeWeightsKernel _reshape_weights;
- CLTensor _permuted_input;
- CLTensor _permuted_weights;
- CLTensor _permuted_output;
- CLTensor _output_multipliers;
- CLTensor _output_shifts;
- const ITensor *_original_weights;
- const ITensor *_input;
- const ITensor *_output;
- bool _needs_permute;
- bool _needs_weights_reshape;
- bool _is_prepared;
- bool _is_quantized;
+ MemoryGroup _memory_group;
+ std::unique_ptr<ICLDepthwiseConvolutionLayer3x3Kernel> _kernel;
+ std::unique_ptr<CLFillBorderKernel> _border_handler;
+ CLPermute _permute_input_to_nchw;
+ CLPermute _permute_weights_to_nchw;
+ CLPermute _permute_output_to_nhwc;
+ std::unique_ptr<CLDepthwiseConvolutionLayerReshapeWeightsKernel> _reshape_weights;
+ CLTensor _permuted_input;
+ CLTensor _permuted_weights;
+ CLTensor _permuted_output;
+ CLTensor _output_multipliers;
+ CLTensor _output_shifts;
+ const ITensor *_original_weights;
+ const ITensor *_input;
+ const ITensor *_output;
+ bool _needs_permute;
+ bool _needs_weights_reshape;
+ bool _is_prepared;
+ bool _is_quantized;
};
/** Basic function to execute a generic depthwise convolution. This function calls the following OpenCL kernels:
private:
MemoryGroup _memory_group;
- CLDepthwiseConvolutionLayerNativeKernel _dwc_native_kernel;
- CLPermute _permute_input_to_nhwc;
- CLPermute _permute_weights_to_nhwc;
- CLPermute _permute_output_to_nchw;
+ std::unique_ptr<CLDepthwiseConvolutionLayerNativeKernel> _dwc_native_kernel;
+ CLPermute _permute_input_to_nhwc;
+ CLPermute _permute_weights_to_nhwc;
+ CLPermute _permute_output_to_nchw;
CLTensor _permuted_input;
CLTensor _permuted_weights;
namespace arm_compute
{
// Forward declarations
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLDequantizationLayerKernel that dequantizes an input tensor */
class CLDequantizationLayer : public ICLSimpleFunction
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to execute first order derivative operator. This function calls the following CL kernels:
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to execute dilate. This function calls the following OpenCL kernels:
#ifndef ARM_COMPUTE_CLDIRECTCONVOLUTIONLAYER_H
#define ARM_COMPUTE_CLDIRECTCONVOLUTIONLAYER_H
-#include "arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
#include "arm_compute/runtime/IFunction.h"
namespace arm_compute
{
+class CLCompileContext;
+class CLDirectConvolutionLayerKernel;
+class CLFillBorderKernel;
class ICLTensor;
+class ITensorInfo;
/** Basic function to execute direct convolution function:
*/
public:
/** Default constructor */
CLDirectConvolutionLayer();
+ /** Prevent instances of this class from being copied */
+ CLDirectConvolutionLayer(const CLDirectConvolutionLayer &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLDirectConvolutionLayer &operator=(const CLDirectConvolutionLayer &) = delete;
+ /** Default destructor */
+ ~CLDirectConvolutionLayer();
/** Set the input and output tensors.
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
void run() override;
private:
- CLDirectConvolutionLayerKernel _direct_conv_kernel;
- CLFillBorderKernel _input_border_handler;
- CLActivationLayer _activationlayer_function;
+ std::unique_ptr<CLDirectConvolutionLayerKernel> _direct_conv_kernel;
+ std::unique_ptr<CLFillBorderKernel> _input_border_handler;
+ CLActivationLayer _activationlayer_function;
bool _is_activationlayer_enabled;
};
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to perform inverse square root on an input tensor. */
class CLRsqrtLayer : public IFunction
namespace arm_compute
{
class ICLTensor;
+class CLCompileContext;
+class ITensorInfo;
namespace experimental
{
#ifndef ARM_COMPUTE_CLEQUALIZEHISTOGRAM_H
#define ARM_COMPUTE_CLEQUALIZEHISTOGRAM_H
-#include "arm_compute/core/CL/kernels/CLHistogramKernel.h"
-#include "arm_compute/core/CL/kernels/CLTableLookupKernel.h"
#include "arm_compute/runtime/CL/CLDistribution1D.h"
#include "arm_compute/runtime/CL/CLLut.h"
#include "arm_compute/runtime/IFunction.h"
#include <cstdint>
+#include <memory>
namespace arm_compute
{
+class CLCompileContext;
+class CLHistogramKernel;
+class CLHistogramBorderKernel;
+class CLTableLookupKernel;
class ICLTensor;
using ICLImage = ICLTensor;
public:
/** Default Constructor. */
CLEqualizeHistogram();
+ /** Prevent instances of this class from being copied */
+ CLEqualizeHistogram(const CLEqualizeHistogram &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLEqualizeHistogram &operator=(const CLEqualizeHistogram &) = delete;
+ /** Default destructor */
+ ~CLEqualizeHistogram();
/** Initialise the kernel's inputs.
*
* @param[in] input Input image. Data types supported: U8.
void run() override;
private:
- CLHistogramKernel _histogram_kernel; /**< Kernel that calculates the histogram of input. */
- CLHistogramBorderKernel _border_histogram_kernel; /**< Kernel that calculates the histogram on the borders. */
- CLTableLookupKernel _map_histogram_kernel; /**< Kernel that maps the input to output using the lut. */
- CLDistribution1D _hist; /**< Distribution that holds the histogram of the input image. */
- CLDistribution1D _cum_dist; /**< Distribution that holds the cummulative distribution of the input histogram. */
- CLLut _cd_lut; /**< Holds the equalization lookuptable. */
- static const uint32_t max_range = 256; /**< Histogram range of the internal histograms. */
- static const uint32_t nr_bins = 256; /**< Histogram bins of the internal histograms. */
+ std::unique_ptr<CLHistogramKernel> _histogram_kernel; /**< Kernel that calculates the histogram of input. */
+ std::unique_ptr<CLHistogramBorderKernel> _border_histogram_kernel; /**< Kernel that calculates the histogram on the borders. */
+ std::unique_ptr<CLTableLookupKernel> _map_histogram_kernel; /**< Kernel that maps the input to output using the lut. */
+ CLDistribution1D _hist; /**< Distribution that holds the histogram of the input image. */
+ CLDistribution1D _cum_dist; /**< Distribution that holds the cumulative distribution of the input histogram. */
+ CLLut _cd_lut; /**< Holds the equalization lookup table. */
+ static const uint32_t max_range = 256; /**< Histogram range of the internal histograms. */
+ static const uint32_t nr_bins = 256; /**< Histogram bins of the internal histograms. */
};
}
#endif /*ARM_COMPUTE_CLEQUALIZEHISTOGRAM_H */
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to execute erode. This function calls the following OpenCL kernels:
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/CL/kernels/CLFFTDigitReverseKernel.h"
-#include "arm_compute/core/CL/kernels/CLFFTRadixStageKernel.h"
-#include "arm_compute/core/CL/kernels/CLFFTScaleKernel.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/FunctionDescriptors.h"
#include "arm_compute/runtime/MemoryGroup.h"
namespace arm_compute
{
// Forward declaration
+class CLFFTDigitReverseKernel;
+class CLFFTRadixStageKernel;
+class CLFFTScaleKernel;
class ICLTensor;
/** Basic function to execute one dimensional FFT. This function calls the following OpenCL kernels:
public:
/** Default Constructor */
CLFFT1D(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied */
+ CLFFT1D(const CLFFT1D &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLFFT1D &operator=(const CLFFT1D &) = delete;
+ /** Default destructor */
+ ~CLFFT1D();
/** Initialise the function's source, destinations and border mode.
*
* @param[in] input Source tensor. Data types supported: F32.
void run() override;
protected:
- MemoryGroup _memory_group;
- CLFFTDigitReverseKernel _digit_reverse_kernel;
- std::vector<CLFFTRadixStageKernel> _fft_kernels;
- CLFFTScaleKernel _scale_kernel;
- CLTensor _digit_reversed_input;
- CLTensor _digit_reverse_indices;
- unsigned int _num_ffts;
- bool _run_scale;
+ MemoryGroup _memory_group;
+ std::unique_ptr<CLFFTDigitReverseKernel> _digit_reverse_kernel;
+ std::vector<std::unique_ptr<CLFFTRadixStageKernel>> _fft_kernels;
+ std::unique_ptr<CLFFTScaleKernel> _scale_kernel;
+ CLTensor _digit_reversed_input;
+ CLTensor _digit_reverse_indices;
+ unsigned int _num_ffts;
+ bool _run_scale;
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_CLFFT1D_H */
public:
/** Default Constructor */
CLFFT2D(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied */
+ CLFFT2D(const CLFFT2D &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLFFT2D &operator=(const CLFFT2D &) = delete;
+ /** Default destructor */
+ ~CLFFT2D();
/** Initialise the function's source, destinations and border mode.
*
* @param[in] input Source tensor. Data types supported: F32.
#define ARM_COMPUTE_CLFASTCORNERS_H
#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/CL/kernels/CLFastCornersKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Window.h"
#include "arm_compute/runtime/CL/CLArray.h"
namespace arm_compute
{
+class CLFastCornersKernel;
+class CLCopyToArrayKernel;
class ICLTensor;
using ICLImage = ICLTensor;
CLFastCorners(const CLFastCorners &) = delete;
/** Prevent instances of this class from being copied (As this class contains pointers) */
const CLFastCorners &operator=(const CLFastCorners &) = delete;
+ /** Default destructor */
+ ~CLFastCorners();
/** Initialize the function's source, destination, conv and border_mode.
*
* @param[in] input Source image. Data types supported: U8.
void run() override;
private:
- MemoryGroup _memory_group;
- CLFastCornersKernel _fast_corners_kernel;
- CLNonMaximaSuppression3x3 _suppr_func;
- CLCopyToArrayKernel _copy_array_kernel;
- CLImage _output;
- CLImage _suppr;
- Window _win;
- bool _non_max;
- unsigned int *_num_corners;
- cl::Buffer _num_buffer;
- ICLKeyPointArray *_corners;
- uint8_t _constant_border_value;
+ MemoryGroup _memory_group;
+ std::unique_ptr<CLFastCornersKernel> _fast_corners_kernel;
+ CLNonMaximaSuppression3x3 _suppr_func;
+ std::unique_ptr<CLCopyToArrayKernel> _copy_array_kernel;
+ CLImage _output;
+ CLImage _suppr;
+ Window _win;
+ bool _non_max;
+ unsigned int *_num_corners;
+ cl::Buffer _num_buffer;
+ ICLKeyPointArray *_corners;
+ uint8_t _constant_border_value;
};
}
#endif /*ARM_COMPUTE_CLFASTCORNERS_H */
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Function to run @ref CLMemsetKernel to fill a tensor with a scalar value */
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to run @ref CLFillBorderKernel */
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to execute flatten. This function calls the following OpenCL kernel:
*
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLFloorKernel */
class CLFloor : public ICLSimpleFunction
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-#include "arm_compute/core/CL/kernels/CLTransposeKernel.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h"
#include "arm_compute/runtime/CL/functions/CLFlattenLayer.h"
#ifndef ARM_COMPUTE_CLFUSEBATCHNORMALIZATION_H
#define ARM_COMPUTE_CLFUSEBATCHNORMALIZATION_H
-#include "arm_compute/core/CL/kernels/CLFuseBatchNormalizationKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
+#include <memory>
+
namespace arm_compute
{
// Forward declarations
+class CLCompileContext;
+class CLFuseBatchNormalizationKernel;
class ICLTensor;
+class ITensorInfo;
/** Basic function to fuse the batch normalization node to a preceding convolution node */
class CLFuseBatchNormalization : public IFunction
/** Allow instances of this class to be moved */
CLFuseBatchNormalization &operator=(CLFuseBatchNormalization &&) = default;
/** Default destructor */
- ~CLFuseBatchNormalization() = default;
+ ~CLFuseBatchNormalization();
/** Set the input and output tensors.
*
* @param[in] input_weights Input weights tensor for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC
void run() override;
private:
- CLFuseBatchNormalizationKernel _fuse_bn_kernel;
+ std::unique_ptr<CLFuseBatchNormalizationKernel> _fuse_bn_kernel;
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_CLFUSEBATCHNORMALIZATION_H */
#ifndef ARM_COMPUTE_CLGEMM_H
#define ARM_COMPUTE_CLGEMM_H
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTypes.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IWeightsManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
+#include <memory>
+
namespace arm_compute
{
+class CLCompileContext;
+class CLGEMMReshapeRHSMatrixKernel;
+class CLGEMMMatrixMultiplyKernel;
+class CLGEMMMatrixMultiplyReshapedKernel;
+class CLGEMMMatrixMultiplyReshapedOnlyRHSKernel;
+class CLGEMMReshapeLHSMatrixKernel;
class ICLTensor;
+class ITensorInfo;
namespace weights_transformations
{
class CLGEMMReshapeRHSMatrixKernelManaged : public ITransformWeights
{
public:
+ /** Default constructor */
+ CLGEMMReshapeRHSMatrixKernelManaged();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGEMMReshapeRHSMatrixKernelManaged(const CLGEMMReshapeRHSMatrixKernelManaged &) = delete;
+ /** Default move constructor */
+ CLGEMMReshapeRHSMatrixKernelManaged(CLGEMMReshapeRHSMatrixKernelManaged &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGEMMReshapeRHSMatrixKernelManaged &operator=(const CLGEMMReshapeRHSMatrixKernelManaged &) = delete;
+ /** Default move assignment operator */
+ CLGEMMReshapeRHSMatrixKernelManaged &operator=(CLGEMMReshapeRHSMatrixKernelManaged &&) = default;
+    /** Default destructor */
+ ~CLGEMMReshapeRHSMatrixKernelManaged();
//Inherited method override
- void run() override
- {
- _output.allocator()->allocate();
- CLScheduler::get().enqueue(_kernel, false);
- _reshape_run = true;
- }
+ void run() override;
//Inherited method override
- void release() override
- {
- _output.allocator()->free();
- }
+ void release() override;
//Inherited method override
- ICLTensor *get_weights() override
- {
- return &_output;
- }
+ ICLTensor *get_weights() override;
//Inherited method override
- uint32_t uid() override
- {
- return _uid;
- }
+ uint32_t uid() override;
/** Configures the @ref CLGEMMReshapeRHSMatrixKernel kernel
*
* @param[in] input Input tensor. Data types supported: All
* @param[in] info RHS matrix information to be used for reshaping.
*/
- void configure(const ICLTensor *input, GEMMRHSMatrixInfo info)
- {
- configure(CLKernelLibrary::get().get_compile_context(), input, info);
- }
+ void configure(const ICLTensor *input, GEMMRHSMatrixInfo info);
/** Configures the @ref CLGEMMReshapeRHSMatrixKernel kernel
*
* @param[in] input Input tensor. Data types supported: All
* @param[in] info RHS matrix information to be used for reshaping.
*/
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, GEMMRHSMatrixInfo info)
- {
- _kernel.configure(compile_context, input, &_output, info);
- }
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, GEMMRHSMatrixInfo info);
private:
- static constexpr uint32_t _uid = 0x15;
- CLTensor _output{};
- CLGEMMReshapeRHSMatrixKernel _kernel{};
+ static constexpr uint32_t _uid{ 0x15 };
+ CLTensor _output{};
+ std::unique_ptr<CLGEMMReshapeRHSMatrixKernel> _kernel;
};
} // namespace weights_transformations
CLGEMM &operator=(const CLGEMM &) = delete;
/** Default move assignment operator */
CLGEMM &operator=(CLGEMM &&) = default;
+ /** Default destructor */
+ ~CLGEMM();
/** Initialise the kernel's inputs and output
*
* @note GEMM: General Matrix Multiply - [alpha * A * B + beta * C].
static Status validate_reshaped(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info);
static Status validate_reshaped_only_rhs(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info);
- MemoryGroup _memory_group;
- IWeightsManager *_weights_manager;
- CLGEMMMatrixMultiplyKernel _mm_kernel;
- CLGEMMReshapeLHSMatrixKernel _reshape_lhs_kernel;
- CLGEMMReshapeRHSMatrixKernel _reshape_rhs_kernel;
- weights_transformations::CLGEMMReshapeRHSMatrixKernelManaged _reshape_rhs_kernel_managed;
- CLGEMMMatrixMultiplyReshapedKernel _mm_reshaped_kernel;
- CLGEMMMatrixMultiplyReshapedOnlyRHSKernel _mm_reshaped_only_rhs_kernel;
- CLGEMMMatrixMultiplyReshapedOnlyRHSKernel _mm_reshaped_only_rhs_fallback_kernel;
- CLTensor _tmp_a;
- CLTensor _tmp_b;
- const ICLTensor *_original_b;
- const ICLTensor *_lhs;
- ICLTensor *_dst;
- bool _reshape_b_only_on_first_run;
- bool _is_prepared;
- bool _has_pad_y;
- CLGEMMKernelType _gemm_kernel_type;
+ MemoryGroup _memory_group;
+ IWeightsManager *_weights_manager;
+ std::unique_ptr<CLGEMMMatrixMultiplyKernel> _mm_kernel;
+ std::unique_ptr<CLGEMMReshapeLHSMatrixKernel> _reshape_lhs_kernel;
+ std::unique_ptr<CLGEMMReshapeRHSMatrixKernel> _reshape_rhs_kernel;
+ std::unique_ptr<weights_transformations::CLGEMMReshapeRHSMatrixKernelManaged> _reshape_rhs_kernel_managed;
+ std::unique_ptr<CLGEMMMatrixMultiplyReshapedKernel> _mm_reshaped_kernel;
+ std::unique_ptr<CLGEMMMatrixMultiplyReshapedOnlyRHSKernel> _mm_reshaped_only_rhs_kernel;
+ std::unique_ptr<CLGEMMMatrixMultiplyReshapedOnlyRHSKernel> _mm_reshaped_only_rhs_fallback_kernel;
+ CLTensor _tmp_a;
+ CLTensor _tmp_b;
+ const ICLTensor *_original_b;
+ const ICLTensor *_lhs;
+ ICLTensor *_dst;
+ bool _reshape_b_only_on_first_run;
+ bool _is_prepared;
+ bool _has_pad_y;
+ CLGEMMKernelType _gemm_kernel_type;
};
} // namespace arm_compute
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/CL/kernels/CLCol2ImKernel.h"
-#include "arm_compute/core/CL/kernels/CLIm2ColKernel.h"
-#include "arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
namespace arm_compute
{
+class CLCol2ImKernel;
+class CLIm2ColKernel;
+class CLWeightsReshapeKernel;
class ICLTensor;
/** Function to reshape and transpose the weights. This function calls the following kernels:
public:
/** Constructor */
CLConvolutionLayerReshapeWeights();
+ /** Prevent instances of this class from being copied */
+ CLConvolutionLayerReshapeWeights(const CLConvolutionLayerReshapeWeights &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLConvolutionLayerReshapeWeights &operator=(const CLConvolutionLayerReshapeWeights &) = delete;
+    /** Prevent instances of this class from being moved */
+ CLConvolutionLayerReshapeWeights(CLConvolutionLayerReshapeWeights &&) = delete;
+    /** Prevent instances of this class from being moved */
+ CLConvolutionLayerReshapeWeights &operator=(CLConvolutionLayerReshapeWeights &&) = delete;
+ /** Default destructor */
+ ~CLConvolutionLayerReshapeWeights();
/** Set the input and output tensors.
*
* @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
void run() override;
private:
- CLWeightsReshapeKernel _weights_reshape_kernel;
+ std::unique_ptr<CLWeightsReshapeKernel> _weights_reshape_kernel;
};
namespace weights_transformations
CLGEMMConvolutionLayer &operator=(const CLGEMMConvolutionLayer &) = delete;
/** Default move assignment operator */
CLGEMMConvolutionLayer &operator=(CLGEMMConvolutionLayer &&) = default;
+    /** Default destructor */
+ ~CLGEMMConvolutionLayer();
/** Set the input and output tensors.
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
IWeightsManager *_weights_manager;
CLConvolutionLayerReshapeWeights _reshape_weights;
weights_transformations::CLConvolutionLayerReshapeWeightsTransform _reshape_weights_managed;
- CLIm2ColKernel _im2col_kernel;
+ std::unique_ptr<CLIm2ColKernel> _im2col_kernel;
CLGEMM _mm_gemm;
CLGEMMLowpMatrixMultiplyCore _mm_gemmlowp;
- CLCol2ImKernel _col2im_kernel;
+ std::unique_ptr<CLCol2ImKernel> _col2im_kernel;
CLActivationLayer _activationlayer_function;
const ICLTensor *_original_weights;
#ifndef ARM_COMPUTE_CLGEMMDECONVOLUTIONLAYER_H
#define ARM_COMPUTE_CLGEMMDECONVOLUTIONLAYER_H
-#include "arm_compute/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h"
#include "arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h"
namespace arm_compute
{
+class CLDeconvolutionReshapeOutputKernel;
class ICLTensor;
/** Function to run the deconvolution layer through a call to GEMM.
*
CLGEMMDeconvolutionLayer &operator=(const CLGEMMDeconvolutionLayer &) = delete;
/** Default move assignment operator */
CLGEMMDeconvolutionLayer &operator=(CLGEMMDeconvolutionLayer &&) = default;
+    /** Default destructor */
+ ~CLGEMMDeconvolutionLayer();
/** Set the input, weights, biases and output tensors.
*
* @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs.
private:
MemoryGroup _memory_group;
- CLGEMM _mm_gemm;
- CLGEMMLowpMatrixMultiplyCore _mm_gemmlowp;
- CLGEMMLowpOutputStage _gemmlowp_output_stage;
- CLPermute _permute_input_to_nhwc;
- CLPermute _permute_weights_to_nhwc;
- CLReshapeLayer _reshape_weights;
- CLTranspose _transpose_weights;
- CLDeconvolutionReshapeOutputKernel _deconv_reshape;
- CLSlice _slice_gemm;
+ CLGEMM _mm_gemm;
+ CLGEMMLowpMatrixMultiplyCore _mm_gemmlowp;
+ CLGEMMLowpOutputStage _gemmlowp_output_stage;
+ CLPermute _permute_input_to_nhwc;
+ CLPermute _permute_weights_to_nhwc;
+ CLReshapeLayer _reshape_weights;
+ CLTranspose _transpose_weights;
+ std::unique_ptr<CLDeconvolutionReshapeOutputKernel> _deconv_reshape;
+ CLSlice _slice_gemm;
CLTensor _gemmlowp_final;
CLTensor _reshaped_weights;
#ifndef ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYCORE_H
#define ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYCORE_H
-#include "arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/MemoryGroup.h"
namespace arm_compute
{
+class CLCompileContext;
class IMemoryManager;
class ICLTensor;
+class ITensorInfo;
+class CLDepthConvertLayerKernel;
+class CLGEMMLowpMatrixMultiplyNativeKernel;
+class CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel;
+class CLGEMMLowpOffsetContributionKernel;
+class CLGEMMLowpOffsetContributionOutputStageKernel;
+class CLGEMMLowpMatrixAReductionKernel;
+class CLGEMMLowpMatrixBReductionKernel;
+class CLGEMMReshapeRHSMatrixKernel;
/** Basic function to execute GEMMLowpMatrixMultiplyCore on OpenCL. */
class CLGEMMLowpMatrixMultiplyCore : public IFunction
CLGEMMLowpMatrixMultiplyCore &operator=(const CLGEMMLowpMatrixMultiplyCore &) = delete;
/** Default move assignment operator */
CLGEMMLowpMatrixMultiplyCore &operator=(CLGEMMLowpMatrixMultiplyCore &&) = default;
+ /** Default destructor */
+ ~CLGEMMLowpMatrixMultiplyCore();
/** Initialise the kernel's inputs, output
*
* @note GEMMLowp: low precision GEMM kernel. [A * B + C]
MemoryGroup _memory_group;
// Kernels used
- CLDepthConvertLayerKernel _weights_to_qasymm8;
- CLGEMMLowpMatrixMultiplyNativeKernel _mm_native_kernel;
- CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel _mm_reshaped_only_rhs_kernel;
- CLGEMMReshapeRHSMatrixKernel _mtx_b_reshape_kernel;
- CLGEMMLowpMatrixAReductionKernel _mtx_a_reduction_kernel;
- CLGEMMLowpMatrixBReductionKernel _mtx_b_reduction_kernel;
- CLGEMMLowpOffsetContributionKernel _offset_contribution_kernel;
- CLGEMMLowpOffsetContributionOutputStageKernel _offset_contribution_output_stage_kernel;
+ std::unique_ptr<CLDepthConvertLayerKernel> _weights_to_qasymm8;
+ std::unique_ptr<CLGEMMLowpMatrixMultiplyNativeKernel> _mm_native_kernel;
+ std::unique_ptr<CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel> _mm_reshaped_only_rhs_kernel;
+ std::unique_ptr<CLGEMMReshapeRHSMatrixKernel> _mtx_b_reshape_kernel;
+ std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _mtx_a_reduction_kernel;
+ std::unique_ptr<CLGEMMLowpMatrixBReductionKernel> _mtx_b_reduction_kernel;
+ std::unique_ptr<CLGEMMLowpOffsetContributionKernel> _offset_contribution_kernel;
+ std::unique_ptr<CLGEMMLowpOffsetContributionOutputStageKernel> _offset_contribution_output_stage_kernel;
// Temporary tensors
CLTensor _qasymm8_weights;
#ifndef ARM_COMPUTE_CLGEMMLOWPOUTPUTSTAGE_H
#define ARM_COMPUTE_CLGEMMLOWPOUTPUTSTAGE_H
+#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+#include <limits>
+
/** This file contains all available output stages for GEMMLowp on OpenCL.
*
* In gemmlowp, the "output stage" is the process that takes a final int32 accumulator value (the output of @ref CLGEMMLowpMatrixMultiplyCore),
namespace arm_compute
{
+class CLCompileContext;
class ITensor;
+class ICLTensor;
+class ITensorInfo;
+struct GEMMLowpOutputStageInfo;
/** Basic function to execute CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint on OpenCL.
*
#ifndef ARM_COMPUTE_CLGATHER_H
#define ARM_COMPUTE_CLGATHER_H
+#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLGatherKernel */
class CLGather : public ICLSimpleFunction
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to execute gaussian filter 3x3. This function calls the following OpenCL kernels:
#ifndef ARM_COMPUTE_CLGAUSSIAN5X5_H
#define ARM_COMPUTE_CLGAUSSIAN5X5_H
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
-#include "arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/IFunction.h"
namespace arm_compute
{
+class CLCompileContext;
+class CLFillBorderKernel;
+class CLGaussian5x5HorKernel;
+class CLGaussian5x5VertKernel;
class ICLTensor;
/** Basic function to execute gaussian filter 5x5. This function calls the following OpenCL kernels:
* @param[in] memory_manager (Optional) Memory manager.
*/
CLGaussian5x5(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied */
+ CLGaussian5x5(const CLGaussian5x5 &) = delete;
+ /** Default move constructor */
+ CLGaussian5x5(CLGaussian5x5 &&) = default;
+ /** Prevent instances of this class from being copied */
+ CLGaussian5x5 &operator=(const CLGaussian5x5 &) = delete;
+ /** Default move assignment operator */
+ CLGaussian5x5 &operator=(CLGaussian5x5 &&) = default;
+ /** Default destructor */
+ ~CLGaussian5x5();
/** Initialise the function's source, destinations and border mode.
*
* @param[in,out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
void run() override;
protected:
- MemoryGroup _memory_group; /**< Function's memory group */
- CLGaussian5x5HorKernel _kernel_hor; /**< Horizontal pass kernel */
- CLGaussian5x5VertKernel _kernel_vert; /**< Vertical pass kernel */
- CLFillBorderKernel _border_handler; /**< Kernel to handle image borders */
- CLImage _tmp; /**< Temporary buffer */
+ MemoryGroup _memory_group; /**< Function's memory group */
+ std::unique_ptr<CLGaussian5x5HorKernel> _kernel_hor; /**< Horizontal pass kernel */
+ std::unique_ptr<CLGaussian5x5VertKernel> _kernel_vert; /**< Vertical pass kernel */
+ std::unique_ptr<CLFillBorderKernel> _border_handler; /**< Kernel to handle image borders */
+ CLImage _tmp; /**< Temporary buffer */
};
}
#endif /*ARM_COMPUTE_CLGAUSSIAN5X5_H */
#ifndef ARM_COMPUTE_CLGAUSSIANPYRAMID_H
#define ARM_COMPUTE_CLGAUSSIANPYRAMID_H
-#include "arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h"
-
-#include "arm_compute/core/CL/kernels/CLScaleKernel.h"
#include "arm_compute/core/IPyramid.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLPyramid.h"
namespace arm_compute
{
+class CLCompileContext;
+class CLFillBorderKernel;
class ICLTensor;
+class CLGaussianPyramidHorKernel;
+class CLGaussianPyramidVertKernel;
+class CLScaleKernel;
/** Common interface for all Gaussian pyramid functions */
class CLGaussianPyramid : public IFunction
/** Allow instances of this class to be moved */
CLGaussianPyramid &operator=(CLGaussianPyramid &&) = default;
/** Default destructor */
- virtual ~CLGaussianPyramid() = default;
+ ~CLGaussianPyramid();
/** Initialise the function's source, destinations and border mode.
*
* @param[in, out] input Source tensor. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
public:
/** Constructor */
CLGaussianPyramidHalf();
+ /** Prevent instances of this class from being copied */
+ CLGaussianPyramidHalf(const CLGaussianPyramidHalf &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLGaussianPyramidHalf &operator=(const CLGaussianPyramidHalf &) = delete;
+ /** Default destructor */
+ ~CLGaussianPyramidHalf();
// Inherited methods overridden:
void configure(ICLTensor *input, CLPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) override;
void run() override;
private:
- std::vector<CLFillBorderKernel> _horizontal_border_handler;
- std::vector<CLFillBorderKernel> _vertical_border_handler;
- std::vector<CLGaussianPyramidHorKernel> _horizontal_reduction;
- std::vector<CLGaussianPyramidVertKernel> _vertical_reduction;
+ std::vector<std::unique_ptr<CLFillBorderKernel>> _horizontal_border_handler;
+ std::vector<std::unique_ptr<CLFillBorderKernel>> _vertical_border_handler;
+ std::vector<std::unique_ptr<CLGaussianPyramidHorKernel>> _horizontal_reduction;
+ std::vector<std::unique_ptr<CLGaussianPyramidVertKernel>> _vertical_reduction;
};
/** Basic function to execute gaussian pyramid with ORB scale factor. This function calls the following OpenCL kernels and functions:
void run() override;
private:
- std::vector<CLGaussian5x5> _gauss5x5;
- std::vector<CLScaleKernel> _scale_nearest;
+ std::vector<CLGaussian5x5> _gauss5x5;
+ std::vector<std::unique_ptr<CLScaleKernel>> _scale_nearest;
};
}
#endif /*ARM_COMPUTE_CLGAUSSIANPYRAMID_H */
*/
#ifndef ARM_COMPUTE_CLGENERATEPROPOSALSLAYER_H
#define ARM_COMPUTE_CLGENERATEPROPOSALSLAYER_H
-#include "arm_compute/core/CL/kernels/CLBoundingBoxTransformKernel.h"
-#include "arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLGenerateProposalsLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLPadLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLPermuteKernel.h"
-#include "arm_compute/core/CL/kernels/CLQuantizationLayerKernel.h"
+
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/MemoryGroup.h"
+#include <memory>
+
namespace arm_compute
{
+class CLCompileContext;
+class CLBoundingBoxTransformKernel;
+class CLDequantizationLayerKernel;
+class CLComputeAllAnchorsKernel;
+class CLPadLayerKernel;
+class CLPermuteKernel;
+class CLQuantizationLayerKernel;
class ICLTensor;
+class ITensorInfo;
/** Basic function to generate proposals for a RPN (Region Proposal Network)
*
CLGenerateProposalsLayer(const CLGenerateProposalsLayer &) = delete;
/** Prevent instances of this class from being copied (As this class contains pointers) */
CLGenerateProposalsLayer &operator=(const CLGenerateProposalsLayer &) = delete;
+ /** Default destructor */
+ ~CLGenerateProposalsLayer();
/** Set the input and output tensors.
*
MemoryGroup _memory_group;
// OpenCL kernels
- CLPermuteKernel _permute_deltas_kernel;
- CLReshapeLayer _flatten_deltas;
- CLPermuteKernel _permute_scores_kernel;
- CLReshapeLayer _flatten_scores;
- CLComputeAllAnchorsKernel _compute_anchors_kernel;
- CLBoundingBoxTransformKernel _bounding_box_kernel;
- CLPadLayerKernel _pad_kernel;
- CLDequantizationLayerKernel _dequantize_anchors;
- CLDequantizationLayerKernel _dequantize_deltas;
- CLQuantizationLayerKernel _quantize_all_proposals;
+ std::unique_ptr<CLPermuteKernel> _permute_deltas_kernel;
+ CLReshapeLayer _flatten_deltas;
+ std::unique_ptr<CLPermuteKernel> _permute_scores_kernel;
+ CLReshapeLayer _flatten_scores;
+ std::unique_ptr<CLComputeAllAnchorsKernel> _compute_anchors_kernel;
+ std::unique_ptr<CLBoundingBoxTransformKernel> _bounding_box_kernel;
+ std::unique_ptr<CLPadLayerKernel> _pad_kernel;
+ std::unique_ptr<CLDequantizationLayerKernel> _dequantize_anchors;
+ std::unique_ptr<CLDequantizationLayerKernel> _dequantize_deltas;
+ std::unique_ptr<CLQuantizationLayerKernel> _quantize_all_proposals;
// CPP functions
CPPBoxWithNonMaximaSuppressionLimit _cpp_nms;
#ifndef ARM_COMPUTE_CLHOGDESCRIPTOR_H
#define ARM_COMPUTE_CLHOGDESCRIPTOR_H
-#include "arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLHOGGradient.h"
namespace arm_compute
{
class IHOG;
+class CLHOGOrientationBinningKernel;
+class CLHOGBlockNormalizationKernel;
/** Basic function to calculate HOG descriptor. This function calls the following OpenCL kernels:
*
* -# @ref CLHOGGradient
public:
/** Default constructor */
CLHOGDescriptor(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied */
+ CLHOGDescriptor(const CLHOGDescriptor &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLHOGDescriptor &operator=(const CLHOGDescriptor &) = delete;
+ /** Default destructor */
+ ~CLHOGDescriptor();
/** Initialise the function's source, destination, HOG data-object and border mode
*
* @param[in, out] input Input tensor. Data type supported: U8
void run() override;
private:
- MemoryGroup _memory_group;
- CLHOGGradient _gradient;
- CLHOGOrientationBinningKernel _orient_bin;
- CLHOGBlockNormalizationKernel _block_norm;
- CLTensor _mag;
- CLTensor _phase;
- CLTensor _hog_space;
+ MemoryGroup _memory_group;
+ CLHOGGradient _gradient;
+ std::unique_ptr<CLHOGOrientationBinningKernel> _orient_bin;
+ std::unique_ptr<CLHOGBlockNormalizationKernel> _block_norm;
+ CLTensor _mag;
+ CLTensor _phase;
+ CLTensor _hog_space;
};
}
#ifndef ARM_COMPUTE_CLHOGDETECTOR_H
#define ARM_COMPUTE_CLHOGDETECTOR_H
+#include "arm_compute/core/CL/ICLArray.h"
#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/CL/kernels/CLHOGDetectorKernel.h"
#include "arm_compute/core/IHOG.h"
#include "arm_compute/runtime/IFunction.h"
+#include <memory>
+
namespace arm_compute
{
+class CLCompileContext;
+class CLHOGDetectorKernel;
+class ICLTensor;
+class ICLHOG;
+
/** Basic function to execute HOG detector based on linear SVM. This function calls the following OpenCL kernel:
*
* -# @ref CLHOGDetectorKernel
/** Allow instances of this class to be moved */
CLHOGDetector &operator=(CLHOGDetector &&) = default;
/** Default destructor */
- ~CLHOGDetector() = default;
+ ~CLHOGDetector();
/** Initialise the kernel's input, output, HOG data object, detection window stride, threshold and index class
*
* @attention The function does not reset the number of values in @ref IDetectionWindowArray so it is caller's responsibility to clear it.
* @param[in] idx_class (Optional) Index of the class used for evaluating which class the detection window belongs to
*/
void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, const Size2D &detection_window_stride,
- float threshold = 0.0f,
+ float threshold = 0.0f,
size_t idx_class = 0);
// Inherited methods overridden:
void run() override;
private:
- CLHOGDetectorKernel _hog_detector_kernel;
- ICLDetectionWindowArray *_detection_windows;
- cl::Buffer _num_detection_windows;
+ std::unique_ptr<CLHOGDetectorKernel> _hog_detector_kernel;
+ ICLDetectionWindowArray *_detection_windows;
+ cl::Buffer _num_detection_windows;
};
}
#ifndef ARM_COMPUTE_CLHOGGRADIENT_H
#define ARM_COMPUTE_CLHOGGRADIENT_H
-#include "arm_compute/core/CL/ICLKernel.h"
-
-#include "arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLDerivative.h"
namespace arm_compute
{
+class CLCompileContext;
+class CLMagnitudePhaseKernel;
+class ITensorInfo;
/** Basic function to calculate the gradient for HOG. This function calls the following OpenCL kernels:
*
* -# @ref CLDerivative
void run() override;
private:
- MemoryGroup _memory_group;
- CLDerivative _derivative;
- CLMagnitudePhaseKernel _mag_phase;
- CLTensor _gx;
- CLTensor _gy;
+ MemoryGroup _memory_group;
+ CLDerivative _derivative;
+ std::unique_ptr<CLMagnitudePhaseKernel> _mag_phase;
+ CLTensor _gx;
+ CLTensor _gy;
};
}
#endif /*ARM_COMPUTE_CLHOGGRADIENT_H */
#include "arm_compute/core/CL/ICLArray.h"
#include "arm_compute/core/CL/ICLMultiHOG.h"
-#include "arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h"
#include "arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLHOGDetector.h"
namespace arm_compute
{
+class CLCompileContext;
+class CLHOGOrientationBinningKernel;
+class CLHOGBlockNormalizationKernel;
/** Basic function to detect multiple objects (or the same object at different scales) on the same input image using HOG. This function calls the following kernels:
*
* -# @ref CLHOGGradient
CLHOGMultiDetection(const CLHOGMultiDetection &) = delete;
/** Prevent instances of this class from being copied (As this class contains pointers) */
CLHOGMultiDetection &operator=(const CLHOGMultiDetection &) = delete;
+ /** Default destructor */
+ ~CLHOGMultiDetection();
/** Initialise the function's source, destination, detection window strides, border mode, threshold and non-maxima suppression
*
* @param[in, out] input Input tensor. Data type supported: U8
void run() override;
private:
- MemoryGroup _memory_group;
- CLHOGGradient _gradient_kernel;
- std::vector<CLHOGOrientationBinningKernel> _orient_bin_kernel;
- std::vector<CLHOGBlockNormalizationKernel> _block_norm_kernel;
- std::vector<CLHOGDetector> _hog_detect_kernel;
- CPPDetectionWindowNonMaximaSuppressionKernel _non_maxima_kernel;
- std::vector<CLTensor> _hog_space;
- std::vector<CLTensor> _hog_norm_space;
- ICLDetectionWindowArray *_detection_windows;
- CLTensor _mag;
- CLTensor _phase;
- bool _non_maxima_suppression;
- size_t _num_orient_bin_kernel;
- size_t _num_block_norm_kernel;
- size_t _num_hog_detect_kernel;
+ MemoryGroup _memory_group;
+ CLHOGGradient _gradient_kernel;
+ std::vector<std::unique_ptr<CLHOGOrientationBinningKernel>> _orient_bin_kernel;
+ std::vector<std::unique_ptr<CLHOGBlockNormalizationKernel>> _block_norm_kernel;
+ std::vector<CLHOGDetector> _hog_detect_kernel;
+ CPPDetectionWindowNonMaximaSuppressionKernel _non_maxima_kernel;
+ std::vector<CLTensor> _hog_space;
+ std::vector<CLTensor> _hog_norm_space;
+ ICLDetectionWindowArray *_detection_windows;
+ CLTensor _mag;
+ CLTensor _phase;
+ bool _non_maxima_suppression;
+ size_t _num_orient_bin_kernel;
+ size_t _num_block_norm_kernel;
+ size_t _num_hog_detect_kernel;
};
}
#ifndef ARM_COMPUTE_CLHARRISCORNERS_H
#define ARM_COMPUTE_CLHARRISCORNERS_H
-#include "arm_compute/runtime/IFunction.h"
-
#include "arm_compute/core/CL/ICLArray.h"
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
-#include "arm_compute/core/CL/kernels/CLHarrisCornersKernel.h"
#include "arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h"
#include "arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h"
+#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include <cstdint>
namespace arm_compute
{
+class CLCompileContext;
+class CLHarrisScoreKernel;
+class CLFillBorderKernel;
class ICLTensor;
using ICLImage = ICLTensor;
CLHarrisCorners(const CLHarrisCorners &) = delete;
/** Prevent instances of this class from being copied (As this class contains pointers) */
const CLHarrisCorners &operator=(const CLHarrisCorners &) = delete;
+ /** Default destructor */
+ ~CLHarrisCorners();
/** Initialize the function's source, destination, conv and border_mode.
*
* @param[in,out] input Source image. Data types supported: U8. (Written to only for @p border_mode != UNDEFINED)
void run() override;
private:
- MemoryGroup _memory_group; /**< Function's memory group */
- std::unique_ptr<IFunction> _sobel; /**< Sobel function */
- CLHarrisScoreKernel _harris_score; /**< Harris score kernel */
- CLNonMaximaSuppression3x3 _non_max_suppr; /**< Non-maxima suppression function */
- CPPCornerCandidatesKernel _candidates; /**< Sort kernel */
- CPPSortEuclideanDistanceKernel _sort_euclidean; /**< Euclidean distance kernel */
- CLFillBorderKernel _border_gx; /**< Border handler before running harris score */
- CLFillBorderKernel _border_gy; /**< Border handler before running harris score */
- CLImage _gx; /**< Source image - Gx component */
- CLImage _gy; /**< Source image - Gy component */
- CLImage _score; /**< Source image - Harris score */
- CLImage _nonmax; /**< Source image - Non-Maxima suppressed image */
- std::vector<InternalKeypoint> _corners_list; /**< Array of InternalKeypoint. It stores the potential corner candidates */
- int32_t _num_corner_candidates; /**< Number of potential corner candidates */
- ICLKeyPointArray *_corners; /**< Output corners array */
+ MemoryGroup _memory_group; /**< Function's memory group */
+ std::unique_ptr<IFunction> _sobel; /**< Sobel function */
+ std::unique_ptr<CLHarrisScoreKernel> _harris_score; /**< Harris score kernel */
+ CLNonMaximaSuppression3x3 _non_max_suppr; /**< Non-maxima suppression function */
+ CPPCornerCandidatesKernel _candidates; /**< Sort kernel */
+ CPPSortEuclideanDistanceKernel _sort_euclidean; /**< Euclidean distance kernel */
+ std::unique_ptr<CLFillBorderKernel> _border_gx; /**< Border handler before running harris score */
+ std::unique_ptr<CLFillBorderKernel> _border_gy; /**< Border handler before running harris score */
+ CLImage _gx; /**< Source image - Gx component */
+ CLImage _gy; /**< Source image - Gy component */
+ CLImage _score; /**< Source image - Harris score */
+ CLImage _nonmax; /**< Source image - Non-Maxima suppressed image */
+ std::vector<InternalKeypoint> _corners_list; /**< Array of InternalKeypoint. It stores the potential corner candidates */
+ int32_t _num_corner_candidates; /**< Number of potential corner candidates */
+ ICLKeyPointArray *_corners; /**< Output corners array */
};
}
#endif /*ARM_COMPUTE_CLHARRISCORNERS_H */
#ifndef ARM_COMPUTE_CLHISTOGRAM_H
#define ARM_COMPUTE_CLHISTOGRAM_H
-#include "arm_compute/core/CL/kernels/CLHistogramKernel.h"
#include "arm_compute/runtime/IFunction.h"
+#include "src/core/CL/kernels/CLHistogramKernel.h"
namespace arm_compute
{
#ifndef ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYER_H
#define ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYER_H
+#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to perform a Instance normalization.
*
#ifndef ARM_COMPUTE_CLINTEGRALIMAGE_H
#define ARM_COMPUTE_CLINTEGRALIMAGE_H
-#include "arm_compute/core/CL/kernels/CLIntegralImageKernel.h"
#include "arm_compute/runtime/IFunction.h"
+#include <memory>
+
namespace arm_compute
{
+class CLCompileContext;
+class CLIntegralImageHorKernel;
+class CLIntegralImageVertKernel;
class ICLTensor;
/** Basic function to execute integral image. This function calls the following OpenCL kernels:
public:
/** Default Constructor. */
CLIntegralImage();
+ /** Prevent instances of this class from being copied */
+ CLIntegralImage(const CLIntegralImage &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLIntegralImage &operator=(const CLIntegralImage &) = delete;
+ /** Default destructor */
+ ~CLIntegralImage();
/** Initialise the function's source, destinations and border mode.
*
* @param[in] input Source tensor. Data types supported: U8.
void run() override;
protected:
- CLIntegralImageHorKernel _integral_hor; /**< Integral Image Horizontal kernel */
- CLIntegralImageVertKernel _integral_vert; /**< Integral Image Vertical kernel */
+ std::unique_ptr<CLIntegralImageHorKernel> _integral_hor; /**< Integral Image Horizontal kernel */
+ std::unique_ptr<CLIntegralImageVertKernel> _integral_vert; /**< Integral Image Vertical kernel */
};
}
#endif /*ARM_COMPUTE_CLINTEGRALIMAGE_H */
#ifndef ARM_COMPUTE_CLL2NORMALIZELAYER_H
#define ARM_COMPUTE_CLL2NORMALIZELAYER_H
-#include "arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
namespace arm_compute
{
+class CLCompileContext;
+class CLL2NormalizeLayerKernel;
class ICLTensor;
+class ITensorInfo;
/** Basic function to perform a L2 normalization on a given axis.
*
public:
/** Constructor */
CLL2NormalizeLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Default Destructor */
+ ~CLL2NormalizeLayer();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLL2NormalizeLayer(const CLL2NormalizeLayer &) = delete;
+ /** Default move constructor */
+ CLL2NormalizeLayer(CLL2NormalizeLayer &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLL2NormalizeLayer &operator=(const CLL2NormalizeLayer &) = delete;
+ /** Default move assignment operator */
+ CLL2NormalizeLayer &operator=(CLL2NormalizeLayer &&) = default;
/** Set the input and output tensors.
*
void run() override;
private:
- MemoryGroup _memory_group;
- CLReductionOperation _reduce_func;
- CLL2NormalizeLayerKernel _normalize_kernel;
- CLTensor _sumsq;
+ MemoryGroup _memory_group;
+ CLReductionOperation _reduce_func;
+ std::unique_ptr<CLL2NormalizeLayerKernel> _normalize_kernel;
+ CLTensor _sumsq;
};
}
#endif /*ARM_COMPUTE_CLL2NORMALIZELAYER_H */
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/CL/kernels/CLCopyKernel.h"
-#include "arm_compute/core/CL/kernels/CLMemsetKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
namespace arm_compute
{
+class CLCompileContext;
+class CLCopyKernel;
+class CLMemsetKernel;
+class CLTransposeKernel;
class ICLTensor;
/** This function performs a single time step in a Long Short-Term Memory (LSTM) layer.
public:
/** Default constructor */
CLLSTMLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied */
+ CLLSTMLayer(const CLLSTMLayer &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLLSTMLayer &operator=(const CLLSTMLayer &) = delete;
+ /** Prevent instances of this class to be moved */
+ CLLSTMLayer(CLLSTMLayer &&) = delete;
+ /** Prevent instances of this class to be moved */
+ CLLSTMLayer &operator=(CLLSTMLayer &&) = delete;
+ /** Default destructor */
+ ~CLLSTMLayer();
/** Initialize function's tensors.
*
* @param[in] input Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: F16/F32.
void prepare() override;
private:
- MemoryGroup _memory_group;
- CLFullyConnectedLayer _fully_connected_input_gate;
- CLArithmeticAddition _accum_input_gate1;
- CLArithmeticSubtraction _subtract_input_gate;
- CLPixelWiseMultiplication _pixelwise_mul_input_gate;
- CLActivationLayer _activation_input_gate;
- CLFullyConnectedLayer _fully_connected_forget_gate;
- CLArithmeticAddition _accum_forget_gate1;
- CLPixelWiseMultiplication _pixelwise_mul_forget_gate;
- CLActivationLayer _activation_forget_gate;
- CLFullyConnectedLayer _fully_connected_cell_state;
- CLGEMM _gemm_cell_state1;
- CLTransposeKernel _transpose_cell_state;
- CLArithmeticAddition _accum_cell_state1;
- CLArithmeticAddition _accum_cell_state2;
- CLPixelWiseMultiplication _pixelwise_mul_cell_state1;
- CLActivationLayer _activation_cell_state;
- CLActivationLayer _cell_clip;
- CLPixelWiseMultiplication _pixelwise_mul_cell_state2;
- CLFullyConnectedLayer _fully_connected_output;
- CLPixelWiseMultiplication _pixelwise_mul_output_state1;
- CLArithmeticAddition _accum_output1;
- CLActivationLayer _activation_output;
- CLActivationLayer _activation_output_state;
- CLPixelWiseMultiplication _pixelwise_mul_output_state2;
- CLFullyConnectedLayer _fully_connected_output_state;
- CLActivationLayer _projection_clip;
- CLCopyKernel _copy_cell_state;
- CLCopyKernel _copy_output;
- CLConcatenateLayer _concat_scratch_buffer;
- CLConcatenateLayer _concat_inputs_forget_gate;
- CLConcatenateLayer _concat_weights_forget_gate;
- CLConcatenateLayer _concat_weights_input_gate;
- CLConcatenateLayer _concat_weights_output;
- CLMemsetKernel _ones_memset_kernel;
- CLMeanStdDevNormalizationLayer _mean_std_norm_input_gate;
- CLPixelWiseMultiplication _pixelwise_mul_input_gate_coeff;
- CLArithmeticAddition _accum_input_gate_bias;
- CLMeanStdDevNormalizationLayer _mean_std_norm_forget_gate;
- CLPixelWiseMultiplication _pixelwise_mul_forget_gate_coeff;
- CLArithmeticAddition _accum_forget_gate_bias;
- CLMeanStdDevNormalizationLayer _mean_std_norm_cell_gate;
- CLPixelWiseMultiplication _pixelwise_mul_cell_gate_coeff;
- CLArithmeticAddition _accum_cell_gate_bias;
- CLMeanStdDevNormalizationLayer _mean_std_norm_output_gate;
- CLPixelWiseMultiplication _pixelwise_mul_output_gate_coeff;
- CLArithmeticAddition _accum_output_gate_bias;
- CLTensor _input_gate_out1;
- CLTensor _input_gate_out2;
- CLTensor _input_gate_out3;
- CLTensor _input_gate_out4;
- CLTensor _forget_gate_out1;
- CLTensor _forget_gate_out2;
- CLTensor _forget_gate_out3;
- CLTensor _forget_gate_out4;
- CLTensor _forget_gate_out5;
- CLTensor _forget_gate_out6;
- CLTensor _cell_state_out1;
- CLTensor _cell_state_out2;
- CLTensor _cell_state_out3;
- CLTensor _cell_state_out4;
- CLTensor _cell_state_out5;
- CLTensor _output1;
- CLTensor _output2;
- CLTensor _output3;
- CLTensor _output4;
- CLTensor _cell_state_activation;
- CLTensor _output_state1;
- CLTensor _ones;
- CLTensor _input_layer_norm_out1;
- CLTensor _input_layer_norm_out2;
- CLTensor _forget_layer_norm_out1;
- CLTensor _forget_layer_norm_out2;
- CLTensor _cell_layer_norm_out1;
- CLTensor _cell_layer_norm_out2;
- CLTensor _output_layer_norm_out1;
- CLTensor _output_layer_norm_out2;
- bool _run_peephole_opt;
- bool _run_cifg_opt;
- bool _perform_cell_clipping;
- bool _has_projection_weights;
- bool _perform_projection_clipping;
- bool _is_prepared;
- bool _is_layer_norm_lstm;
+ MemoryGroup _memory_group;
+ CLFullyConnectedLayer _fully_connected_input_gate;
+ CLArithmeticAddition _accum_input_gate1;
+ CLArithmeticSubtraction _subtract_input_gate;
+ CLPixelWiseMultiplication _pixelwise_mul_input_gate;
+ CLActivationLayer _activation_input_gate;
+ CLFullyConnectedLayer _fully_connected_forget_gate;
+ CLArithmeticAddition _accum_forget_gate1;
+ CLPixelWiseMultiplication _pixelwise_mul_forget_gate;
+ CLActivationLayer _activation_forget_gate;
+ CLFullyConnectedLayer _fully_connected_cell_state;
+ CLGEMM _gemm_cell_state1;
+ std::unique_ptr<CLTransposeKernel> _transpose_cell_state;
+ CLArithmeticAddition _accum_cell_state1;
+ CLArithmeticAddition _accum_cell_state2;
+ CLPixelWiseMultiplication _pixelwise_mul_cell_state1;
+ CLActivationLayer _activation_cell_state;
+ CLActivationLayer _cell_clip;
+ CLPixelWiseMultiplication _pixelwise_mul_cell_state2;
+ CLFullyConnectedLayer _fully_connected_output;
+ CLPixelWiseMultiplication _pixelwise_mul_output_state1;
+ CLArithmeticAddition _accum_output1;
+ CLActivationLayer _activation_output;
+ CLActivationLayer _activation_output_state;
+ CLPixelWiseMultiplication _pixelwise_mul_output_state2;
+ CLFullyConnectedLayer _fully_connected_output_state;
+ CLActivationLayer _projection_clip;
+ std::unique_ptr<CLCopyKernel> _copy_cell_state;
+ std::unique_ptr<CLCopyKernel> _copy_output;
+ CLConcatenateLayer _concat_scratch_buffer;
+ CLConcatenateLayer _concat_inputs_forget_gate;
+ CLConcatenateLayer _concat_weights_forget_gate;
+ CLConcatenateLayer _concat_weights_input_gate;
+ CLConcatenateLayer _concat_weights_output;
+ std::unique_ptr<CLMemsetKernel> _ones_memset_kernel;
+ CLMeanStdDevNormalizationLayer _mean_std_norm_input_gate;
+ CLPixelWiseMultiplication _pixelwise_mul_input_gate_coeff;
+ CLArithmeticAddition _accum_input_gate_bias;
+ CLMeanStdDevNormalizationLayer _mean_std_norm_forget_gate;
+ CLPixelWiseMultiplication _pixelwise_mul_forget_gate_coeff;
+ CLArithmeticAddition _accum_forget_gate_bias;
+ CLMeanStdDevNormalizationLayer _mean_std_norm_cell_gate;
+ CLPixelWiseMultiplication _pixelwise_mul_cell_gate_coeff;
+ CLArithmeticAddition _accum_cell_gate_bias;
+ CLMeanStdDevNormalizationLayer _mean_std_norm_output_gate;
+ CLPixelWiseMultiplication _pixelwise_mul_output_gate_coeff;
+ CLArithmeticAddition _accum_output_gate_bias;
+ CLTensor _input_gate_out1;
+ CLTensor _input_gate_out2;
+ CLTensor _input_gate_out3;
+ CLTensor _input_gate_out4;
+ CLTensor _forget_gate_out1;
+ CLTensor _forget_gate_out2;
+ CLTensor _forget_gate_out3;
+ CLTensor _forget_gate_out4;
+ CLTensor _forget_gate_out5;
+ CLTensor _forget_gate_out6;
+ CLTensor _cell_state_out1;
+ CLTensor _cell_state_out2;
+ CLTensor _cell_state_out3;
+ CLTensor _cell_state_out4;
+ CLTensor _cell_state_out5;
+ CLTensor _output1;
+ CLTensor _output2;
+ CLTensor _output3;
+ CLTensor _output4;
+ CLTensor _cell_state_activation;
+ CLTensor _output_state1;
+ CLTensor _ones;
+ CLTensor _input_layer_norm_out1;
+ CLTensor _input_layer_norm_out2;
+ CLTensor _forget_layer_norm_out1;
+ CLTensor _forget_layer_norm_out2;
+ CLTensor _cell_layer_norm_out1;
+ CLTensor _cell_layer_norm_out2;
+ CLTensor _output_layer_norm_out1;
+ CLTensor _output_layer_norm_out2;
+ bool _run_peephole_opt;
+ bool _run_cifg_opt;
+ bool _perform_cell_clipping;
+ bool _has_projection_weights;
+ bool _perform_projection_clipping;
+ bool _is_prepared;
+ bool _is_layer_norm_lstm;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLLSTMLAYER_H */
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/CL/kernels/CLCol2ImKernel.h"
-#include "arm_compute/core/CL/kernels/CLIm2ColKernel.h"
-#include "arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h"
-#include "arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/IMemoryManager.h"
namespace arm_compute
{
+class CLCompileContext;
+class CLCol2ImKernel;
+class CLIm2ColKernel;
+class CLWeightsReshapeKernel;
+class CLLocallyConnectedMatrixMultiplyKernel;
class ICLTensor;
+class ITensorInfo;
/** Basic function to compute the locally connected layer. This function calls the following OpenCL kernels:
*
void prepare() override;
private:
- MemoryGroup _memory_group;
- CLIm2ColKernel _input_im2col_kernel;
- CLWeightsReshapeKernel _weights_reshape_kernel;
- CLLocallyConnectedMatrixMultiplyKernel _mm_kernel;
- CLCol2ImKernel _output_col2im_kernel;
- CLTensor _input_im2col_reshaped;
- CLTensor _weights_reshaped;
- CLTensor _gemm_output;
- bool _is_prepared;
- const ICLTensor *_original_weights;
+ MemoryGroup _memory_group;
+ std::unique_ptr<CLIm2ColKernel> _input_im2col_kernel;
+ std::unique_ptr<CLWeightsReshapeKernel> _weights_reshape_kernel;
+ std::unique_ptr<CLLocallyConnectedMatrixMultiplyKernel> _mm_kernel;
+ std::unique_ptr<CLCol2ImKernel> _output_col2im_kernel;
+ CLTensor _input_im2col_reshaped;
+ CLTensor _weights_reshaped;
+ CLTensor _gemm_output;
+ bool _is_prepared;
+ const ICLTensor *_original_weights;
};
}
#endif /* ARM_COMPUTE_CLLOCALLYCONNECTEDLAYER_H */
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to run @ref CLMagnitudePhaseKernel. */
#ifndef ARM_COMPUTE_CLMAXUNPOOLINGLAYER_H
#define ARM_COMPUTE_CLMAXUNPOOLINGLAYER_H
+#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/CL/kernels/CLMaxUnpoolingLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLMemsetKernel.h"
+#include <memory>
namespace arm_compute
{
-class ITensor;
+class CLCompileContext;
+class ICLTensor;
+class ITensorInfo;
+class CLMaxUnpoolingLayerKernel;
+class CLMemsetKernel;
+struct PoolingLayerInfo;
/** Function to perform MaxUnpooling. This function calls the following OpenCL kernels:
*
public:
/** Constructor */
CLMaxUnpoolingLayer();
+ /** Prevent instances of this class from being copied */
+ CLMaxUnpoolingLayer(const CLMaxUnpoolingLayer &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLMaxUnpoolingLayer &operator=(const CLMaxUnpoolingLayer &) = delete;
+ /** Default destructor */
+ ~CLMaxUnpoolingLayer();
/** Set the input and output tensors.
*
* @note Output shape must be equal to the shape of the original input to pool.
void run() override;
private:
- CLMemsetKernel _memset_kernel;
- CLMaxUnpoolingLayerKernel _unpooling_layer_kernel;
+ std::unique_ptr<CLMemsetKernel> _memset_kernel;
+ std::unique_ptr<CLMaxUnpoolingLayerKernel> _unpooling_layer_kernel;
};
}
#endif /* ARM_COMPUTE_CLMAXUNPOOLINGLAYER_H */
#define ARM_COMPUTE_CLMEANSTDDEV_H
#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
-#include "arm_compute/core/CL/kernels/CLMeanStdDevKernel.h"
#include "arm_compute/runtime/CL/functions/CLReductionOperation.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
+#include <memory>
+
namespace arm_compute
{
+class CLCompileContext;
+class ICLTensor;
+class ITensorInfo;
+class CLFillBorderKernel;
+class CLMeanStdDevKernel;
/** Basic function to execute mean and standard deviation by calling @ref CLMeanStdDevKernel */
class CLMeanStdDev : public IFunction
{
/** Allow instances of this class to be moved */
CLMeanStdDev &operator=(CLMeanStdDev &&) = default;
/** Default destructor */
- ~CLMeanStdDev() = default;
+ ~CLMeanStdDev();
/** Initialise the kernel's inputs and outputs.
*
* @param[in, out] input Input image. Data types supported: U8/F16/F32. (Written to only for border filling)
void run_float();
void run_int();
- MemoryGroup _memory_group; /**< Function's memory group */
- DataType _data_type; /**< Input data type. */
- unsigned int _num_pixels; /**< Number of image's pixels. */
- bool _run_stddev; /**< Flag for knowing if we should run stddev reduction function. */
- CLReductionOperation _reduction_operation_mean; /**< Reduction operation function for computing mean value. */
- CLReductionOperation _reduction_operation_stddev; /**< Reduction operation function for computing standard deviation. */
- CLTensor _reduction_output_mean; /**< Reduction operation output tensor for mean value. */
- CLTensor _reduction_output_stddev; /**< Reduction operation output tensor for standard deviation value. */
- float *_mean; /**< Pointer that holds the mean value. */
- float *_stddev; /**< Pointer that holds the standard deviation value. */
- CLMeanStdDevKernel _mean_stddev_kernel; /**< Kernel that standard deviation calculation. */
- CLFillBorderKernel _fill_border_kernel; /**< Kernel that fills the border with zeroes. */
- cl::Buffer _global_sum; /**< Variable that holds the global sum among calls in order to ease reduction */
- cl::Buffer _global_sum_squared; /**< Variable that holds the global sum of squared values among calls in order to ease reduction */
+ MemoryGroup _memory_group; /**< Function's memory group */
+ DataType _data_type; /**< Input data type. */
+ unsigned int _num_pixels; /**< Number of image's pixels. */
+ bool _run_stddev; /**< Flag for knowing if we should run stddev reduction function. */
+ CLReductionOperation _reduction_operation_mean; /**< Reduction operation function for computing mean value. */
+ CLReductionOperation _reduction_operation_stddev; /**< Reduction operation function for computing standard deviation. */
+ CLTensor _reduction_output_mean; /**< Reduction operation output tensor for mean value. */
+ CLTensor _reduction_output_stddev; /**< Reduction operation output tensor for standard deviation value. */
+ float *_mean; /**< Pointer that holds the mean value. */
+ float *_stddev; /**< Pointer that holds the standard deviation value. */
+    std::unique_ptr<CLMeanStdDevKernel>      _mean_stddev_kernel;        /**< Kernel that performs the mean and standard deviation calculation. */
+ std::unique_ptr<CLFillBorderKernel> _fill_border_kernel; /**< Kernel that fills the border with zeroes. */
+ cl::Buffer _global_sum; /**< Variable that holds the global sum among calls in order to ease reduction */
+ cl::Buffer _global_sum_squared; /**< Variable that holds the global sum of squared values among calls in order to ease reduction */
};
}
#endif /*ARM_COMPUTE_CLMEANSTDDEV_H */
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to execute mean and standard deviation normalization by calling @ref CLMeanStdDevNormalizationKernel */
class CLMeanStdDevNormalizationLayer : public ICLSimpleFunction
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to execute median filter. This function calls the following OpenCL kernels:
#ifndef ARM_COMPUTE_CLMINMAXLOCATION_H
#define ARM_COMPUTE_CLMINMAXLOCATION_H
-#include "arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h"
#include "arm_compute/runtime/CL/CLArray.h"
#include "arm_compute/runtime/IFunction.h"
+#include <memory>
+
namespace arm_compute
{
+class CLCompileContext;
+class CLMinMaxKernel;
+class CLMinMaxLocationKernel;
class ICLTensor;
using ICLImage = ICLTensor;
CLMinMaxLocation(CLMinMaxLocation &&) = default;
/** Allow instances of this class to be moved */
CLMinMaxLocation &operator=(CLMinMaxLocation &&) = default;
+ /** Default destructor */
+ ~CLMinMaxLocation();
/** Initialise the kernel's inputs and outputs.
*
* @note When locations of min and max occurrences are requested, the reported number of locations is limited to the given array size.
void run() override;
private:
- CLMinMaxKernel _min_max_kernel; /**< Kernel that performs min/max */
- CLMinMaxLocationKernel _min_max_loc_kernel; /**< Kernel that counts min/max occurrences and identifies their positions */
- cl::Buffer _min_max_vals; /**< Buffer to collect min, max values */
- cl::Buffer _min_max_count_vals; /**< Buffer to collect min, max values */
- void *_min; /**< Minimum value. */
- void *_max; /**< Maximum value. */
- uint32_t *_min_count; /**< Minimum value occurrences. */
- uint32_t *_max_count; /**< Maximum value occurrences. */
- CLCoordinates2DArray *_min_loc; /**< Minimum value occurrences coordinates. */
- CLCoordinates2DArray *_max_loc; /**< Maximum value occurrences coordinates. */
+ std::unique_ptr<CLMinMaxKernel> _min_max_kernel; /**< Kernel that performs min/max */
+ std::unique_ptr<CLMinMaxLocationKernel> _min_max_loc_kernel; /**< Kernel that counts min/max occurrences and identifies their positions */
+ cl::Buffer _min_max_vals; /**< Buffer to collect min, max values */
+    cl::Buffer                              _min_max_count_vals;  /**< Buffer to collect the number of min, max occurrences */
+ void *_min; /**< Minimum value. */
+ void *_max; /**< Maximum value. */
+ uint32_t *_min_count; /**< Minimum value occurrences. */
+ uint32_t *_max_count; /**< Maximum value occurrences. */
+ CLCoordinates2DArray *_min_loc; /**< Minimum value occurrences coordinates. */
+ CLCoordinates2DArray *_max_loc; /**< Maximum value occurrences coordinates. */
};
}
#endif /*ARM_COMPUTE_CLMINMAXLOCATION_H */
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to execute non linear filter. This function calls the following OpenCL kernels:
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to execute non-maxima suppression over a 3x3 window. This function calls the following CL kernels:
#ifndef ARM_COMPUTE_CLNORMALIZATIONLAYER_H
#define ARM_COMPUTE_CLNORMALIZATIONLAYER_H
-#include "arm_compute/runtime/IFunction.h"
-
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
-#include "arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h"
+#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/Types.h"
+#include <memory>
namespace arm_compute
{
+class CLCompileContext;
+class CLFillBorderKernel;
+class CLNormalizationLayerKernel;
class ICLTensor;
+class ITensorInfo;
/** Basic function to compute a normalization layer. This function calls the following CL kernels:
*
public:
/** Default constructor */
CLNormalizationLayer();
+ /** Prevent instances of this class from being copied */
+ CLNormalizationLayer(const CLNormalizationLayer &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLNormalizationLayer &operator=(const CLNormalizationLayer &) = delete;
+ /** Prevent instances of this class to be moved */
+ CLNormalizationLayer(CLNormalizationLayer &&) = delete;
+ /** Prevent instances of this class to be moved */
+ CLNormalizationLayer &operator=(CLNormalizationLayer &&) = delete;
+ /** Default destructor */
+ ~CLNormalizationLayer();
/** Set the input and output tensors.
*
* @param[in, out] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM],
void run() override;
private:
- CLNormalizationLayerKernel _norm_kernel; /**< Normalization layer kernel to run */
- CLFillBorderKernel _border_handler; /**< Kernel to handle borders */
+ std::unique_ptr<CLNormalizationLayerKernel> _norm_kernel; /**< Normalization layer kernel to run */
+ std::unique_ptr<CLFillBorderKernel> _border_handler; /**< Kernel to handle borders */
};
}
#endif /* ARM_COMPUTE_CLNORMALIZATIONLAYER_H */
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLNormalizePlanarYUVLayerKernel
*
#ifndef ARM_COMPUTE_CLOPTICALFLOW_H
#define ARM_COMPUTE_CLOPTICALFLOW_H
-#include "arm_compute/core/CL/kernels/CLLKTrackerKernel.h"
-
#include "arm_compute/core/IArray.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLArray.h"
namespace arm_compute
{
+class CLCompileContext;
class CLPyramid;
+class CLLKTrackerInitKernel;
+class CLLKTrackerStage0Kernel;
+class CLLKTrackerStage1Kernel;
+class CLLKTrackerFinalizeKernel;
/** OpenCL Array of Internal Keypoints */
using CLLKInternalKeypointArray = CLArray<CLLKInternalKeypoint>;
CLOpticalFlow(CLOpticalFlow &&) = default;
/** Allow instances of this class to be moved */
CLOpticalFlow &operator=(CLOpticalFlow &&) = default;
+ /** Default destructor */
+ ~CLOpticalFlow();
/** Initialise the function input and output
*
* @param[in] old_pyramid Pointer to the pyramid for the old tensor. Data types supported U8
void run() override;
private:
- MemoryGroup _memory_group;
- std::vector<CLLKTrackerInitKernel> _tracker_init_kernel;
- std::vector<CLLKTrackerStage0Kernel> _tracker_stage0_kernel;
- std::vector<CLLKTrackerStage1Kernel> _tracker_stage1_kernel;
- CLLKTrackerFinalizeKernel _tracker_finalize_kernel;
- std::vector<CLScharr3x3> _func_scharr;
- std::vector<CLTensor> _scharr_gx;
- std::vector<CLTensor> _scharr_gy;
- const ICLKeyPointArray *_old_points;
- const ICLKeyPointArray *_new_points_estimates;
- ICLKeyPointArray *_new_points;
- std::unique_ptr<CLLKInternalKeypointArray> _old_points_internal;
- std::unique_ptr<CLLKInternalKeypointArray> _new_points_internal;
- std::unique_ptr<CLCoefficientTableArray> _coefficient_table;
- std::unique_ptr<CLOldValueArray> _old_values;
- size_t _num_levels;
+ MemoryGroup _memory_group;
+ std::vector<std::unique_ptr<CLLKTrackerInitKernel>> _tracker_init_kernel;
+ std::vector<std::unique_ptr<CLLKTrackerStage0Kernel>> _tracker_stage0_kernel;
+ std::vector<std::unique_ptr<CLLKTrackerStage1Kernel>> _tracker_stage1_kernel;
+ std::unique_ptr<CLLKTrackerFinalizeKernel> _tracker_finalize_kernel;
+ std::vector<CLScharr3x3> _func_scharr;
+ std::vector<CLTensor> _scharr_gx;
+ std::vector<CLTensor> _scharr_gy;
+ const ICLKeyPointArray *_old_points;
+ const ICLKeyPointArray *_new_points_estimates;
+ ICLKeyPointArray *_new_points;
+ std::unique_ptr<CLLKInternalKeypointArray> _old_points_internal;
+ std::unique_ptr<CLLKInternalKeypointArray> _new_points_internal;
+ std::unique_ptr<CLCoefficientTableArray> _coefficient_table;
+ std::unique_ptr<CLOldValueArray> _old_values;
+ size_t _num_levels;
};
}
#endif /*ARM_COMPUTE_CLOPTICALFLOW_H */
#ifndef ARM_COMPUTE_CLPRELULAYER_H
#define ARM_COMPUTE_CLPRELULAYER_H
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
#include "arm_compute/runtime/CL/ICLOperator.h"
#include "arm_compute/runtime/IFunction.h"
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
namespace experimental
{
#ifndef ARM_COMPUTE_CLPADLAYER_H
#define ARM_COMPUTE_CLPADLAYER_H
-#include "arm_compute/core/CL/kernels/CLCopyKernel.h"
-#include "arm_compute/core/CL/kernels/CLPadLayerKernel.h"
+#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/IFunction.h"
namespace arm_compute
{
+class CLCompileContext;
+class CLPadLayerKernel;
+class CLCopyKernel;
class ICLTensor;
/** Basic function to pad a tensor. This function calls the following OpenCL functions/kernels:
CLPadLayer &operator=(const CLPadLayer &) = delete;
/** Default move assignment operator */
CLPadLayer &operator=(CLPadLayer &&) = default;
+ /** Default destructor */
+ ~CLPadLayer();
/** Initialize the function
*
private:
void configure_reflect_mode(ICLTensor *input, ICLTensor *output);
- CLPadLayerKernel _pad_kernel;
- CLCopyKernel _copy_kernel;
- bool _perform_pad;
+ std::unique_ptr<CLPadLayerKernel> _pad_kernel;
+ std::unique_ptr<CLCopyKernel> _copy_kernel;
+ bool _perform_pad;
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_CLPADLAYER_H */
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to execute an @ref CLPermuteKernel. */
class CLPermute : public ICLSimpleFunction
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to execute an @ref CLMagnitudePhaseKernel. */
#ifndef ARM_COMPUTE_CLPIXELWISEMULTIPLICATION_H
#define ARM_COMPUTE_CLPIXELWISEMULTIPLICATION_H
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
#include "arm_compute/runtime/CL/ICLOperator.h"
#include "arm_compute/runtime/IFunction.h"
namespace arm_compute
{
// Forward declaration
+class CLCompileContext;
+class CLFillBorderKernel;
class ICLTensor;
+class ITensorInfo;
namespace experimental
{
void run(ITensorPack &tensors) override;
private:
- CLFillBorderKernel _border_handler;
+ std::unique_ptr<CLFillBorderKernel> _border_handler;
};
/** Basic function to run @ref CLComplexPixelWiseMultiplicationKernel. */
void run(ITensorPack &tensors) override;
private:
- CLFillBorderKernel _border_handler;
+ std::unique_ptr<CLFillBorderKernel> _border_handler;
};
} // namespace experimental
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to simulate a pooling layer with the specified pooling operation. This function calls the following OpenCL kernels:
*
#ifndef ARM_COMPUTE_CLPRIORBOXLAYER_H
#define ARM_COMPUTE_CLPRIORBOXLAYER_H
-#include "arm_compute/core/CL/kernels/CLPriorBoxLayerKernel.h"
+#include "arm_compute/core/CL/OpenCL.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
namespace arm_compute
{
+class CLCompileContext;
+class CLPriorBoxLayerKernel;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLPriorBoxLayerKernel. */
class CLPriorBoxLayer : public ICLSimpleFunction
#ifndef ARM_COMPUTE_CLQLSTMLAYER_H
#define ARM_COMPUTE_CLQLSTMLAYER_H
-#include "arm_compute/core/CL/kernels/CLCopyKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h"
-#include "arm_compute/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
#include "arm_compute/runtime/CL/functions/CLElementwiseOperations.h"
namespace arm_compute
{
// Forward declarations
+class CLCompileContext;
+class CLCopyKernel;
class ICLTensor;
+class CLGEMMLowpMatrixAReductionKernel;
+class CLQLSTMLayerNormalizationKernel;
+class ITensorInfo;
/** Basic function to run @ref CLQLSTMLayer
*
CLQLSTMLayer &operator=(const CLQLSTMLayer &) = delete;
/** Default move assignment operator */
CLQLSTMLayer &operator=(CLQLSTMLayer &&) = default;
+ /** Default destructor */
+ ~CLQLSTMLayer();
/** Initialize function's tensors.
*
* @param[in] input Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: QASYMM8_SIGNED.
};
// Functions used
- CLTranspose _transpose_input_to_forget_weights{};
- CLTranspose _transpose_input_to_cell_weights{};
- CLTranspose _transpose_input_to_output_weights{};
- CLTranspose _transpose_input_to_input_weights{};
- CLTranspose _transpose_recurrent_to_forget_weights{};
- CLTranspose _transpose_recurrent_to_cell_weights{};
- CLTranspose _transpose_recurrent_to_output_weights{};
- CLTranspose _transpose_recurrent_to_input_weights{};
- CLTranspose _transpose_projection_weights{};
- CLGEMMLowpMatrixAReductionKernel _input_to_input_reduction{};
- CLGEMMLowpMatrixAReductionKernel _recurrent_to_input_reduction{};
- CLGEMMLowpMatrixAReductionKernel _input_to_forget_reduction{};
- CLGEMMLowpMatrixAReductionKernel _recurrent_to_forget_reduction{};
- CLGEMMLowpMatrixAReductionKernel _input_to_cell_reduction{};
- CLGEMMLowpMatrixAReductionKernel _recurrent_to_cell_reduction{};
- CLGEMMLowpMatrixAReductionKernel _input_to_output_reduction{};
- CLGEMMLowpMatrixAReductionKernel _recurrent_to_output_reduction{};
- CLGEMMLowpMatrixAReductionKernel _projection_reduction{};
- CLArithmeticAddition _projection_bias_add{};
- CLGEMMLowpMatrixMultiplyCore _mm_input_to_forget{};
- CLGEMMLowpMatrixMultiplyCore _mm_recurrent_to_forget{};
- CLPixelWiseMultiplication _pixelwise_mul_cell_to_forget{};
- CLGEMMLowpOutputStage _input_to_forget_outstage{};
- CLGEMMLowpOutputStage _recurrent_to_forget_outstage{};
- CLGEMMLowpOutputStage _cell_to_forget_outstage{};
- CLArithmeticAddition _accumulate_input_recurrent_forget{};
- CLArithmeticAddition _accumulate_cell_forget{};
- CLActivationLayer _forget_gate_sigmoid{};
- CLGEMMLowpMatrixMultiplyCore _mm_input_to_cell{};
- CLGEMMLowpOutputStage _input_to_cell_outstage{};
- CLGEMMLowpMatrixMultiplyCore _mm_recurrent_to_cell{};
- CLGEMMLowpOutputStage _recurrent_to_cell_outstage{};
- CLArithmeticAddition _accumulate_input_recurrent_modulation{};
- CLActivationLayer _cell_gate_tanh{};
- CLArithmeticSubtraction _input_gate_sub{};
- CLGEMMLowpMatrixMultiplyCore _mm_input_to_input{};
- CLGEMMLowpOutputStage _input_to_input_outstage{};
- CLGEMMLowpMatrixMultiplyCore _mm_recurrent_to_input{};
- CLGEMMLowpOutputStage _recurrent_to_input_outstage{};
- CLArithmeticAddition _accumulate_input_recurrent_input{};
- CLPixelWiseMultiplication _pixelwise_mul_cell_to_input{};
- CLGEMMLowpOutputStage _cell_to_input_outstage{};
- CLArithmeticAddition _accumulate_cell_input{};
- CLActivationLayer _input_gate_sigmoid{};
- CLPixelWiseMultiplication _pixelwise_mul_forget_cell{};
- CLPixelWiseMultiplication _pixelwise_mul_input_cell{};
- CLArithmeticAddition _add_forget_cell{};
- CLActivationLayer _cell_clip{};
- CLGEMMLowpMatrixMultiplyCore _mm_input_to_output{};
- CLGEMMLowpOutputStage _input_to_output_outstage{};
- CLGEMMLowpMatrixMultiplyCore _mm_recurrent_to_output{};
- CLGEMMLowpOutputStage _recurrent_to_output_outstage{};
- CLArithmeticAddition _accumulate_input_recurrent_output{};
- CLPixelWiseMultiplication _pixelwise_mul_cell_to_output{};
- CLGEMMLowpOutputStage _cell_to_output_outstage{};
- CLArithmeticAddition _accumulate_cell_to_output{};
- CLActivationLayer _output_gate_sigmoid{};
- CLActivationLayer _hidden_tanh{};
- CLPixelWiseMultiplication _pixelwise_mul_hidden{};
- CLGEMMLowpOutputStage _hidden_outstage{};
- CLGEMMLowpMatrixMultiplyCore _mm_projection{};
- CLGEMMLowpOutputStage _projection_outstage{};
- CLArithmeticAddition _accumulate_projection{};
- CLActivationLayer _projection_clip{};
- std::array<CLQLSTMLayerNormalizationKernel, _layer_norm_count> _layer_norms{ {} };
- CLCopyKernel _copy_output{};
+ CLTranspose _transpose_input_to_forget_weights{};
+ CLTranspose _transpose_input_to_cell_weights{};
+ CLTranspose _transpose_input_to_output_weights{};
+ CLTranspose _transpose_input_to_input_weights{};
+ CLTranspose _transpose_recurrent_to_forget_weights{};
+ CLTranspose _transpose_recurrent_to_cell_weights{};
+ CLTranspose _transpose_recurrent_to_output_weights{};
+ CLTranspose _transpose_recurrent_to_input_weights{};
+ CLTranspose _transpose_projection_weights{};
+ std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _input_to_input_reduction;
+ std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _recurrent_to_input_reduction;
+ std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _input_to_forget_reduction;
+ std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _recurrent_to_forget_reduction;
+ std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _input_to_cell_reduction;
+ std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _recurrent_to_cell_reduction;
+ std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _input_to_output_reduction;
+ std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _recurrent_to_output_reduction;
+ std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _projection_reduction;
+ CLArithmeticAddition _projection_bias_add{};
+ CLGEMMLowpMatrixMultiplyCore _mm_input_to_forget{};
+ CLGEMMLowpMatrixMultiplyCore _mm_recurrent_to_forget{};
+ CLPixelWiseMultiplication _pixelwise_mul_cell_to_forget{};
+ CLGEMMLowpOutputStage _input_to_forget_outstage{};
+ CLGEMMLowpOutputStage _recurrent_to_forget_outstage{};
+ CLGEMMLowpOutputStage _cell_to_forget_outstage{};
+ CLArithmeticAddition _accumulate_input_recurrent_forget{};
+ CLArithmeticAddition _accumulate_cell_forget{};
+ CLActivationLayer _forget_gate_sigmoid{};
+ CLGEMMLowpMatrixMultiplyCore _mm_input_to_cell{};
+ CLGEMMLowpOutputStage _input_to_cell_outstage{};
+ CLGEMMLowpMatrixMultiplyCore _mm_recurrent_to_cell{};
+ CLGEMMLowpOutputStage _recurrent_to_cell_outstage{};
+ CLArithmeticAddition _accumulate_input_recurrent_modulation{};
+ CLActivationLayer _cell_gate_tanh{};
+ CLArithmeticSubtraction _input_gate_sub{};
+ CLGEMMLowpMatrixMultiplyCore _mm_input_to_input{};
+ CLGEMMLowpOutputStage _input_to_input_outstage{};
+ CLGEMMLowpMatrixMultiplyCore _mm_recurrent_to_input{};
+ CLGEMMLowpOutputStage _recurrent_to_input_outstage{};
+ CLArithmeticAddition _accumulate_input_recurrent_input{};
+ CLPixelWiseMultiplication _pixelwise_mul_cell_to_input{};
+ CLGEMMLowpOutputStage _cell_to_input_outstage{};
+ CLArithmeticAddition _accumulate_cell_input{};
+ CLActivationLayer _input_gate_sigmoid{};
+ CLPixelWiseMultiplication _pixelwise_mul_forget_cell{};
+ CLPixelWiseMultiplication _pixelwise_mul_input_cell{};
+ CLArithmeticAddition _add_forget_cell{};
+ CLActivationLayer _cell_clip{};
+ CLGEMMLowpMatrixMultiplyCore _mm_input_to_output{};
+ CLGEMMLowpOutputStage _input_to_output_outstage{};
+ CLGEMMLowpMatrixMultiplyCore _mm_recurrent_to_output{};
+ CLGEMMLowpOutputStage _recurrent_to_output_outstage{};
+ CLArithmeticAddition _accumulate_input_recurrent_output{};
+ CLPixelWiseMultiplication _pixelwise_mul_cell_to_output{};
+ CLGEMMLowpOutputStage _cell_to_output_outstage{};
+ CLArithmeticAddition _accumulate_cell_to_output{};
+ CLActivationLayer _output_gate_sigmoid{};
+ CLActivationLayer _hidden_tanh{};
+ CLPixelWiseMultiplication _pixelwise_mul_hidden{};
+ CLGEMMLowpOutputStage _hidden_outstage{};
+ CLGEMMLowpMatrixMultiplyCore _mm_projection{};
+ CLGEMMLowpOutputStage _projection_outstage{};
+ CLArithmeticAddition _accumulate_projection{};
+ CLActivationLayer _projection_clip{};
+ std::array<std::unique_ptr<CLQLSTMLayerNormalizationKernel>, _layer_norm_count> _layer_norms;
+ std::unique_ptr<CLCopyKernel> _copy_output;
TensorCopyKernel _projection_bias_copy{};
TensorCopyKernel _projection_output_to_accumulate_copy{};
inline CLQLSTMLayerNormalizationKernel &get_layer_norm(LayerNormGate g)
{
- return _layer_norms[getGateIndex(g)];
+ return *_layer_norms[getGateIndex(g)];
}
- inline void configure_layer_norm(LayerNormGate g, const ICLTensor *in)
- {
- ARM_COMPUTE_ERROR_ON(!_has_layer_norm);
-
- CLTensor *out = &get_layer_norm_output(g);
- _memory_group.manage(out);
- out->allocator()->init(*(in->info()));
-
- get_layer_norm(g).configure(in, out, get_layer_norm_weight(g), get_layer_norm_bias(g));
- }
-
- inline static Status validate_layer_norm(const ITensorInfo &in, const ITensorInfo &weight, const ITensorInfo &bias)
- {
- // Output quantization scale will be different, but ignored here
- // since it will be configured at configure() stage.
- const TensorInfo out
- {
- in
- };
- return CLQLSTMLayerNormalizationKernel::validate(&in, &out, &weight, &bias);
- }
+ inline void configure_layer_norm(LayerNormGate g, const ICLTensor *in);
+ inline static Status validate_layer_norm(const ITensorInfo &in, const ITensorInfo &weight, const ITensorInfo &bias);
// Temporary tensors
CLTensor _input_to_forget_weights_transposed{ nullptr };
#ifndef ARM_COMPUTE_CLQUANTIZATIONLAYER_H
#define ARM_COMPUTE_CLQUANTIZATIONLAYER_H
+#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to simulate a quantization layer. This function calls the following CL kernels:
*
#ifndef ARM_COMPUTE_CLRNN_LAYER_H
#define ARM_COMPUTE_CLRNN_LAYER_H
-#include "arm_compute/core/CL/kernels/CLCopyKernel.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
#include "arm_compute/runtime/CL/functions/CLElementwiseOperations.h"
#include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h"
#include "arm_compute/runtime/CL/functions/CLGEMM.h"
+#include <memory>
+
namespace arm_compute
{
+class CLCopyKernel;
class ICLTensor;
/** Basic function to run @ref CLRNNLayer */
public:
/** Default constructor */
CLRNNLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied */
+ CLRNNLayer(const CLRNNLayer &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLRNNLayer &operator=(const CLRNNLayer &) = delete;
+ /** Default destructor */
+ ~CLRNNLayer();
/** Initialize the function
*
* @param[in] input Input is a 2-D tensor of shape [input_size, batch_size]. Data types supported: F16/F32
void prepare() override;
private:
- MemoryGroup _memory_group;
- CLGEMM _gemm_state_f;
- CLArithmeticAddition _add_kernel;
- CLActivationLayer _activation;
- CLFullyConnectedLayer _fully_connected_kernel;
- CLCopyKernel _copy_kernel;
- CLTensor _fully_connected_out;
- CLTensor _gemm_output;
- CLTensor _add_output;
- bool _is_prepared;
+ MemoryGroup _memory_group;
+ CLGEMM _gemm_state_f;
+ CLArithmeticAddition _add_kernel;
+ CLActivationLayer _activation;
+ CLFullyConnectedLayer _fully_connected_kernel;
+ std::unique_ptr<CLCopyKernel> _copy_kernel;
+ CLTensor _fully_connected_out;
+ CLTensor _gemm_output;
+ CLTensor _add_output;
+ bool _is_prepared;
};
}
#endif /* ARM_COMPUTE_CLRNN_LAYER_H */
#define ARM_COMPUTE_CLROIALIGNLAYER_H
#include "arm_compute/core/CL/ICLArray.h"
-#include "arm_compute/core/CL/kernels/CLROIPoolingLayerKernel.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ROIPoolingLayerInfo;
+class ITensorInfo;
/** Basic function to run @ref CLROIAlignLayerKernel.
*
#ifndef ARM_COMPUTE_CLROIPOOLINGLAYER_H
#define ARM_COMPUTE_CLROIPOOLINGLAYER_H
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
#include "arm_compute/core/CL/ICLArray.h"
-#include "arm_compute/core/CL/kernels/CLROIPoolingLayerKernel.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ROIPoolingLayerInfo;
/** Basic function to run @ref CLROIPoolingLayerKernel.
*
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLRangeKernel
*
#ifndef ARM_COMPUTE_CLREDUCTIONOPERATION_H
#define ARM_COMPUTE_CLREDUCTIONOPERATION_H
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
-#include "arm_compute/core/CL/kernels/CLReductionOperationKernel.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLReshapeLayer.h"
#include "arm_compute/runtime/IFunction.h"
namespace arm_compute
{
// Forward declarations
+class CLCompileContext;
+class CLFillBorderKernel;
+class CLReductionOperationKernel;
class ICLTensor;
/** Perform reduction operation.
* @param[in] memory_manager (Optional) Memory manager.
*/
CLReductionOperation(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Default Destructor */
+ ~CLReductionOperation();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLReductionOperation(const CLReductionOperation &) = delete;
+ /** Default move constructor */
+ CLReductionOperation(CLReductionOperation &&) = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLReductionOperation &operator=(const CLReductionOperation &) = delete;
+ /** Default move assignment operator */
+ CLReductionOperation &operator=(CLReductionOperation &&) = default;
/** Set the input and output tensors.
*
private:
ICLTensor *configure_intermediate_result_vector(ICLTensor *input, ICLTensor *output);
- MemoryGroup _memory_group;
- std::vector<CLTensor> _results_vector;
- std::vector<CLReductionOperationKernel> _reduction_kernels_vector;
- std::vector<CLFillBorderKernel> _border_handlers_vector;
- CLReshapeLayer _reshape;
- unsigned int _num_of_stages;
- unsigned int _reduction_axis;
- bool _is_serial;
- bool _is_reshape_required;
+ MemoryGroup _memory_group;
+ std::vector<CLTensor> _results_vector;
+ std::vector<std::unique_ptr<CLReductionOperationKernel>> _reduction_kernels_vector;
+ std::vector<std::unique_ptr<CLFillBorderKernel>> _border_handlers_vector;
+ CLReshapeLayer _reshape;
+ unsigned int _num_of_stages;
+ unsigned int _reduction_axis;
+ bool _is_serial;
+ bool _is_reshape_required;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLREDUCTIONOPERATION_H */
\ No newline at end of file
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to execute remap. This function calls the following OpenCL kernels:
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
class CLReorgLayer : public ICLSimpleFunction
{
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLReshapeLayerKernel */
class CLReshapeLayer : public IFunction
#ifndef ARM_COMPUTE_CLREVERSE_H
#define ARM_COMPUTE_CLREVERSE_H
+#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLReverseKernel */
class CLReverse : public ICLSimpleFunction
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLScaleKernel */
class CLScale : public ICLSimpleFunction
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to execute scharr 3x3 filter. This function calls the following OpenCL kernels:
#ifndef ARM_COMPUTE_CLSELECT_H
#define ARM_COMPUTE_CLSELECT_H
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
namespace arm_compute
{
// Forward declarations
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLSelect */
class CLSelect : public ICLSimpleFunction
{
// Forward Declarations
class ICLTensor;
+class CLCompileContext;
+class ITensorInfo;
namespace experimental
{
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to execute sobel 3x3 filter. This function calls the following OpenCL kernels:
class CLSobel3x3 : public ICLSimpleFunction
{
public:
+ /** Default Constructor */
+ CLSobel3x3() = default;
+ /** Prevent instances of this class from being copied */
+ CLSobel3x3(const CLSobel3x3 &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLSobel3x3 &operator=(const CLSobel3x3 &) = delete;
+ /** Default destructor */
+ ~CLSobel3x3();
/** Initialise the function's source, destinations and border mode.
*
* @note At least one of output_x or output_y must be not NULL.
#ifndef ARM_COMPUTE_CLSOBEL5X5_H
#define ARM_COMPUTE_CLSOBEL5X5_H
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
-#include "arm_compute/core/CL/kernels/CLSobel5x5Kernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/IFunction.h"
namespace arm_compute
{
+class CLCompileContext;
+class CLFillBorderKernel;
+class CLSobel5x5HorKernel;
+class CLSobel5x5VertKernel;
class ICLTensor;
/** Basic function to execute sobel 5x5 filter. This function calls the following OpenCL kernels:
* @param[in] memory_manager (Optional) Memory manager.
*/
CLSobel5x5(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied */
+ CLSobel5x5(const CLSobel5x5 &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLSobel5x5 &operator=(const CLSobel5x5 &) = delete;
+ /** Default destructor */
+ ~CLSobel5x5();
/** Initialise the function's source, destinations and border mode.
*
* @note At least one of output_x or output_y must be not NULL.
void run() override;
protected:
- MemoryGroup _memory_group; /**< Function's memory group */
- CLSobel5x5HorKernel _sobel_hor; /**< Sobel Horizontal 5x5 kernel */
- CLSobel5x5VertKernel _sobel_vert; /**< Sobel Vertical 5x5 kernel */
- CLFillBorderKernel _border_handler; /**< Kernel to handle image borders */
- CLImage _tmp_x; /**< Temporary buffer for Sobel X */
- CLImage _tmp_y; /**< Temporary buffer for Sobel Y */
+ MemoryGroup _memory_group; /**< Function's memory group */
+ std::unique_ptr<CLSobel5x5HorKernel> _sobel_hor; /**< Sobel Horizontal 5x5 kernel */
+ std::unique_ptr<CLSobel5x5VertKernel> _sobel_vert; /**< Sobel Vertical 5x5 kernel */
+ std::unique_ptr<CLFillBorderKernel> _border_handler; /**< Kernel to handle image borders */
+ CLImage _tmp_x; /**< Temporary buffer for Sobel X */
+ CLImage _tmp_y; /**< Temporary buffer for Sobel Y */
};
}
#endif /*ARM_COMPUTE_CLSOBEL5X5_H */
#ifndef ARM_COMPUTE_CLSOBEL7X7_H
#define ARM_COMPUTE_CLSOBEL7X7_H
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
-#include "arm_compute/core/CL/kernels/CLSobel7x7Kernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/IFunction.h"
namespace arm_compute
{
+class CLCompileContext;
+class CLFillBorderKernel;
+class CLSobel7x7HorKernel;
+class CLSobel7x7VertKernel;
class ICLTensor;
/** Basic function to execute sobel 7x7 filter. This function calls the following OpenCL kernels:
* @param[in] memory_manager (Optional) Memory manager.
*/
CLSobel7x7(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied */
+ CLSobel7x7(const CLSobel7x7 &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLSobel7x7 &operator=(const CLSobel7x7 &) = delete;
+ /** Default destructor */
+ ~CLSobel7x7();
/** Initialise the function's source, destinations and border mode.
*
* @note At least one of output_x or output_y must be not NULL.
void run() override;
protected:
- MemoryGroup _memory_group; /**< Function's memory group */
- CLSobel7x7HorKernel _sobel_hor; /**< Sobel Horizontal 7x7 kernel */
- CLSobel7x7VertKernel _sobel_vert; /**< Sobel Vertical 7x7 kernel */
- CLFillBorderKernel _border_handler; /**< Kernel to handle image borders */
- CLImage _tmp_x; /**< Temporary buffer for Sobel X */
- CLImage _tmp_y; /**< Temporary buffer for Sobel Y */
+ MemoryGroup _memory_group; /**< Function's memory group */
+ std::unique_ptr<CLSobel7x7HorKernel> _sobel_hor; /**< Sobel Horizontal 7x7 kernel */
+ std::unique_ptr<CLSobel7x7VertKernel> _sobel_vert; /**< Sobel Vertical 7x7 kernel */
+ std::unique_ptr<CLFillBorderKernel> _border_handler; /**< Kernel to handle image borders */
+ CLImage _tmp_x; /**< Temporary buffer for Sobel X */
+ CLImage _tmp_y; /**< Temporary buffer for Sobel Y */
};
}
#endif /*ARM_COMPUTE_CLSOBEL7X7_H */
#ifndef ARM_COMPUTE_CLSOFTMAXLAYER_H
#define ARM_COMPUTE_CLSOFTMAXLAYER_H
-#include "arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLPermute.h"
#include "arm_compute/runtime/IFunction.h"
namespace arm_compute
{
+class CLCompileContext;
+class CLLogits1DMaxShiftExpSumKernel;
+class CLLogits1DNormKernel;
class ICLTensor;
+class ITensorInfo;
/** Basic function to compute a SoftmaxLayer.
*
public:
/** Constructor */
CLSoftmaxLayerGeneric(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ /** Prevent instances of this class from being copied */
+ CLSoftmaxLayerGeneric(const CLSoftmaxLayerGeneric &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLSoftmaxLayerGeneric &operator=(const CLSoftmaxLayerGeneric &) = delete;
+ /** Prevent instances of this class from being moved */
+ CLSoftmaxLayerGeneric(CLSoftmaxLayerGeneric &&) = delete;
+ /** Prevent instances of this class from being moved */
+ CLSoftmaxLayerGeneric &operator=(CLSoftmaxLayerGeneric &&) = delete;
+ /** Default destructor */
+ ~CLSoftmaxLayerGeneric();
/** Set the input and output tensors.
*
* @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32 for Softmax and F16/F32 for Log Softmax
void run() override;
private:
- MemoryGroup _memory_group;
- CLPermute _permute_input;
- CLPermute _permute_output;
- CLLogits1DMaxShiftExpSumKernel _max_shift_exp_sum_kernel;
- CLLogits1DNormKernel _norm_kernel;
- CLTensor _max;
- CLTensor _sum;
- CLTensor _tmp;
- CLTensor _input_permuted;
- CLTensor _output_permuted;
- bool _needs_permute;
+ MemoryGroup _memory_group;
+ CLPermute _permute_input;
+ CLPermute _permute_output;
+ std::unique_ptr<CLLogits1DMaxShiftExpSumKernel> _max_shift_exp_sum_kernel;
+ std::unique_ptr<CLLogits1DNormKernel> _norm_kernel;
+ CLTensor _max;
+ CLTensor _sum;
+ CLTensor _tmp;
+ CLTensor _input_permuted;
+ CLTensor _output_permuted;
+ bool _needs_permute;
};
using CLSoftmaxLayer = CLSoftmaxLayerGeneric<false>;
#ifndef ARM_COMPUTE_CLSPACETOBATCHLAYER_H
#define ARM_COMPUTE_CLSPACETOBATCHLAYER_H
-#include "arm_compute/runtime/IFunction.h"
-
-#include "arm_compute/core/CL/kernels/CLMemsetKernel.h"
-#include "arm_compute/core/CL/kernels/CLSpaceToBatchLayerKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <memory>
namespace arm_compute
{
+class CLCompileContext;
+class CLMemsetKernel;
+class CLSpaceToBatchLayerKernel;
class ICLTensor;
+class ITensorInfo;
/** Basic function to spatial divide a tensor. This function calls the following OpenCL kernels/functions:
*
/** Allow instances of this class to be moved */
CLSpaceToBatchLayer &operator=(CLSpaceToBatchLayer &&) = default;
/** Default destructor */
- virtual ~CLSpaceToBatchLayer() = default;
+ ~CLSpaceToBatchLayer();
/** Set the input and output tensors.
*
* @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
void run() override;
private:
- CLSpaceToBatchLayerKernel _space_to_batch_kernel; /**< SpaceToBatch kernel to run */
- CLMemsetKernel _memset_kernel; /**< Memset kernel to run */
- bool _has_padding; /**< Flag to check if the output has padding */
+ std::unique_ptr<CLSpaceToBatchLayerKernel> _space_to_batch_kernel; /**< SpaceToBatch kernel to run */
+ std::unique_ptr<CLMemsetKernel> _memset_kernel; /**< Memset kernel to run */
+ bool _has_padding; /**< Flag to check if the output has padding */
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLSPACETOBATCHLAYER_H */
#ifndef ARM_COMPUTE_CLSPACETODEPTHLAYER_H
#define ARM_COMPUTE_CLSPACETODEPTHLAYER_H
+#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/CL/kernels/CLSpaceToDepthLayerKernel.h"
-#include "arm_compute/core/Types.h"
+#include <memory>
namespace arm_compute
{
+class CLCompileContext;
+class CLSpaceToDepthLayerKernel;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLSpaceToDepthLayerKernel. */
class CLSpaceToDepthLayer : public IFunction
public:
/** Default constructor */
CLSpaceToDepthLayer();
+ /** Prevent instances of this class from being copied */
+ CLSpaceToDepthLayer(const CLSpaceToDepthLayer &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLSpaceToDepthLayer &operator=(const CLSpaceToDepthLayer &) = delete;
+ /** Prevent instances of this class from being moved */
+ CLSpaceToDepthLayer(CLSpaceToDepthLayer &&) = delete;
+ /** Prevent instances of this class from being moved */
+ CLSpaceToDepthLayer &operator=(CLSpaceToDepthLayer &&) = delete;
+ /** Default destructor */
+ ~CLSpaceToDepthLayer();
/** Set the input and output tensors.
*
* @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
void run() override;
private:
- CLSpaceToDepthLayerKernel _space_to_depth_kernel; /**< CLSpaceToDepthLayerKernel to run */
+ std::unique_ptr<CLSpaceToDepthLayerKernel> _space_to_depth_kernel; /**< CLSpaceToDepthLayerKernel to run */
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLSPACETODEPTHLAYER_H */
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/CL/kernels/CLStackLayerKernel.h"
-
#include <memory>
#include <vector>
namespace arm_compute
{
+class CLCompileContext;
+class CLStackLayerKernel;
class ICLTensor;
+class ITensorInfo;
/** Basic function to stack tensors along an axis. This function calls the following kernel:
*
public:
/** Default constructor */
CLStackLayer();
+ /** Prevent instances of this class from being copied */
+ CLStackLayer(const CLStackLayer &) = delete;
+ /** Prevent instances of this class from being copied */
+ CLStackLayer &operator=(const CLStackLayer &) = delete;
+ /** Prevent instances of this class from being moved */
+ CLStackLayer(CLStackLayer &&) = delete;
+ /** Prevent instances of this class from being moved */
+ CLStackLayer &operator=(CLStackLayer &&) = delete;
+ /** Default destructor */
+ ~CLStackLayer();
/** Initialise the kernel's inputs vector and output.
*
* @note Supported input tensor rank: up to 4
void run() override;
private:
- std::vector<ICLTensor *> _input;
- std::vector<CLStackLayerKernel> _stack_kernels;
- unsigned int _num_inputs;
+ std::vector<ICLTensor *> _input;
+ std::vector<std::unique_ptr<CLStackLayerKernel>> _stack_kernels;
+ unsigned int _num_inputs;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLSTACKLAYER_H */
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
class ICLLut;
namespace arm_compute
{
// Forward declarations
+class CLCompileContext;
class ICLTensor;
/** Basic function to run @ref CLThresholdKernel */
#ifndef ARM_COMPUTE_CLTILE_H
#define ARM_COMPUTE_CLTILE_H
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLTileKernel */
class CLTile : public ICLSimpleFunction
#ifndef ARM_COMPUTE_CLTRANSPOSE_H
#define ARM_COMPUTE_CLTRANSPOSE_H
+#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to transpose a matrix on OpenCL. This function calls the following OpenCL kernel:
*
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
+#include <memory>
+
namespace arm_compute
{
+class CLCompileContext;
+class CLUpsampleLayerKernel;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLUpsampleLayerKernel */
class CLUpsampleLayer : public IFunction
/** Allow instances of this class to be moved */
CLUpsampleLayer &operator=(CLUpsampleLayer &&) = default;
/** Default destructor */
- virtual ~CLUpsampleLayer() = default;
+ ~CLUpsampleLayer();
/** Initialize the function's source, destination, interpolation type and border_mode.
*
void run() override;
private:
- CLUpsampleLayerKernel _upsample;
- ICLTensor *_output;
+ std::unique_ptr<CLUpsampleLayerKernel> _upsample;
+ ICLTensor *_output;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLUPSAMPLELAYER_H */
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to run @ref CLWarpAffineKernel for AFFINE transformation */
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
/** Basic function to run @ref CLWarpPerspectiveKernel for PERSPECTIVE transformation */
#ifndef ARM_COMPUTE_CLWINOGRADCONVOLUTIONLAYER_H
#define ARM_COMPUTE_CLWINOGRADCONVOLUTIONLAYER_H
-#include "arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h"
-#include "arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/functions/CLGEMM.h"
#include "arm_compute/runtime/CL/functions/CLWinogradInputTransform.h"
namespace arm_compute
{
+class CLCompileContext;
+class CLWinogradFilterTransformKernel;
+class CLWinogradOutputTransformKernel;
class ICLTensor;
+class ITensorInfo;
/** Basic function to execute Winograd-based convolution on OpenCL. This function calls the following OpenCL functions/kernels:
*
CLWinogradConvolutionLayer &operator=(const CLWinogradConvolutionLayer &) = delete;
/** Default move assignment operator */
CLWinogradConvolutionLayer &operator=(CLWinogradConvolutionLayer &&) = default;
+ /** Default destructor */
+ ~CLWinogradConvolutionLayer();
/** Set the input and output tensors.
*
* @note: This function only works with 3x3,3x1,1x3,5x5,5x1,1x5,7x1 and 1x7 kernels along with unit strides for both NCHW and NHWC data layout
void prepare() override;
private:
- MemoryGroup _memory_group;
- CLGEMM _batched_mm;
- CLWinogradInputTransform _input_transform;
- CLWinogradFilterTransformKernel _filter_transform;
- CLWinogradOutputTransformKernel _output_transform;
- CLTensor _input0;
- CLTensor _input1;
- CLTensor _batched_mm_output;
- const ICLTensor *_original_weights;
- bool _is_prepared;
+ MemoryGroup _memory_group;
+ CLGEMM _batched_mm;
+ CLWinogradInputTransform _input_transform;
+ std::unique_ptr<CLWinogradFilterTransformKernel> _filter_transform;
+ std::unique_ptr<CLWinogradOutputTransformKernel> _output_transform;
+ CLTensor _input0;
+ CLTensor _input1;
+ CLTensor _batched_mm_output;
+ const ICLTensor *_original_weights;
+ bool _is_prepared;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLWINOGRADCONVOLUTIONLAYER_H */
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to execute a @ref CLWinogradInputTransformKernel. */
class CLWinogradInputTransform : public ICLSimpleFunction
#ifndef ARM_COMPUTE_CLYOLOLAYER_H
#define ARM_COMPUTE_CLYOLOLAYER_H
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
namespace arm_compute
{
+class CLCompileContext;
class ICLTensor;
+class ITensorInfo;
/** Basic function to run @ref CLYOLOLayerKernel that performs a partial activation on the input
*
#ifndef ARM_COMPUTE_IOPERATOR_H
#define ARM_COMPUTE_IOPERATOR_H
-#include "arm_compute/core/ITensorPack.h"
#include "arm_compute/core/experimental/Types.h"
-#include "arm_compute/runtime/IOperator.h"
#include "arm_compute/runtime/IRuntimeContext.h"
#include "arm_compute/runtime/Types.h"
namespace arm_compute
{
+class ITensorPack;
namespace experimental
{
/** Base class for all async functions */
There are two others implementation of @ref IKernel called @ref ICLSimpleKernel and INESimpleKernel, they are the interface for simple kernels that have just one input tensor and one output tensor.
Creating a new kernel implies adding new files:
-- arm_compute/core/CL/kernels/CLReshapeLayerKernel.h
+- src/core/CL/kernels/CLReshapeLayerKernel.h
- src/core/CL/cl_kernels/reshape_layer.cl
- src/core/CL/kernels/CLReshapeLayerKernel.cpp
- src/core/CL/CLKernelLibrary.cpp
- src/core/NEON/kernels/NEReshapeLayerKernel.cpp
We must register the new layer in the respective libraries:
-- arm_compute/core/CL/CLKernels.h
+- src/core/CL/CLKernels.h
- arm_compute/core/NEON/NEKernels.h
These files contain the list of all kernels available in the corresponding Compute Library's backend, for example CLKernels:
@code{.cpp}
...
-#include "arm_compute/core/CL/kernels/CLMinMaxLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h"
+#include "src/core/CL/kernels/CLMinMaxLayerKernel.h"
+#include "src/core/CL/kernels/CLMinMaxLocationKernel.h"
...
-#include "arm_compute/core/CL/kernels/CLReshapeLayerKernel.h"
+#include "src/core/CL/kernels/CLReshapeLayerKernel.h"
...
@endcode
* @brief Manages all the OpenCL kernels compilation and caching, provides accessors for the OpenCL Context.
*/
-/** @file arm_compute/core/CL/CLKernels.h
- * @brief Includes all the OpenCL kernels at once
- */
-
/** @file arm_compute/core/CL/OpenCL.h
* @brief Wrapper to configure the Khronos OpenCL C++ header
*/
-/** @dir arm_compute/core/CL/kernels
- * @brief Folder containing all the OpenCL kernels
- */
-
/** @dir arm_compute/core/CPP
* @brief CPP backend core: kernels and utilities.
*/
* @brief Folder containing all the configuration files for GEMM
*/
-/** @dir src/core/CL/cl_kernels
+/** @dir src/core/CL/kernels
* @brief All the OpenCL kernels
*/
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/runtime/CL/CLFunctions.h"
-
+#include "arm_compute/core/CL/OpenCL.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/Utils.h"
+#include "arm_compute/runtime/CL/functions/CLPermute.h"
#include "utils/Utils.h"
using namespace arm_compute;
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#error "This example needs to be built with -DARM_COMPUTE_CL"
#endif /* ARM_COMPUTE_CL */
+#include "arm_compute/core/CL/OpenCL.h"
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLFunctions.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "arm_compute/runtime/CL/functions/CLConvolution.h"
#include "utils/ImageLoader.h"
#include "utils/Utils.h"
#endif /* ARM_COMPUTE_CL */
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLFunctions.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "arm_compute/runtime/CL/functions/CLGaussian5x5.h"
+#include "arm_compute/runtime/CL/functions/CLMedian3x3.h"
+#include "arm_compute/runtime/CL/functions/CLScale.h"
#include "utils/ImageLoader.h"
#include "utils/Utils.h"
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#endif /* ARM_COMPUTE_CL */
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLFunctions.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTuner.h"
+#include "arm_compute/runtime/CL/functions/CLGEMM.h"
#include "utils/Utils.h"
#include <cstdlib>
#endif /* ARM_COMPUTE_CL */
#include "CommonGemmExampleOptions.h"
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "arm_compute/runtime/CL/CLFunctions.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTuner.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h"
#include "tests/CL/Helper.h"
#include "utils/Utils.h"
#include "utils/command_line/CommandLineOptions.h"
#error "This example needs to be built with -DARM_COMPUTE_CL"
#endif /* ARM_COMPUTE_CL */
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "arm_compute/runtime/CL/CLFunctions.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTuner.h"
#include "examples/gemm_tuner/CommonGemmExampleOptions.h"
#include "examples/gemm_tuner/GemmTunerHelpers.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
#include "tests/CL/Helper.h"
#include "utils/Utils.h"
#include "utils/command_line/CommandLineOptions.h"
#include "CommonGemmExampleOptions.h"
#include "GemmTunerHelpers.h"
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "arm_compute/runtime/CL/CLFunctions.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTuner.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h"
#include "tests/CL/Helper.h"
#include "utils/Utils.h"
#include "utils/command_line/CommandLineOptions.h"
#endif /* ARM_COMPUTE_CL */
#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLFunctions.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "arm_compute/runtime/CL/functions/CLGaussian5x5.h"
+#include "arm_compute/runtime/CL/functions/CLScale.h"
#include "arm_compute/runtime/NEON/NEFunctions.h"
#include "utils/ImageLoader.h"
#include "utils/Utils.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLKERNELS_H
+#define ARM_COMPUTE_CLKERNELS_H
+
+/* Header regrouping all the CL kernels */
+#include "src/core/CL/kernels/CLAbsoluteDifferenceKernel.h"
+#include "src/core/CL/kernels/CLAccumulateKernel.h"
+#include "src/core/CL/kernels/CLActivationLayerKernel.h"
+#include "src/core/CL/kernels/CLArgMinMaxLayerKernel.h"
+#include "src/core/CL/kernels/CLBatchConcatenateLayerKernel.h"
+#include "src/core/CL/kernels/CLBatchNormalizationLayerKernel.h"
+#include "src/core/CL/kernels/CLBatchToSpaceLayerKernel.h"
+#include "src/core/CL/kernels/CLBitwiseAndKernel.h"
+#include "src/core/CL/kernels/CLBitwiseNotKernel.h"
+#include "src/core/CL/kernels/CLBitwiseOrKernel.h"
+#include "src/core/CL/kernels/CLBitwiseXorKernel.h"
+#include "src/core/CL/kernels/CLBoundingBoxTransformKernel.h"
+#include "src/core/CL/kernels/CLBox3x3Kernel.h"
+#include "src/core/CL/kernels/CLCannyEdgeKernel.h"
+#include "src/core/CL/kernels/CLChannelCombineKernel.h"
+#include "src/core/CL/kernels/CLChannelExtractKernel.h"
+#include "src/core/CL/kernels/CLChannelShuffleLayerKernel.h"
+#include "src/core/CL/kernels/CLCol2ImKernel.h"
+#include "src/core/CL/kernels/CLColorConvertKernel.h"
+#include "src/core/CL/kernels/CLComparisonKernel.h"
+#include "src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h"
+#include "src/core/CL/kernels/CLConvolutionKernel.h"
+#include "src/core/CL/kernels/CLCopyKernel.h"
+#include "src/core/CL/kernels/CLCropKernel.h"
+#include "src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h"
+#include "src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h"
+#include "src/core/CL/kernels/CLDepthConcatenateLayerKernel.h"
+#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
+#include "src/core/CL/kernels/CLDepthToSpaceLayerKernel.h"
+#include "src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h"
+#include "src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h"
+#include "src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h"
+#include "src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h"
+#include "src/core/CL/kernels/CLDequantizationLayerKernel.h"
+#include "src/core/CL/kernels/CLDerivativeKernel.h"
+#include "src/core/CL/kernels/CLDilateKernel.h"
+#include "src/core/CL/kernels/CLDirectConvolutionLayerKernel.h"
+#include "src/core/CL/kernels/CLElementWiseUnaryLayerKernel.h"
+#include "src/core/CL/kernels/CLElementwiseOperationKernel.h"
+#include "src/core/CL/kernels/CLErodeKernel.h"
+#include "src/core/CL/kernels/CLFFTDigitReverseKernel.h"
+#include "src/core/CL/kernels/CLFFTRadixStageKernel.h"
+#include "src/core/CL/kernels/CLFFTScaleKernel.h"
+#include "src/core/CL/kernels/CLFastCornersKernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLFlattenLayerKernel.h"
+#include "src/core/CL/kernels/CLFloorKernel.h"
+#include "src/core/CL/kernels/CLFuseBatchNormalizationKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpReductionKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
+#include "src/core/CL/kernels/CLGatherKernel.h"
+#include "src/core/CL/kernels/CLGaussian3x3Kernel.h"
+#include "src/core/CL/kernels/CLGaussian5x5Kernel.h"
+#include "src/core/CL/kernels/CLGaussianPyramidKernel.h"
+#include "src/core/CL/kernels/CLGenerateProposalsLayerKernel.h"
+#include "src/core/CL/kernels/CLHOGDescriptorKernel.h"
+#include "src/core/CL/kernels/CLHOGDetectorKernel.h"
+#include "src/core/CL/kernels/CLHarrisCornersKernel.h"
+#include "src/core/CL/kernels/CLHeightConcatenateLayerKernel.h"
+#include "src/core/CL/kernels/CLHistogramKernel.h"
+#include "src/core/CL/kernels/CLIm2ColKernel.h"
+#include "src/core/CL/kernels/CLInstanceNormalizationLayerKernel.h"
+#include "src/core/CL/kernels/CLIntegralImageKernel.h"
+#include "src/core/CL/kernels/CLL2NormalizeLayerKernel.h"
+#include "src/core/CL/kernels/CLLKTrackerKernel.h"
+#include "src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h"
+#include "src/core/CL/kernels/CLMagnitudePhaseKernel.h"
+#include "src/core/CL/kernels/CLMaxUnpoolingLayerKernel.h"
+#include "src/core/CL/kernels/CLMeanStdDevKernel.h"
+#include "src/core/CL/kernels/CLMeanStdDevNormalizationKernel.h"
+#include "src/core/CL/kernels/CLMedian3x3Kernel.h"
+#include "src/core/CL/kernels/CLMemsetKernel.h"
+#include "src/core/CL/kernels/CLMinMaxLayerKernel.h"
+#include "src/core/CL/kernels/CLMinMaxLocationKernel.h"
+#include "src/core/CL/kernels/CLNonLinearFilterKernel.h"
+#include "src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h"
+#include "src/core/CL/kernels/CLNormalizationLayerKernel.h"
+#include "src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h"
+#include "src/core/CL/kernels/CLPadLayerKernel.h"
+#include "src/core/CL/kernels/CLPermuteKernel.h"
+#include "src/core/CL/kernels/CLPixelWiseMultiplicationKernel.h"
+#include "src/core/CL/kernels/CLPoolingLayerKernel.h"
+#include "src/core/CL/kernels/CLPriorBoxLayerKernel.h"
+#include "src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h"
+#include "src/core/CL/kernels/CLQuantizationLayerKernel.h"
+#include "src/core/CL/kernels/CLROIAlignLayerKernel.h"
+#include "src/core/CL/kernels/CLROIPoolingLayerKernel.h"
+#include "src/core/CL/kernels/CLRangeKernel.h"
+#include "src/core/CL/kernels/CLReductionOperationKernel.h"
+#include "src/core/CL/kernels/CLRemapKernel.h"
+#include "src/core/CL/kernels/CLReorgLayerKernel.h"
+#include "src/core/CL/kernels/CLReshapeLayerKernel.h"
+#include "src/core/CL/kernels/CLReverseKernel.h"
+#include "src/core/CL/kernels/CLScaleKernel.h"
+#include "src/core/CL/kernels/CLScharr3x3Kernel.h"
+#include "src/core/CL/kernels/CLSelectKernel.h"
+#include "src/core/CL/kernels/CLSobel3x3Kernel.h"
+#include "src/core/CL/kernels/CLSobel5x5Kernel.h"
+#include "src/core/CL/kernels/CLSobel7x7Kernel.h"
+#include "src/core/CL/kernels/CLSoftmaxLayerKernel.h"
+#include "src/core/CL/kernels/CLSpaceToBatchLayerKernel.h"
+#include "src/core/CL/kernels/CLSpaceToDepthLayerKernel.h"
+#include "src/core/CL/kernels/CLStackLayerKernel.h"
+#include "src/core/CL/kernels/CLStridedSliceKernel.h"
+#include "src/core/CL/kernels/CLTableLookupKernel.h"
+#include "src/core/CL/kernels/CLThresholdKernel.h"
+#include "src/core/CL/kernels/CLTileKernel.h"
+#include "src/core/CL/kernels/CLTransposeKernel.h"
+#include "src/core/CL/kernels/CLUpsampleLayerKernel.h"
+#include "src/core/CL/kernels/CLWarpAffineKernel.h"
+#include "src/core/CL/kernels/CLWarpPerspectiveKernel.h"
+#include "src/core/CL/kernels/CLWeightsReshapeKernel.h"
+#include "src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h"
+#include "src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h"
+#include "src/core/CL/kernels/CLWidthConcatenateLayerKernel.h"
+#include "src/core/CL/kernels/CLWinogradFilterTransformKernel.h"
+#include "src/core/CL/kernels/CLWinogradInputTransformKernel.h"
+#include "src/core/CL/kernels/CLWinogradOutputTransformKernel.h"
+#include "src/core/CL/kernels/CLYOLOLayerKernel.h"
+#include "src/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h"
+
+#endif /* ARM_COMPUTE_CLKERNELS_H */
*/
#include "arm_compute/core/TracePoint.h"
+#include "arm_compute/core/CL/CLTypes.h"
#include "arm_compute/core/CL/ICLArray.h"
#include "arm_compute/core/CL/ICLDistribution1D.h"
#include "arm_compute/core/CL/ICLHOG.h"
#include "arm_compute/core/CL/ICLMultiHOG.h"
#include "arm_compute/core/CL/ICLMultiImage.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLLKTrackerKernel.h"
#include "utils/TypePrinter.h"
#include <vector>
#ifndef ARM_COMPUTE_CL_VALIDATE_H
#define ARM_COMPUTE_CL_VALIDATE_H
+#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/Validate.h"
namespace arm_compute
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Helpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_ICLKERNEL_H
+#define ARM_COMPUTE_ICLKERNEL_H
+
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/CL/CLTypes.h"
+#include "arm_compute/core/CL/OpenCL.h"
+#include "arm_compute/core/GPUTarget.h"
+#include "arm_compute/core/IKernel.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/experimental/Types.h"
+
+#include <string>
+
+namespace arm_compute
+{
+template <typename T>
+class ICLArray;
+class ICLTensor;
+class Window;
+
+/** Common interface for all the OpenCL kernels */
+class ICLKernel : public IKernel
+{
+private:
+ /** Returns the number of arguments enqueued per array object.
+ *
+ * @return The number of arguments enqueued per array object.
+ */
+ template <unsigned int dimension_size>
+ constexpr static unsigned int num_arguments_per_array()
+ {
+ return num_arguments_per_tensor<dimension_size>();
+ }
+ /** Returns the number of arguments enqueued per tensor object.
+ *
+ * @return The number of arguments enqueued per tensor object.
+ */
+ template <unsigned int dimension_size>
+ constexpr static unsigned int num_arguments_per_tensor()
+ {
+ return 2 + 2 * dimension_size;
+ }
+ using IKernel::configure; //Prevent children from calling IKernel::configure() directly
+protected:
+ /** Configure the kernel's window and local workgroup size hint.
+ *
+ * @param[in] window The maximum window which will be returned by window()
+ * @param[in] lws_hint (Optional) Local-Workgroup-Size to use.
+ */
+ void configure_internal(const Window &window, cl::NDRange lws_hint = CLKernelLibrary::get().default_ndrange())
+ {
+ _lws_hint = lws_hint;
+ IKernel::configure(window);
+ }
+
+public:
+ /** Constructor */
+ ICLKernel()
+ : _kernel(nullptr), _target(GPUTarget::MIDGARD), _config_id(arm_compute::default_config_id), _max_workgroup_size(0), _lws_hint()
+ {
+ }
+ /** Returns a reference to the OpenCL kernel of this object.
+ *
+ * @return A reference to the OpenCL kernel of this object.
+ */
+ cl::Kernel &kernel()
+ {
+ return _kernel;
+ }
+ /** Add the passed 1D array's parameters to the object's kernel's arguments starting from the index idx.
+ *
+ * @param[in,out] idx Index at which to start adding the array's arguments. Will be incremented by the number of kernel arguments set.
+ * @param[in] array Array to set as an argument of the object's kernel.
+ * @param[in] strides @ref Strides object containing stride of each dimension in bytes.
+ * @param[in] num_dimensions Number of dimensions of the @p array.
+ * @param[in] window Window the kernel will be executed on.
+ */
+ template <typename T>
+ void add_1D_array_argument(unsigned int &idx, const ICLArray<T> *array, const Strides &strides, unsigned int num_dimensions, const Window &window)
+ {
+ add_array_argument<T, 1>(idx, array, strides, num_dimensions, window);
+ }
+ /** Add the passed 1D tensor's parameters to the object's kernel's arguments starting from the index idx.
+ *
+ * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
+ * @param[in] tensor Tensor to set as an argument of the object's kernel.
+ * @param[in] window Window the kernel will be executed on.
+ */
+ void add_1D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
+ {
+ add_tensor_argument<1>(idx, tensor, window);
+ }
+ /** Add the passed 1D tensor's parameters to the object's kernel's arguments starting from the index idx if the condition is true.
+ *
+ * @param[in] cond Condition to check
+ * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
+ * @param[in] tensor Tensor to set as an argument of the object's kernel.
+ * @param[in] window Window the kernel will be executed on.
+ */
+ void add_1D_tensor_argument_if(bool cond, unsigned int &idx, const ICLTensor *tensor, const Window &window)
+ {
+ if(cond)
+ {
+ add_1D_tensor_argument(idx, tensor, window);
+ }
+ }
+ /** Add the passed 2D tensor's parameters to the object's kernel's arguments starting from the index idx.
+ *
+ * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
+ * @param[in] tensor Tensor to set as an argument of the object's kernel.
+ * @param[in] window Window the kernel will be executed on.
+ */
+ void add_2D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
+ {
+ add_tensor_argument<2>(idx, tensor, window);
+ }
+ /** Add the passed 2D tensor's parameters to the object's kernel's arguments starting from the index idx if the condition is true.
+ *
+ * @param[in] cond Condition to check
+ * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
+ * @param[in] tensor Tensor to set as an argument of the object's kernel.
+ * @param[in] window Window the kernel will be executed on.
+ */
+ void add_2D_tensor_argument_if(bool cond, unsigned int &idx, const ICLTensor *tensor, const Window &window)
+ {
+ if(cond)
+ {
+ add_2D_tensor_argument(idx, tensor, window);
+ }
+ }
+ /** Add the passed 3D tensor's parameters to the object's kernel's arguments starting from the index idx.
+ *
+ * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
+ * @param[in] tensor Tensor to set as an argument of the object's kernel.
+ * @param[in] window Window the kernel will be executed on.
+ */
+ void add_3D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
+ {
+ add_tensor_argument<3>(idx, tensor, window);
+ }
+ /** Add the passed 4D tensor's parameters to the object's kernel's arguments starting from the index idx.
+ *
+ * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
+ * @param[in] tensor Tensor to set as an argument of the object's kernel.
+ * @param[in] window Window the kernel will be executed on.
+ */
+ void add_4D_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window)
+ {
+ add_tensor_argument<4>(idx, tensor, window);
+ }
+ /** Returns the number of arguments enqueued per 1D array object.
+ *
+ * @return The number of arguments enqueues per 1D array object.
+ */
+ constexpr static unsigned int num_arguments_per_1D_array()
+ {
+ return num_arguments_per_array<1>();
+ }
+ /** Returns the number of arguments enqueued per 1D tensor object.
+ *
+ * @return The number of arguments enqueues per 1D tensor object.
+ */
+ constexpr static unsigned int num_arguments_per_1D_tensor()
+ {
+ return num_arguments_per_tensor<1>();
+ }
+ /** Returns the number of arguments enqueued per 2D tensor object.
+ *
+ * @return The number of arguments enqueues per 2D tensor object.
+ */
+ constexpr static unsigned int num_arguments_per_2D_tensor()
+ {
+ return num_arguments_per_tensor<2>();
+ }
+ /** Returns the number of arguments enqueued per 3D tensor object.
+ *
+ * @return The number of arguments enqueues per 3D tensor object.
+ */
+ constexpr static unsigned int num_arguments_per_3D_tensor()
+ {
+ return num_arguments_per_tensor<3>();
+ }
+ /** Returns the number of arguments enqueued per 4D tensor object.
+ *
+ * @return The number of arguments enqueues per 4D tensor object.
+ */
+ constexpr static unsigned int num_arguments_per_4D_tensor()
+ {
+ return num_arguments_per_tensor<4>();
+ }
+ /** Enqueue the OpenCL kernel to process the given window on the passed OpenCL command queue.
+ *
+ * @note The queue is *not* flushed by this method, and therefore the kernel will not have been executed by the time this method returns.
+ *
+ * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
+ * @param[in,out] queue Command queue on which to enqueue the kernel.
+ */
+ virtual void run(const Window &window, cl::CommandQueue &queue)
+ {
+ ARM_COMPUTE_UNUSED(window, queue);
+ }
+ /** Enqueue the OpenCL kernel to process the given window on the passed OpenCL command queue.
+ *
+ * @note The queue is *not* flushed by this method, and therefore the kernel will not have been executed by the time this method returns.
+ *
+ * @param[in] tensors A vector containing the tensors to operato on.
+ * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
+ * @param[in,out] queue Command queue on which to enqueue the kernel.
+ */
+ virtual void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue)
+ {
+ ARM_COMPUTE_UNUSED(tensors, window, queue);
+ }
+ /** Add the passed parameters to the object's kernel's arguments starting from the index idx.
+ *
+ * @param[in,out] idx Index at which to start adding the arguments. Will be incremented by the number of kernel arguments set.
+ * @param[in] value Value to set as an argument of the object's kernel.
+ */
+ template <typename T>
+ void add_argument(unsigned int &idx, T value)
+ {
+ _kernel.setArg(idx++, value);
+ }
+
+ /** Set the Local-Workgroup-Size hint
+ *
+ * @note This method should be called after the configuration of the kernel
+ *
+ * @param[in] lws_hint Local-Workgroup-Size to use
+ */
+ void set_lws_hint(const cl::NDRange &lws_hint)
+ {
+ ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); // lws_hint will be overwritten by configure()
+ _lws_hint = lws_hint;
+ }
+
+ /** Return the Local-Workgroup-Size hint
+ *
+ * @return Current lws hint
+ */
+ cl::NDRange lws_hint() const
+ {
+ return _lws_hint;
+ }
+
+ /** Get the configuration ID
+ *
+ * @note The configuration ID can be used by the caller to distinguish different calls of the same OpenCL kernel
+ * In particular, this method can be used by CLScheduler to keep track of the best LWS for each configuration of the same kernel.
+ * The configuration ID should be provided only for the kernels potentially affected by the LWS geometry
+ *
+ * @note This method should be called after the configuration of the kernel
+ *
+ * @return configuration id string
+ */
+ const std::string &config_id() const
+ {
+ return _config_id;
+ }
+
+ /** Set the targeted GPU architecture
+ *
+ * @param[in] target The targeted GPU architecture
+ */
+ void set_target(GPUTarget target)
+ {
+ _target = target;
+ }
+
+ /** Set the targeted GPU architecture according to the CL device
+ *
+ * @param[in] device A CL device
+ */
+ void set_target(cl::Device &device);
+
+ /** Get the targeted GPU architecture
+ *
+ * @return The targeted GPU architecture.
+ */
+ GPUTarget get_target() const
+ {
+ return _target;
+ }
+
+ /** Get the maximum workgroup size for the device the CLKernelLibrary uses.
+ *
+ * @return The maximum workgroup size value.
+ */
+ size_t get_max_workgroup_size();
+ /** Get the global work size given an execution window
+ *
+ * @param[in] window Execution window
+ *
+ * @return Global work size of the given execution window
+ */
+ static cl::NDRange gws_from_window(const Window &window);
+
+private:
+ /** Add the passed array's parameters to the object's kernel's arguments starting from the index idx.
+ *
+ * @param[in,out] idx Index at which to start adding the array's arguments. Will be incremented by the number of kernel arguments set.
+ * @param[in] array Array to set as an argument of the object's kernel.
+ * @param[in] strides @ref Strides object containing stride of each dimension in bytes.
+ * @param[in] num_dimensions Number of dimensions of the @p array.
+ * @param[in] window Window the kernel will be executed on.
+ */
+ template <typename T, unsigned int dimension_size>
+ void add_array_argument(unsigned int &idx, const ICLArray<T> *array, const Strides &strides, unsigned int num_dimensions, const Window &window);
+ /** Add the passed tensor's parameters to the object's kernel's arguments starting from the index idx.
+ *
+ * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set.
+ * @param[in] tensor Tensor to set as an argument of the object's kernel.
+ * @param[in] window Window the kernel will be executed on.
+ */
+ template <unsigned int dimension_size>
+ void add_tensor_argument(unsigned int &idx, const ICLTensor *tensor, const Window &window);
+
+protected:
+ cl::Kernel _kernel; /**< OpenCL kernel to run */
+ GPUTarget _target; /**< The targeted GPU */
+ std::string _config_id; /**< Configuration ID */
+ size_t _max_workgroup_size; /**< The maximum workgroup size for this kernel */
+private:
+ cl::NDRange _lws_hint; /**< Local workgroup size hint for the OpenCL kernel */
+};
+
+/** Add the kernel to the command queue with the given window.
+ *
+ * @note Depending on the size of the window, this might translate into several jobs being enqueued.
+ *
+ * @note If kernel->kernel() is empty then the function will return without adding anything to the queue.
+ *
+ * @param[in,out] queue OpenCL command queue.
+ * @param[in] kernel Kernel to enqueue
+ * @param[in] window Window the kernel has to process.
+ * @param[in] lws_hint (Optional) Local workgroup size requested. Default is based on the device target.
+ * @param[in] use_dummy_work_items (Optional) Use dummy work items in order to have two dimensional power of two NDRange. Default is false
+ * Note: it is kernel responsibility to check if the work-item is out-of-range
+ *
+ * @note If any dimension of the lws is greater than the global workgroup size then no lws will be passed.
+ */
+void enqueue(cl::CommandQueue &queue, ICLKernel &kernel, const Window &window, const cl::NDRange &lws_hint = CLKernelLibrary::get().default_ndrange(), bool use_dummy_work_items = false);
+
+/** Add the passed array's parameters to the object's kernel's arguments starting from the index idx.
+ *
+ * @param[in,out] idx Index at which to start adding the array's arguments. Will be incremented by the number of kernel arguments set.
+ * @param[in] array Array to set as an argument of the object's kernel.
+ * @param[in] strides @ref Strides object containing stride of each dimension in bytes.
+ * @param[in] num_dimensions Number of dimensions of the @p array.
+ * @param[in] window Window the kernel will be executed on.
+ */
+template <typename T, unsigned int dimension_size>
+void ICLKernel::add_array_argument(unsigned &idx, const ICLArray<T> *array, const Strides &strides, unsigned int num_dimensions, const Window &window)
+{
+ ARM_COMPUTE_ERROR_ON(array == nullptr);
+
+ // Calculate offset to the start of the window
+ unsigned int offset_first_element = 0;
+
+ for(unsigned int n = 0; n < num_dimensions; ++n)
+ {
+ offset_first_element += window[n].start() * strides[n];
+ }
+
+ unsigned int idx_start = idx;
+ _kernel.setArg(idx++, array->cl_buffer());
+
+ for(unsigned int dimension = 0; dimension < dimension_size; dimension++)
+ {
+ _kernel.setArg<cl_uint>(idx++, strides[dimension]);
+ _kernel.setArg<cl_uint>(idx++, strides[dimension] * window[dimension].step());
+ }
+
+ _kernel.setArg<cl_uint>(idx++, offset_first_element);
+
+ ARM_COMPUTE_ERROR_ON_MSG_VAR(idx_start + num_arguments_per_array<dimension_size>() != idx,
+ "add_%dD_array_argument() is supposed to add exactly %d arguments to the kernel", dimension_size, num_arguments_per_array<dimension_size>());
+ ARM_COMPUTE_UNUSED(idx_start);
+}
+}
+#endif /*ARM_COMPUTE_ICLKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/ICLSimple2DKernel.h"
+#include "src/core/CL/ICLSimple2DKernel.h"
#include "src/core/helpers/WindowHelpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_ICLSIMPLE2DKERNEL_H
+#define ARM_COMPUTE_ICLSIMPLE2DKERNEL_H
+
+#include "src/core/CL/ICLSimpleKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for simple OpenCL kernels having 1 tensor input and 1 tensor output. This interface can be used when the work-item processes a 2D tile */
+class ICLSimple2DKernel : public ICLSimpleKernel
+{
+public:
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+};
+}
+#endif /*ARM_COMPUTE_ICLSIMPLE2DKERNEL_H */
/*
- * Copyright (c) 2017-2018 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/ICLSimple3DKernel.h"
+#include "src/core/CL/ICLSimple3DKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Helpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_ICLSIMPLE3DKERNEL_H
+#define ARM_COMPUTE_ICLSIMPLE3DKERNEL_H
+
+#include "src/core/CL/ICLSimple2DKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for simple OpenCL kernels having 1 tensor input and 1 tensor output.
+ * Both input tensor and output tensor must have at least 3 dimensions.
+ */
+class ICLSimple3DKernel : public ICLSimple2DKernel
+{
+public:
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+};
+}
+#endif /*ARM_COMPUTE_ICLSIMPLE3DKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/ICLSimpleKernel.h"
-
+#include "src/core/CL/ICLSimpleKernel.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/IAccessWindow.h"
#include "arm_compute/core/Validate.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_ICLSIMPLEKERNEL_H
+#define ARM_COMPUTE_ICLSIMPLEKERNEL_H
+
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/Helpers.h"
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+/** Interface for simple OpenCL kernels having 1 tensor input and 1 tensor output */
+class ICLSimpleKernel : public ICLKernel
+{
+public:
+ /** Constructor. */
+ ICLSimpleKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ ICLSimpleKernel(const ICLSimpleKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ ICLSimpleKernel &operator=(const ICLSimpleKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ ICLSimpleKernel(ICLSimpleKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ ICLSimpleKernel &operator=(ICLSimpleKernel &&) = default;
+ /** Default destructor */
+ ~ICLSimpleKernel() = default;
+
+ /** Configure the kernel
+ *
+ * @param[in] input Source tensor.
+ * @param[out] output Destination tensor.
+ * @param[in] num_elems_processed_per_iteration Number of processed elements per iteration.
+ * @param[in] border_undefined (Optional) True if the border mode is undefined. False if it's replicate or constant.
+ * @param[in] border_size (Optional) Size of the border.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, unsigned int num_elems_processed_per_iteration, bool border_undefined = false, const BorderSize &border_size = BorderSize());
+
+protected:
+ const ICLTensor *_input;
+ ICLTensor *_output;
+};
+}
+
+#endif /*ARM_COMPUTE_ICLSIMPLEKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h"
-
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Validate.h"
+
+#include "src/core/CL/kernels/CLAbsoluteDifferenceKernel.h"
#include "src/core/helpers/WindowHelpers.h"
#include <set>
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLABSOLUTEDIFFERENCEKERNEL_H
+#define ARM_COMPUTE_CLABSOLUTEDIFFERENCEKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the absolute difference kernel.
+ *
+ * Absolute difference is computed by:
+ * @f[ output(x,y) = | input1(x,y) - input2(x,y) | @f]
+ */
+class CLAbsoluteDifferenceKernel : public ICLKernel
+{
+public:
+ /** Default constructor. */
+ CLAbsoluteDifferenceKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLAbsoluteDifferenceKernel(const CLAbsoluteDifferenceKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLAbsoluteDifferenceKernel &operator=(const CLAbsoluteDifferenceKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLAbsoluteDifferenceKernel(CLAbsoluteDifferenceKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLAbsoluteDifferenceKernel &operator=(CLAbsoluteDifferenceKernel &&) = default;
+ /** Default destructor */
+ ~CLAbsoluteDifferenceKernel() = default;
+
+ /** Set the inputs and output images.
+ *
+ * @param[in] input1 Source tensor. Data types supported: U8/S16.
+ * @param[in] input2 Source tensor. Data types supported: U8/S16.
+ * @param[out] output Destination tensor. Data types supported: U8/S16.
+ */
+ void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
+ /** Set the inputs and output images.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input1 Source tensor. Data types supported: U8/S16.
+ * @param[in] input2 Source tensor. Data types supported: U8/S16.
+ * @param[out] output Destination tensor. Data types supported: U8/S16.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input1; /**< Source tensor 1. */
+ const ICLTensor *_input2; /**< Source tensor 2. */
+ ICLTensor *_output; /**< Destination tensor. */
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLABSOLUTEDIFFERENCEKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLAccumulateKernel.h"
+#include "src/core/CL/kernels/CLAccumulateKernel.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLACCUMULATEKERNEL_H
+#define ARM_COMPUTE_CLACCUMULATEKERNEL_H
+
+#include "src/core/CL/ICLSimple2DKernel.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the accumulate kernel.
+ *
+ * Accumulation is computed by:
+ * @f[ accum(x,y) = accum(x,y) + input(x,y) @f]
+ */
+class CLAccumulateKernel : public ICLSimple2DKernel
+{
+public:
+ /** Set the input and accumulation tensors.
+ *
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[in,out] accum Accumulated tensor. Data types supported: S16.
+ */
+ void configure(const ICLTensor *input, ICLTensor *accum);
+ /** Set the input and accumulation tensors.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[in,out] accum Accumulated tensor. Data types supported: S16.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *accum);
+};
+
+/** Interface for the accumulate weighted kernel.
+ *
+ * Weighted accumulation is computed:
+ * @f[ accum(x,y) = (1 - \alpha)*accum(x,y) + \alpha*input(x,y) @f]
+ *
+ * Where @f$ 0 \le \alpha \le 1 @f$
+ * Conceptually, the rounding for this is defined as:
+ * @f[ output(x,y)= uint8( (1 - \alpha) * float32( int32( output(x,y) ) ) + \alpha * float32( int32( input(x,y) ) ) ) @f]
+*/
+class CLAccumulateWeightedKernel : public ICLSimple2DKernel
+{
+public:
+ /** Set the input and accumulation images, and the scale value.
+ *
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[in] alpha Scalar value in the range [0, 1.0]. Data types supported: F32.
+ * @param[in,out] accum Accumulated tensor. Data types supported: U8.
+ */
+ void configure(const ICLTensor *input, float alpha, ICLTensor *accum);
+ /** Set the input and accumulation images, and the scale value.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[in] alpha Scalar value in the range [0, 1.0]. Data types supported: F32.
+ * @param[in,out] accum Accumulated tensor. Data types supported: U8.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, float alpha, ICLTensor *accum);
+};
+
+/** Interface for the accumulate squared kernel.
+ *
+ * The accumulation of squares is computed:
+ * @f[ accum(x,y) = saturate_{int16} ( (uint16) accum(x,y) + (((uint16)(input(x,y)^2)) >> (shift)) ) @f]
+ *
+ * Where @f$ 0 \le shift \le 15 @f$
+*/
+class CLAccumulateSquaredKernel : public ICLSimple2DKernel
+{
+public:
+ /** Set the input and accumulation tensors and the shift value.
+ *
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[in] shift Shift value in the range of [0, 15]. Data types supported: U32.
+ * @param[in,out] accum Accumulated tensor. Data types supported: S16.
+ */
+ void configure(const ICLTensor *input, uint32_t shift, ICLTensor *accum);
+ /** Set the input and accumulation tensors and the shift value.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[in] shift Shift value in the range of [0, 15]. Data types supported: U32.
+ * @param[in,out] accum Accumulated tensor. Data types supported: S16.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, uint32_t shift, ICLTensor *accum);
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLACCUMULATEKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLActivationLayerKernel.h"
+#include "src/core/CL/kernels/CLActivationLayerKernel.h"
#include "arm_compute/core/CL/CLCoreRuntimeContext.h"
#include "arm_compute/core/CL/CLHelpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLACTIVATIONLAYERKERNEL_H
+#define ARM_COMPUTE_CLACTIVATIONLAYERKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+/** Interface for the activation layer kernel. */
+class CLActivationLayerKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLActivationLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLActivationLayerKernel(const CLActivationLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLActivationLayerKernel &operator=(const CLActivationLayerKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLActivationLayerKernel(CLActivationLayerKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLActivationLayerKernel &operator=(CLActivationLayerKernel &&) = default;
+ /** Default destructor */
+ ~CLActivationLayerKernel() = default;
+ /** Set the input and output tensor.
+ *
+ * @note If the output tensor is a nullptr, the activation function will be performed in-place
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result
+ * of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32.
+ * @param[out] output Destination tensor. Data type supported: same as @p input
+ * @param[in] act_info Activation layer information.
+ */
+ void configure(const CLCompileContext &compile_context, ITensorInfo *input, ITensorInfo *output, ActivationLayerInfo act_info);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLActivationLayerKernel
+ *
+ * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result
+ * of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32.
+ * @param[in] output Destination tensor info. Data type supported: same as @p input
+ * @param[in] act_info Activation layer information.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info);
+
+ // Inherited methods overridden:
+ void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ bool _run_in_place;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLACTIVATIONLAYERKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLArgMinMaxLayerKernel.h"
+#include "src/core/CL/kernels/CLArgMinMaxLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2019-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLARGMINMAXLAYERKERNEL_H
+#define ARM_COMPUTE_CLARGMINMAXLAYERKERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the reduction operation kernel
+ *
+ * @note The default data type for an uninitialized output tensor is
+ * signed 32-bit integer (S32). It is the user's responsibility to check
+ * that the results do not overflow because the indices are computed
+ * in unsigned 32-bit (U32).
+ */
+class CLArgMinMaxLayerKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLArgMinMaxLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLArgMinMaxLayerKernel(const CLArgMinMaxLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLArgMinMaxLayerKernel &operator=(const CLArgMinMaxLayerKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLArgMinMaxLayerKernel(CLArgMinMaxLayerKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLArgMinMaxLayerKernel &operator=(CLArgMinMaxLayerKernel &&) = default;
+ /** Default destructor */
+ ~CLArgMinMaxLayerKernel() = default;
+
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/S32/F16/F32.
+ * @param[in] prev_output Destination tensor of the previous iterations of @ref CLArgMinMaxLayerKernel. Data types supported: U32/S32
+ * Has to be nullptr for the first iteration
+ * @param[out] output Destination tensor. Data types supported: U32/S32
+ * Output will have the same number of dimensions as input.
+ * @param[in] axis Axis along which to reduce. Supported reduction axis : 0,1,2,3
+ * @param[in] op Reduction operation to perform. Only ArgMin and ArgMax are supported.
+ */
+ void configure(const ICLTensor *input, const ICLTensor *prev_output, ICLTensor *output, unsigned int axis, ReductionOperation op);
+ /** Set the input and output tensors.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/S32/F16/F32.
+ * @param[in] prev_output Destination tensor of the previous iterations of @ref CLArgMinMaxLayerKernel. Data types supported: U32/S32
+ * Has to be nullptr for the first iteration
+ * @param[out] output Destination tensor. Data types supported: U32/S32
+ * Output will have the same number of dimensions as input.
+ * @param[in] axis Axis along which to reduce. Supported reduction axis : 0,1,2,3
+ * @param[in] op Reduction operation to perform. Only ArgMin and ArgMax are supported.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *prev_output, ICLTensor *output, unsigned int axis, ReductionOperation op);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref CLArgMinMaxLayerKernel.
+ *
+ * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/S32/F16/F32.
+ * @param[in] prev_output Destination tensor info of the previous iterations. Data types supported: U32/S32
+ * Has to be nullptr for the first iteration
+ * @param[in] output Destination tensor info. Data types supported: U32/S32
+ * Output will have the same number of dimensions as input.
+ * @param[in] axis Axis along which to reduce. Supported reduction axis : 0,1,2,3
+ * @param[in] op Reduction operation to perform. Only ArgMin and ArgMax are supported.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *prev_output, const ITensorInfo *output, unsigned int axis, ReductionOperation op);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input;
+ const ICLTensor *_prev_output;
+ ICLTensor *_output;
+ unsigned int _reduction_axis;
+ ReductionOperation _op;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLARGMINMAXLAYERKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h"
+#include "src/core/CL/kernels/CLBatchConcatenateLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2019-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ARM_COMPUTE_CLBATCHCONCATENATEKERNEL_H
+#define ARM_COMPUTE_CLBATCHCONCATENATEKERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the batch concatenate kernel.
+ * The input tensor will be concatenated into the output tensor.
+ */
+class CLBatchConcatenateLayerKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLBatchConcatenateLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLBatchConcatenateLayerKernel(const CLBatchConcatenateLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLBatchConcatenateLayerKernel &operator=(const CLBatchConcatenateLayerKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLBatchConcatenateLayerKernel(CLBatchConcatenateLayerKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLBatchConcatenateLayerKernel &operator=(CLBatchConcatenateLayerKernel &&) = default;
+ /** Default destructor */
+ ~CLBatchConcatenateLayerKernel() = default;
+ /** Initialise the kernel's inputs and output
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Input tensor. Data types supported: All.
+ * @param[in] batch_offset The offset on axis # 3.
+ * @param[in,out] output Output tensor. Data types supported: Same as @p input.
+ *
+ * @note The output tensor's two lowest dimensions can't be smaller than the input's.
+ * @note The gaps between the two lowest dimensions of input and output need to be divisible by 2.
+ *
+ */
+ void configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int batch_offset, ITensorInfo *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLBatchConcatenateLayerKernel
+ *
+ * @param[in] input Input tensor info. Data types supported: All.
+ * @param[in] batch_offset The offset on axis # 3.
+ * @param[in] output Output tensor info. Data types supported: Same as @p input.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, unsigned int batch_offset, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ unsigned int _batch_offset;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLBATCHCONCATENATEKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h"
+#include "src/core/CL/kernels/CLBatchNormalizationLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLBATCHNORMALIZATIONLAYERKERNEL_H
+#define ARM_COMPUTE_CLBATCHNORMALIZATIONLAYERKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the BatchNormalization layer kernel.
+ */
+class CLBatchNormalizationLayerKernel : public ICLKernel
+{
+public:
+ /** Constructor */
+ CLBatchNormalizationLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLBatchNormalizationLayerKernel(const CLBatchNormalizationLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLBatchNormalizationLayerKernel &operator=(const CLBatchNormalizationLayerKernel &) = delete;
+ /** Default Move Constructor. */
+ CLBatchNormalizationLayerKernel(CLBatchNormalizationLayerKernel &&) = default;
+ /** Default move assignment operator */
+ CLBatchNormalizationLayerKernel &operator=(CLBatchNormalizationLayerKernel &&) = default;
+ /** Default destructor */
+ ~CLBatchNormalizationLayerKernel() = default;
+
+ /** Set the input and output tensors.
+ *
+ * @note If the output tensor is a nullptr, the batch normalization function will be performed in-place
+ *
+ * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result.
+ * 3 lower dimensions represent a single input with dimensions [width, height, FM].
+ * The rest are optional and used for representing batches. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC.
+ * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input
+ * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
+ * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
+ * @param[in] beta (Optional) Beta values tensor. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for beta is 0. Data types supported: Same as @p input
+ * @param[in] gamma (Optional) Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for gamma is 1. Data types supported: Same as @p input
+ * @param[in] epsilon (Optional) Small value to avoid division with zero. Default value is 0.001f.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
+ */
+ void configure(ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta = nullptr, const ICLTensor *gamma = nullptr, float epsilon = 0.001f,
+ ActivationLayerInfo act_info = ActivationLayerInfo());
+ /** Set the input and output tensors.
+ *
+ * @note If the output tensor is a nullptr, the batch normalization function will be performed in-place
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result.
+ * 3 lower dimensions represent a single input with dimensions [width, height, FM].
+ * The rest are optional and used for representing batches. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC.
+ * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input
+ * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
+ * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
+ * @param[in] beta (Optional) Beta values tensor. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for beta is 0. Data types supported: Same as @p input
+ * @param[in] gamma (Optional) Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for gamma is 1. Data types supported: Same as @p input
+ * @param[in] epsilon (Optional) Small value to avoid division with zero. Default value is 0.001f.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
+ */
+ void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta = nullptr,
+ const ICLTensor *gamma = nullptr, float epsilon = 0.001f, ActivationLayerInfo act_info = ActivationLayerInfo());
+ /** Static function to check if given info will lead to a valid configuration of @ref CLBatchNormalizationLayerKernel
+ *
+ * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result.
+ * 3 lower dimensions represent a single input with dimensions [width, height, FM].
+ * The rest are optional and used for representing batches. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC.
+ * @param[in] output Destination tensor info. Output will have the same number of dimensions as input. Data type supported: same as @p input
+ * @param[in] mean Mean values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
+ * @param[in] var Variance values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
+ * @param[in] beta (Optional) Beta values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for beta is 0. Data types supported: Same as @p input
+ * @param[in] gamma (Optional) Gamma values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for gamma is 1. Data types supported: Same as @p input
+ * @param[in] epsilon (Optional) Small value to avoid division with zero. Default value is 0.001f.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output,
+ const ITensorInfo *mean, const ITensorInfo *var,
+ const ITensorInfo *beta = nullptr, const ITensorInfo *gamma = nullptr,
+ float epsilon = 0.001f, ActivationLayerInfo act_info = ActivationLayerInfo());
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ ICLTensor *_input; /**< Source tensor (also holds the result when running in-place) */
+ ICLTensor *_output; /**< Destination tensor (nullptr when the kernel runs in-place) */
+ const ICLTensor *_mean; /**< Mean values tensor */
+ const ICLTensor *_var; /**< Variance values tensor */
+ const ICLTensor *_beta; /**< (Optional) Beta values tensor */
+ const ICLTensor *_gamma; /**< (Optional) Gamma values tensor */
+ float _epsilon; /**< Small value to avoid division by zero */
+ bool _run_in_place; /**< True when the result is written back into the input tensor */
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLBATCHNORMALIZATIONLAYERKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLBatchToSpaceLayerKernel.h"
+#include "src/core/CL/kernels/CLBatchToSpaceLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/ICLTensor.h"
--- /dev/null
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLBATCHTOSPACELAYERKERNEL_H
+#define ARM_COMPUTE_CLBATCHTOSPACELAYERKERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the batch to space kernel.
+ *
+ * Rearranges data from the batch dimension into the spatial dimensions
+ * according to the given block shape.
+ */
+class CLBatchToSpaceLayerKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLBatchToSpaceLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLBatchToSpaceLayerKernel(const CLBatchToSpaceLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLBatchToSpaceLayerKernel &operator=(const CLBatchToSpaceLayerKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLBatchToSpaceLayerKernel(CLBatchToSpaceLayerKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLBatchToSpaceLayerKernel &operator=(CLBatchToSpaceLayerKernel &&) = default;
+ /** Default destructor */
+ ~CLBatchToSpaceLayerKernel() = default;
+ /** Initialise the kernel's inputs and output.
+ *
+ * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
+ * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32
+ * @param[out] output Tensor output. Data types supported: same as @p input
+ */
+ void configure(const ICLTensor *input, const ICLTensor *block_shape, ICLTensor *output);
+ /** Initialise the kernel's inputs and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
+ * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32
+ * @param[out] output Tensor output. Data types supported: same as @p input
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *block_shape, ICLTensor *output);
+ /** Initialise the kernel's inputs and output (Static block shape).
+ *
+ * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
+ * @param[in] block_shape_x Block shape x value.
+ * @param[in] block_shape_y Block shape y value.
+ * @param[out] output Tensor output. Data types supported: same as @p input
+ */
+ void configure(const ICLTensor *input, const int32_t block_shape_x, const int32_t block_shape_y, ICLTensor *output);
+ /** Initialise the kernel's inputs and output (Static block shape).
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
+ * @param[in] block_shape_x Block shape x value.
+ * @param[in] block_shape_y Block shape y value.
+ * @param[out] output Tensor output. Data types supported: same as @p input
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, const int32_t block_shape_x, const int32_t block_shape_y, ICLTensor *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLBatchToSpaceLayerKernel
+ *
+ * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
+ * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32
+ * @param[in] output Tensor output. Data types supported: same as @p input
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLBatchToSpaceLayerKernel (Static block shape).
+ *
+ * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
+ * @param[in] block_shape_x Block shape x value.
+ * @param[in] block_shape_y Block shape y value.
+ * @param[in] output Tensor output. Data types supported: same as @p input
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const int32_t block_shape_x, const int32_t block_shape_y, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input; /**< Source tensor */
+ const ICLTensor *_block_shape; /**< Block shape tensor */
+ ICLTensor *_output; /**< Destination tensor */
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLBATCHTOSPACELAYERKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLBitwiseAndKernel.h"
+#include "src/core/CL/kernels/CLBitwiseAndKernel.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLBITWISEANDKERNEL_H
+#define ARM_COMPUTE_CLBITWISEANDKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the bitwise AND operation kernel.
+ *
+ * Result is computed by:
+ * @f[ output(x,y) = input1(x,y) \land input2(x,y) @f]
+ */
+class CLBitwiseAndKernel : public ICLKernel
+{
+public:
+ /** Default constructor. */
+ CLBitwiseAndKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLBitwiseAndKernel(const CLBitwiseAndKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLBitwiseAndKernel &operator=(const CLBitwiseAndKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLBitwiseAndKernel(CLBitwiseAndKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLBitwiseAndKernel &operator=(CLBitwiseAndKernel &&) = default;
+ /** Set the inputs and output tensors
+ *
+ * @param[in] input1 Source tensor. Data types supported: U8.
+ * @param[in] input2 Source tensor. Data types supported: U8.
+ * @param[out] output Destination tensor. Data types supported: U8.
+ */
+ void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
+ /** Set the inputs and output tensors
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input1 Source tensor. Data types supported: U8.
+ * @param[in] input2 Source tensor. Data types supported: U8.
+ * @param[out] output Destination tensor. Data types supported: U8.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input1; /**< Source tensor 1 */
+ const ICLTensor *_input2; /**< Source tensor 2 */
+ ICLTensor *_output; /**< Destination tensor */
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLBITWISEANDKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLBitwiseNotKernel.h"
+#include "src/core/CL/kernels/CLBitwiseNotKernel.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLBITWISENOTKERNEL_H
+#define ARM_COMPUTE_CLBITWISENOTKERNEL_H
+
+#include "src/core/CL/ICLSimple2DKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the bitwise NOT operation kernel.
+ *
+ * Result is computed by:
+ * @f[ output(x,y) = \lnot input(x,y) @f]
+ */
+class CLBitwiseNotKernel : public ICLSimple2DKernel
+{
+public:
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[out] output Destination tensor. Data types supported: U8.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output);
+ /** Set the input and output tensors.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[out] output Destination tensor. Data types supported: U8.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLBITWISENOTKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLBitwiseOrKernel.h"
+#include "src/core/CL/kernels/CLBitwiseOrKernel.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLBITWISEORKERNEL_H
+#define ARM_COMPUTE_CLBITWISEORKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the bitwise OR operation kernel.
+ *
+ * Result is computed by:
+ * @f[ output(x,y) = input1(x,y) \lor input2(x,y) @f]
+ */
+class CLBitwiseOrKernel : public ICLKernel
+{
+public:
+ /** Default constructor. */
+ CLBitwiseOrKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLBitwiseOrKernel(const CLBitwiseOrKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLBitwiseOrKernel &operator=(const CLBitwiseOrKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLBitwiseOrKernel(CLBitwiseOrKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLBitwiseOrKernel &operator=(CLBitwiseOrKernel &&) = default;
+ /** Set the inputs and output tensors
+ *
+ * @param[in] input1 Source tensor. Data types supported: U8.
+ * @param[in] input2 Source tensor. Data types supported: U8.
+ * @param[out] output Destination tensor. Data types supported: U8.
+ */
+ void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
+ /** Set the inputs and output tensors
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input1 Source tensor. Data types supported: U8.
+ * @param[in] input2 Source tensor. Data types supported: U8.
+ * @param[out] output Destination tensor. Data types supported: U8.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input1; /**< Source tensor 1 */
+ const ICLTensor *_input2; /**< Source tensor 2 */
+ ICLTensor *_output; /**< Destination tensor */
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLBITWISEORKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLBitwiseXorKernel.h"
+#include "src/core/CL/kernels/CLBitwiseXorKernel.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLBITWISEXORKERNEL_H
+#define ARM_COMPUTE_CLBITWISEXORKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the bitwise XOR operation kernel.
+ *
+ * Result is computed by:
+ * @f[ output(x,y) = input1(x,y) \oplus input2(x,y) @f]
+ */
+class CLBitwiseXorKernel : public ICLKernel
+{
+public:
+ /** Default constructor. */
+ CLBitwiseXorKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLBitwiseXorKernel(const CLBitwiseXorKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLBitwiseXorKernel &operator=(const CLBitwiseXorKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLBitwiseXorKernel(CLBitwiseXorKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLBitwiseXorKernel &operator=(CLBitwiseXorKernel &&) = default;
+ /** Set the inputs and output tensors
+ *
+ * @param[in] input1 Source tensor. Data types supported: U8.
+ * @param[in] input2 Source tensor. Data types supported: U8.
+ * @param[out] output Destination tensor. Data types supported: U8.
+ */
+ void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
+ /** Set the inputs and output tensors
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input1 Source tensor. Data types supported: U8.
+ * @param[in] input2 Source tensor. Data types supported: U8.
+ * @param[out] output Destination tensor. Data types supported: U8.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input1; /**< Source tensor 1 */
+ const ICLTensor *_input2; /**< Source tensor 2 */
+ ICLTensor *_output; /**< Destination tensor */
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLBITWISEXORKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLBoundingBoxTransformKernel.h"
+#include "src/core/CL/kernels/CLBoundingBoxTransformKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLBOUNDINGBOXTRANSFORMKERNEL_H
+#define ARM_COMPUTE_CLBOUNDINGBOXTRANSFORMKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the bounding box transform kernel */
+class CLBoundingBoxTransformKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLBoundingBoxTransformKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLBoundingBoxTransformKernel(const CLBoundingBoxTransformKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLBoundingBoxTransformKernel &operator=(const CLBoundingBoxTransformKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLBoundingBoxTransformKernel(CLBoundingBoxTransformKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLBoundingBoxTransformKernel &operator=(CLBoundingBoxTransformKernel &&) = default;
+ /** Default destructor */
+ ~CLBoundingBoxTransformKernel() = default;
+
+ /** Set the input and output tensors.
+ *
+ * @param[in] boxes Source tensor. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32.
+ * @param[out] pred_boxes Destination tensor. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. Data types supported: Same as @p boxes
+ * @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes.
+ * Data types supported: QASYMM8 if @p boxes is QASYMM16, otherwise same as @p boxes
+ * @param[in] info Contains BoundingBox operation information described in @ref BoundingBoxTransformInfo.
+ *
+ * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct.
+ *
+ */
+ void configure(const ICLTensor *boxes, ICLTensor *pred_boxes, const ICLTensor *deltas, const BoundingBoxTransformInfo &info);
+ /** Set the input and output tensors.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] boxes Source tensor. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32.
+ * @param[out] pred_boxes Destination tensor. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. Data types supported: Same as @p boxes
+ * @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes.
+ * Data types supported: QASYMM8 if @p boxes is QASYMM16, otherwise same as @p boxes
+ * @param[in] info Contains BoundingBox operation information described in @ref BoundingBoxTransformInfo.
+ *
+ * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct.
+ *
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *boxes, ICLTensor *pred_boxes, const ICLTensor *deltas, const BoundingBoxTransformInfo &info);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref CLBoundingBoxTransform
+ *
+ * @param[in] boxes Source tensor info. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32.
+ * @param[in] pred_boxes Destination tensor info. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. Data types supported: Same as @p boxes
+ * @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes.
+ * Data types supported: QASYMM8 if @p boxes is QASYMM16, otherwise same as @p boxes
+ * @param[in] info Contains BoundingBox operation information described in @ref BoundingBoxTransformInfo.
+ *
+ * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct.
+ *
+ * @return a Status
+ */
+ static Status validate(const ITensorInfo *boxes, const ITensorInfo *pred_boxes, const ITensorInfo *deltas, const BoundingBoxTransformInfo &info);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_boxes; /**< Source proposals tensor */
+ ICLTensor *_pred_boxes; /**< Destination transformed-boxes tensor */
+ const ICLTensor *_deltas; /**< Translations/scales (deltas) tensor */
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLBOUNDINGBOXTRANSFORMKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLBox3x3Kernel.h"
+#include "src/core/CL/kernels/CLBox3x3Kernel.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLBOX3X3KERNEL_H
+#define ARM_COMPUTE_CLBOX3X3KERNEL_H
+
+#include "src/core/CL/ICLSimple2DKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the box 3x3 filter kernel.
+ *
+ */
+class CLBox3x3Kernel : public ICLSimple2DKernel
+{
+public:
+    /** Initialise the kernel's input and output.
+ *
+ * @param[in] input An input tensor. Data types supported: U8
+ * @param[out] output The output tensor. Data types supported: U8.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
+    /** Initialise the kernel's input and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input An input tensor. Data types supported: U8
+ * @param[out] output The output tensor. Data types supported: U8.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined);
+
+    // Inherited methods overridden:
+ BorderSize border_size() const override;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLBOX3X3KERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLCannyEdgeKernel.h"
+#include "src/core/CL/kernels/CLCannyEdgeKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLCANNYEDGEKERNEL_H
+#define ARM_COMPUTE_CLCANNYEDGEKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel to perform Gradient computation.
+ */
+class CLGradientKernel : public ICLKernel
+{
+public:
+ /** Constructor */
+ CLGradientKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGradientKernel(const CLGradientKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGradientKernel &operator=(const CLGradientKernel &) = delete;
+ /** Initialise the kernel's sources, destinations and border mode.
+ *
+ * @note gx, gy and mag must all be the same size (either 16 or 32).
+ *
+ * @param[in] gx Source tensor - Gx component. Data types supported: S16/S32.
+ * @param[in] gy Source tensor - Gy component. Data types supported: Same as gx.
+ * @param[out] magnitude Destination tensor - Magnitude. Data types supported: U16/U32. Must match the pixel size of gx, gy.
+ * @param[out] phase Destination tensor - Quantized phase. Data types supported: U8.
+ * @param[in] norm_type Normalization type. if 1, L1-Norm otherwise L2-Norm.
+ */
+ void configure(const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase, int32_t norm_type);
+ /** Initialise the kernel's sources, destinations and border mode.
+ *
+ * @note gx, gy and mag must all be the same size (either 16 or 32).
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] gx Source tensor - Gx component. Data types supported: S16/S32.
+ * @param[in] gy Source tensor - Gy component. Data types supported: Same as gx.
+ * @param[out] magnitude Destination tensor - Magnitude. Data types supported: U16/U32. Must match the pixel size of gx, gy.
+ * @param[out] phase Destination tensor - Quantized phase. Data types supported: U8.
+ * @param[in] norm_type Normalization type. if 1, L1-Norm otherwise L2-Norm.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase, int32_t norm_type);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_gx; /**< Source tensor - Gx component */
+ const ICLTensor *_gy; /**< Source tensor - Gy component */
+ ICLTensor *_magnitude; /**< Destination tensor - Magnitude */
+ ICLTensor *_phase; /**< Destination tensor - Quantized phase */
+};
+
+/** OpenCL kernel to perform Non-Maxima suppression for Canny Edge.
+ *
+ * @note This kernel is meant to be used alongside CannyEdge and performs a non-maxima suppression using magnitude and phase of input
+ * to characterize points as possible edges. The output buffer needs to be cleared before this kernel is executed.
+ *
+ * @note Hysteresis is computed in @ref CLEdgeTraceKernel
+ */
+class CLEdgeNonMaxSuppressionKernel : public ICLKernel
+{
+public:
+ /** Constructor */
+ CLEdgeNonMaxSuppressionKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLEdgeNonMaxSuppressionKernel(const CLEdgeNonMaxSuppressionKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLEdgeNonMaxSuppressionKernel &operator=(const CLEdgeNonMaxSuppressionKernel &) = delete;
+ /** Initialise the kernel's sources, destination and border mode.
+ *
+ * @param[in] magnitude Source tensor - Magnitude. Data types supported: U16/U32.
+ * @param[in] phase Source tensor - Quantized phase. Data types supported: U8.
+ * @param[out] output Destination tensor. Data types supported: U16/U32.
+ * @param[in] lower_thr Lower threshold.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ICLTensor *magnitude, const ICLTensor *phase, ICLTensor *output, int32_t lower_thr, bool border_undefined);
+ /** Initialise the kernel's sources, destination and border mode.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] magnitude Source tensor - Magnitude. Data types supported: U16/U32.
+ * @param[in] phase Source tensor - Quantized phase. Data types supported: U8.
+ * @param[out] output Destination tensor. Data types supported: U16/U32.
+ * @param[in] lower_thr Lower threshold.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *magnitude, const ICLTensor *phase, ICLTensor *output, int32_t lower_thr, bool border_undefined);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+ BorderSize border_size() const override;
+
+private:
+ const ICLTensor *_magnitude; /**< Source tensor - Magnitude. */
+ const ICLTensor *_phase; /**< Source tensor - Quantized phase. */
+ ICLTensor *_output; /**< Destination tensor. */
+};
+
+/** OpenCL kernel to perform Edge tracing.
+ */
+class CLEdgeTraceKernel : public ICLKernel
+{
+public:
+ /** Constructor */
+ CLEdgeTraceKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLEdgeTraceKernel(const CLEdgeTraceKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLEdgeTraceKernel &operator=(const CLEdgeTraceKernel &) = delete;
+ /** Initialise the kernel's source, destination and border mode.
+ *
+ * @param[in] input Source tensor. Data types supported: U16/U32.
+ * @param[out] output Destination tensor. Data types supported: U8.
+ * @param[in] upper_thr Upper threshold used for the hysteresis
+ * @param[in] lower_thr Lower threshold used for the hysteresis
+ * @param[in,out] visited Tensor for keeping the visited pixels. Data types supported: U32.
+ * Expected to be initialized to 0 before each run.
+ * @param[in,out] recorded Tensor for keeping the recorded pixels. Data types supported: U32
+ * Expected to be initialized to 0 before each run.
+ * @param[in,out] l1_stack Tensor with the L1 stack for each pixel. Data types supported: S32.
+ * Expected to be initialized to 0 before each run.
+ * @param[in,out] l1_stack_counter Tensor for counting the elements in the L1 stack of each pixel. Data types supported: U8.
+ * Expected to be initialized to 0 before each run.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr,
+ ICLTensor *visited, ICLTensor *recorded, ICLTensor *l1_stack, ICLTensor *l1_stack_counter);
+ /** Initialise the kernel's source, destination and border mode.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: U16/U32.
+ * @param[out] output Destination tensor. Data types supported: U8.
+ * @param[in] upper_thr Upper threshold used for the hysteresis
+ * @param[in] lower_thr Lower threshold used for the hysteresis
+ * @param[in,out] visited Tensor for keeping the visited pixels. Data types supported: U32.
+ * Expected to be initialized to 0 before each run.
+ * @param[in,out] recorded Tensor for keeping the recorded pixels. Data types supported: U32
+ * Expected to be initialized to 0 before each run.
+ * @param[in,out] l1_stack Tensor with the L1 stack for each pixel. Data types supported: S32.
+ * Expected to be initialized to 0 before each run.
+ * @param[in,out] l1_stack_counter Tensor for counting the elements in the L1 stack of each pixel. Data types supported: U8.
+ * Expected to be initialized to 0 before each run.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr,
+ ICLTensor *visited, ICLTensor *recorded, ICLTensor *l1_stack, ICLTensor *l1_stack_counter);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input; /**< Source tensor. */
+ ICLTensor *_output; /**< Destination tensor. */
+ int32_t _lower_thr; /**< Lower threshold used for the hysteresis. */
+ int32_t _upper_thr; /**< Upper threshold used for the hysteresis. */
+ ICLTensor *_visited; /**< Marks visited elements */
+ ICLTensor *_recorded; /**< Marks recorded elements */
+    ICLTensor       *_l1_stack;         /**< L1 hysteresis stack */
+    ICLTensor       *_l1_stack_counter; /**< L1 hysteresis stack counter */
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLCANNYEDGEKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLChannelCombineKernel.h"
+#include "src/core/CL/kernels/CLChannelCombineKernel.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLMultiImage.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLCHANNELCOMBINEKERNEL_H
+#define ARM_COMPUTE_CLCHANNELCOMBINEKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+#include <array>
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLMultiImage;
+class ICLTensor;
+using ICLImage = ICLTensor;
+
+/** Interface for the channel combine kernel */
+class CLChannelCombineKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLChannelCombineKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLChannelCombineKernel(const CLChannelCombineKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLChannelCombineKernel &operator=(const CLChannelCombineKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLChannelCombineKernel(CLChannelCombineKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLChannelCombineKernel &operator=(CLChannelCombineKernel &&) = default;
+ /** Default destructor */
+ ~CLChannelCombineKernel() = default;
+ /** Configure function's inputs and outputs.
+ *
+ * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format.
+ * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format.
+ * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format.
+ * @param[in] plane3 The 2D plane that forms channel 3. Must be of U8 format.
+ * @param[out] output The single planar output tensor. Supported formats: RGB888/RGBA8888/YUYV422/UYVY422.
+ */
+ void configure(const ICLTensor *plane0, const ICLTensor *plane1, const ICLTensor *plane2, const ICLTensor *plane3, ICLTensor *output);
+ /** Configure function's inputs and outputs.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format.
+ * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format.
+ * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format.
+ * @param[in] plane3 The 2D plane that forms channel 3. Must be of U8 format.
+     * @param[out] output          The single planar output tensor. Supported formats: RGB888/RGBA8888/YUYV422/UYVY422.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *plane0, const ICLTensor *plane1, const ICLTensor *plane2, const ICLTensor *plane3, ICLTensor *output);
+ /** Configure function's inputs and outputs.
+ *
+ * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format.
+ * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format.
+ * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format.
+ * @param[out] output The multi planar output tensor. Supported formats: RGB888/RGBA8888/YUYV422/UYVY422.
+ */
+ void configure(const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output);
+ /** Configure function's inputs and outputs.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format.
+ * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format.
+ * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format.
+ * @param[out] output The multi planar output tensor. Supported formats: RGB888/RGBA8888/YUYV422/UYVY422.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ std::array<const ICLTensor *, 4> _planes;
+ ICLTensor *_output;
+ ICLMultiImage *_output_multi;
+ std::array<uint32_t, 3> _x_subsampling;
+ std::array<uint32_t, 3> _y_subsampling;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLCHANNELCOMBINEKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLChannelExtractKernel.h"
+#include "src/core/CL/kernels/CLChannelExtractKernel.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLMultiImage.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLCHANNELEXTRACTKERNEL_H
+#define ARM_COMPUTE_CLCHANNELEXTRACTKERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLMultiImage;
+class ICLTensor;
+using ICLImage = ICLTensor;
+
+/** Interface for the channel extract kernel */
+class CLChannelExtractKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLChannelExtractKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLChannelExtractKernel(const CLChannelExtractKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLChannelExtractKernel &operator=(const CLChannelExtractKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLChannelExtractKernel(CLChannelExtractKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLChannelExtractKernel &operator=(CLChannelExtractKernel &&) = default;
+ /** Default destructor */
+ ~CLChannelExtractKernel() = default;
+ /** Set the input and output of the kernel
+ *
+ * @param[in] input Source tensor. Formats supported: RGB888/RGBA8888/YUYV422/UYVY422
+ * @param[in] channel Channel to extract.
+ * @param[out] output Destination tensor. Must be of U8 format.
+ */
+ void configure(const ICLTensor *input, Channel channel, ICLTensor *output);
+ /** Set the input and output of the kernel
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Formats supported: RGB888/RGBA8888/YUYV422/UYVY422
+ * @param[in] channel Channel to extract.
+ * @param[out] output Destination tensor. Must be of U8 format.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, Channel channel, ICLTensor *output);
+ /** Set the input and output of the kernel
+ *
+ * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV/YUV444
+ * @param[in] channel Channel to extract.
+ * @param[out] output Single-planar 2D destination image. Must be of U8 format.
+ */
+ void configure(const ICLMultiImage *input, Channel channel, ICLImage *output);
+ /** Set the input and output of the kernel
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV/YUV444
+ * @param[in] channel Channel to extract.
+ * @param[out] output Single-planar 2D destination image. Must be of U8 format.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLMultiImage *input, Channel channel, ICLImage *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input;
+ ICLTensor *_output;
+ uint32_t _num_elems_processed_per_iteration;
+ uint32_t _subsampling;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLCHANNELEXTRACTKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLChannelShuffleLayerKernel.h"
+#include "src/core/CL/kernels/CLChannelShuffleLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLCHANNELSHUFFLELAYERKERNEL_H
+#define ARM_COMPUTE_CLCHANNELSHUFFLELAYERKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the channel shuffle kernel */
+class CLChannelShuffleLayerKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLChannelShuffleLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLChannelShuffleLayerKernel(const CLChannelShuffleLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLChannelShuffleLayerKernel &operator=(const CLChannelShuffleLayerKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLChannelShuffleLayerKernel(CLChannelShuffleLayerKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLChannelShuffleLayerKernel &operator=(CLChannelShuffleLayerKernel &&) = default;
+ /** Default destructor */
+ ~CLChannelShuffleLayerKernel() = default;
+ /** Configure function's inputs and outputs.
+ *
+ * @param[in] input Input tensor. Data types supported: All.
+ * @param[out] output Output tensor. Data type supported: Same as @p input
+ * @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, unsigned int num_groups);
+ /** Configure function's inputs and outputs.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Input tensor. Data types supported: All.
+ * @param[out] output Output tensor. Data type supported: Same as @p input
+ * @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, unsigned int num_groups);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLChannelShuffleLayerKernel
+ *
+ * @param[in] input Input tensor info. Data types supported: All.
+ * @param[in] output Output tensor info. Data type supported: Same as @p input
+ * @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int num_groups);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input;
+ ICLTensor *_output;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLCHANNELSHUFFLELAYERKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLCol2ImKernel.h"
+#include "src/core/CL/kernels/CLCol2ImKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLCOL2IMKERNEL_H
+#define ARM_COMPUTE_CLCOL2IMKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the col2im reshaping kernel.
+ *
+ * Rearranges each matrix column into image blocks. It's the inverse operation of @ref CLIm2ColKernel.
+ *
+ * For example, a vector of 9 elements can be reshaped to a block(image) of 3x3:
+ *
+ * @f[
+ * \left( \begin{array}{ccccccccc}
+ * a0 & a1 & a2 & a3 & a4 & a5 & a6 & a7 & a8 \\
+ * \end{array} \right)
+ * \rightarrow
+ * \left( \begin{array}{ccc}
+ * a0 & a1 & a2 \\
+ * a3 & a4 & a5 \\
+ * a6 & a7 & a8 \\
+ * \end{array} \right)
+ * @f]
+ */
+class CLCol2ImKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLCol2ImKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLCol2ImKernel(const CLCol2ImKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLCol2ImKernel &operator=(const CLCol2ImKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLCol2ImKernel(CLCol2ImKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLCol2ImKernel &operator=(CLCol2ImKernel &&) = default;
+ /** Default destructor */
+ ~CLCol2ImKernel() = default;
+ /** Set the input and output of the kernel.
+ *
+ * @param[in] input The input tensor to convert. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
+ * @param[out] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM],
+ * while the rest represent batch of outputs. Data types supported: Same as @p input. Data layout: NCHW
+ * @param[in] convolved_dims Output convolved dimensions.
+ * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const Size2D &convolved_dims, unsigned int num_groups = 1);
+ /** Set the input and output of the kernel.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input The input tensor to convert. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
+ * @param[out] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM],
+ * while the rest represent batch of outputs. Data types supported: Same as @p input. Data layout: NCHW
+ * @param[in] convolved_dims Output convolved dimensions.
+ * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Size2D &convolved_dims, unsigned int num_groups = 1);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLCol2ImKernel
+ *
+ * @param[in] input The input tensor to convert. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
+ * @param[in] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM],
+ * while the rest represent batch of outputs. Data types supported: Same as @p input. Data layout: NCHW
+ * @param[in] convolved_dims Output convolved dimensions.
+ * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &convolved_dims, unsigned int num_groups = 1);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+public:
+ const ICLTensor *_input;
+ ICLTensor *_output;
+ Size2D _convolved_dims;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLCOL2IMKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLColorConvertKernel.h"
+#include "src/core/CL/kernels/CLColorConvertKernel.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLMultiImage.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLCOLORCONVERTKERNEL_H
+#define ARM_COMPUTE_CLCOLORCONVERTKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLMultiImage;
+class ICLTensor;
+using ICLImage = ICLTensor;
+
+/** Interface for the color convert kernel.
+ *
+ */
+class CLColorConvertKernel : public ICLKernel
+{
+public:
+ /** Default constructor. */
+ CLColorConvertKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLColorConvertKernel(const CLColorConvertKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLColorConvertKernel &operator=(const CLColorConvertKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLColorConvertKernel(CLColorConvertKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLColorConvertKernel &operator=(CLColorConvertKernel &&) = default;
+ /** Default destructor. */
+ ~CLColorConvertKernel() = default;
+
+ /** Set the input and output of the kernel
+ *
+ * @param[in] input Source tensor. Formats supported: RGBA8888/UYVY422/YUYV422/RGB888
+ * @param[out] output Destination tensor. Formats supported: RGB888 (if the formats of @p input are RGBA8888/UYVY422/YUYV422),
+ * RGBA8888 (if the formats of @p input are UYVY422/YUYV422/RGB888),
+ * U8 (if the formats of @p input is RGB888)
+ */
+ void configure(const ICLTensor *input, ICLTensor *output);
+ /** Set the input and output of the kernel
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Formats supported: RGBA8888/UYVY422/YUYV422/RGB888
+ * @param[out] output Destination tensor. Formats supported: RGB888 (if the formats of @p input are RGBA8888/UYVY422/YUYV422),
+ * RGBA8888 (if the formats of @p input are UYVY422/YUYV422/RGB888),
+ * U8 (if the formats of @p input is RGB888)
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
+ /** Set the input and output of the kernel
+ *
+ * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV
+ * @param[out] output Single-planar destination image. Formats supported: RGB888/RGBA8888
+ */
+ void configure(const ICLMultiImage *input, ICLImage *output);
+ /** Set the input and output of the kernel
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV
+ * @param[out] output Single-planar destination image. Formats supported: RGB888/RGBA8888
+ */
+ void configure(const CLCompileContext &compile_context, const ICLMultiImage *input, ICLImage *output);
+ /** Set the input and output of the kernel
+ *
+ * @param[in] input Single-planar source image. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422
+ * @param[out] output Multi-planar destination image. Formats supported: NV12/IYUV/YUV444 (if the formats of @p input are RGB888/RGBA8888)
+ */
+ void configure(const ICLImage *input, ICLMultiImage *output);
+ /** Set the input and output of the kernel
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Single-planar source image. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422
+ * @param[out] output Multi-planar destination image. Formats supported: NV12/IYUV/YUV444 (if the formats of @p input are RGB888/RGBA8888)
+ */
+ void configure(const CLCompileContext &compile_context, const ICLImage *input, ICLMultiImage *output);
+ /** Set the input and output of the kernel
+ *
+ * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV
+ * @param[out] output Multi-planar destination image. Formats supported: YUV444/IYUV (if the formats of @p input are NV12/NV21)/NV12 (if the format of @p input is IYUV)
+ */
+ void configure(const ICLMultiImage *input, ICLMultiImage *output);
+ /** Set the input and output of the kernel
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV
+ * @param[out] output Multi-planar destination image. Formats supported: YUV444/IYUV (if the formats of @p input are NV12/NV21)/NV12 (if the format of @p input is IYUV)
+ */
+ void configure(const CLCompileContext &compile_context, const ICLMultiImage *input, ICLMultiImage *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input; /**< Pointer to the single-planar tensor input (nullptr when a multi-planar input is used) */
+ ICLTensor *_output; /**< Pointer to the single-planar tensor output (nullptr when a multi-planar output is used) */
+ const ICLMultiImage *_multi_input; /**< Pointer to the multi-planar input (nullptr when a single-planar input is used) */
+ ICLMultiImage *_multi_output; /**< Pointer to the multi-planar output (nullptr when a single-planar output is used) */
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLCOLORCONVERTKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLComparisonKernel.h"
+#include "src/core/CL/kernels/CLComparisonKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/ICLTensor.h"
--- /dev/null
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLCOMPARISONKERNEL_H
+#define ARM_COMPUTE_CLCOMPARISONKERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+// Forward declarations
+class ICLTensor;
+
+/** Interface for the comparison kernel: compares two input tensors element-wise and writes the result to a U8 output tensor. */
+class CLComparisonKernel : public ICLKernel
+{
+public:
+ /** Default constructor. */
+ CLComparisonKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLComparisonKernel(const CLComparisonKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLComparisonKernel &operator=(const CLComparisonKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLComparisonKernel(CLComparisonKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLComparisonKernel &operator=(CLComparisonKernel &&) = default;
+ /** Default destructor */
+ ~CLComparisonKernel() = default;
+ /** Set the inputs and output tensors
+ *
+ * @param[in] input1 Source tensor. Data types supported: All.
+ * @param[in] input2 Source tensor. Data types supported: Same as @p input1.
+ * @param[out] output Destination tensor. Data types supported: U8.
+ * @param[in] operation Comparison operation to use.
+ */
+ void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ComparisonOperation operation);
+ /** Set the inputs and output tensors
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input1 Source tensor. Data types supported: All.
+ * @param[in] input2 Source tensor. Data types supported: Same as @p input1.
+ * @param[out] output Destination tensor. Data types supported: U8.
+ * @param[in] operation Comparison operation to use.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ComparisonOperation operation);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLComparisonKernel
+ *
+ * @param[in] input1 Source tensor. Data types supported: All.
+ * @param[in] input2 Source tensor. Data types supported: Same as @p input1.
+ * @param[in] output Destination tensor. Data types supported: U8.
+ * @param[in] operation Comparison operation to use.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ComparisonOperation operation);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+ BorderSize border_size() const override;
+
+private:
+ const ICLTensor *_input1; /**< Source tensor 1 */
+ const ICLTensor *_input2; /**< Source tensor 2 */
+ ICLTensor *_output; /**< Destination tensor */
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLCOMPARISONKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h"
+#include "src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLCONVERTFULLYCONNECTEDWEIGHTSKERNEL_H
+#define ARM_COMPUTE_CLCONVERTFULLYCONNECTEDWEIGHTSKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface to convert the 2D Fully Connected weights from NCHW to NHWC or vice versa.
+ *
+ * @note This function can be applied to the 2D weights used by a Fully Connected layer if:
+ * - It follows a Convolution layer
+ * - The data layout used by the network does not match the one the model has been trained in.
+ *
+ * @note This function assumes the weights are already reshaped (transposed)
+ */
+class CLConvertFullyConnectedWeightsKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLConvertFullyConnectedWeightsKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLConvertFullyConnectedWeightsKernel(const CLConvertFullyConnectedWeightsKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLConvertFullyConnectedWeightsKernel &operator=(const CLConvertFullyConnectedWeightsKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLConvertFullyConnectedWeightsKernel(CLConvertFullyConnectedWeightsKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLConvertFullyConnectedWeightsKernel &operator=(CLConvertFullyConnectedWeightsKernel &&) = default;
+ /** Default destructor */
+ ~CLConvertFullyConnectedWeightsKernel() = default;
+ /** Set the input and output tensor.
+ *
+ * @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: All.
+ * @param[out] output The converted weights tensor. Shape and Data Type: Same as @p input.
+ * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer).
+ * @param[in] data_layout The data layout the weights have been trained in.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const TensorShape &original_input_shape, DataLayout data_layout);
+ /** Set the input and output tensor.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: All.
+ * @param[out] output The converted weights tensor. Shape and Data Type: Same as @p input.
+ * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer).
+ * @param[in] data_layout The data layout the weights have been trained in.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const TensorShape &original_input_shape, DataLayout data_layout);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLConvertFullyConnectedWeightsKernel
+ *
+ * @param[in] input Source weights tensor info to convert. Must be 2 dimensional. Data types supported: All.
+ * @param[in] output The converted weights tensor info. Shape and Data Type: Same as @p input.
+ * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer).
+ * @param[in] data_layout The data layout the weights have been trained in.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const TensorShape &original_input_shape, DataLayout data_layout);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input; /**< Source weights tensor */
+ ICLTensor *_output; /**< Converted weights tensor */
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLCONVERTFULLYCONNECTEDWEIGHTSKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLConvolutionKernel.h"
+#include "src/core/CL/kernels/CLConvolutionKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/CL/ICLKernel.h"
#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLCONVOLUTIONKERNEL_H
+#define ARM_COMPUTE_CLCONVOLUTIONKERNEL_H
+
+#include "src/core/CL/ICLSimple2DKernel.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/****************************************************************************************\
+ * Square Convolution *
+\****************************************************************************************/
+
+/** Interface for the kernel to run an arbitrary size convolution on a tensor. (Currently supports 3x3, 5x5, 7x7 and 9x9).
+ * The client can supply a convolution matrix \f$ C_{m,n} \f$.
+ * @f{eqnarray}{
+ * k_0 &=& \frac{m}{2} \\
+ * l_0 &=& \frac{n}{2} \\
+ * sum &=& \sum_{k=0,l=0}^{k=m-1,l=n-1} input(x+k-k_0, y+l-l_0) C_{k,l}
+ * @f}
+ *
+ * @note The above equation for this function is similar to the default OpenCV Filter2D function,
+ * which actually computes a correlation and not a convolution.
+ * In case of a real convolution the convolution matrix should be flipped both horizontally and vertically.
+ */
+template <unsigned int matrix_size>
+class CLConvolutionKernel : public ICLSimple2DKernel
+{
+public:
+ /** Initialise the kernel's input, output and border mode.
+ *
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[out] output Destination tensor. Data types supported: U8, S16.
+ * @param[in] conv Convolution matrix to apply to the input tensor.
+ * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined);
+ /** Initialise the kernel's input, output and border mode.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[out] output Destination tensor. Data types supported: U8, S16.
+ * @param[in] conv Convolution matrix to apply to the input tensor.
+ * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined);
+
+ // Inherited methods overridden:
+ BorderSize border_size() const override;
+};
+
+/** Interface for the kernel which applies a 3x3 convolution to a tensor. */
+using CLConvolution3x3Kernel = CLConvolutionKernel<3>;
+/** Interface for the kernel which applies a 5x5 convolution to a tensor. */
+using CLConvolution5x5Kernel = CLConvolutionKernel<5>;
+/** Interface for the kernel which applies a 7x7 convolution to a tensor. */
+using CLConvolution7x7Kernel = CLConvolutionKernel<7>;
+/** Interface for the kernel which applies a 9x9 convolution to a tensor. */
+using CLConvolution9x9Kernel = CLConvolutionKernel<9>;
+
+/****************************************************************************************\
+ * Separable Square Convolution *
+\****************************************************************************************/
+
+/** Kernel for the Horizontal pass of a Separable Convolution. Currently supports 5x5, 7x7, 9x9 */
+template <unsigned int matrix_size>
+class CLSeparableConvolutionHorKernel : public ICLSimple2DKernel
+{
+public:
+ /** Default Constructor */
+ CLSeparableConvolutionHorKernel();
+ /** Initialise the kernel's input, output and border mode.
+ *
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[out] output Destination tensor, Data types supported: U16/S16/S32.
+ * @param[in] conv Convolution matrix to apply to the input tensor.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, bool border_undefined);
+ /** Initialise the kernel's input, output and border mode.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[out] output Destination tensor, Data types supported: U16/S16/S32.
+ * @param[in] conv Convolution matrix to apply to the input tensor.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, bool border_undefined);
+
+ // Inherited methods overridden:
+ BorderSize border_size() const override;
+
+private:
+ BorderSize _border_size; /**< Border size */
+};
+
+/** Interface for the kernel which applies a horizontal pass of 5x5 convolution to a tensor. */
+using CLSeparableConvolution5x5HorKernel = CLSeparableConvolutionHorKernel<5>;
+/** Interface for the kernel which applies a horizontal pass of 7x7 convolution to a tensor. */
+using CLSeparableConvolution7x7HorKernel = CLSeparableConvolutionHorKernel<7>;
+/** Interface for the kernel which applies a horizontal pass of 9x9 convolution to a tensor. */
+using CLSeparableConvolution9x9HorKernel = CLSeparableConvolutionHorKernel<9>;
+
+/** Kernel for the Vertical pass of a Separable Convolution. Currently supports 5x5, 7x7, 9x9 */
+template <unsigned int matrix_size>
+class CLSeparableConvolutionVertKernel : public ICLSimple2DKernel
+{
+public:
+ /** Initialise the kernel's input, output and border mode.
+ *
+ * @param[in] input Source tensor. Data types supported: U16/S16/S32.
+ * @param[out] output Destination tensor, Data types supported: U8, S16.
+ * @param[in] conv Convolution matrix to apply to the input tensor.
+ * @param[in] scale Scale of the convolution matrix.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ * @param[in] data_type Data type to use for intermediate result. @sa data_type_for_convolution
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined, DataType data_type = DataType::S32);
+ /** Initialise the kernel's input, output and border mode.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: U16/S16/S32.
+ * @param[out] output Destination tensor, Data types supported: U8, S16.
+ * @param[in] conv Convolution matrix to apply to the input tensor.
+ * @param[in] scale Scale of the convolution matrix.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ * @param[in] data_type Data type to use for intermediate result. @sa data_type_for_convolution
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined, DataType data_type = DataType::S32);
+
+ // Inherited methods overridden:
+ BorderSize border_size() const override;
+};
+
+/** Interface for the kernel which applies a vertical pass of 5x5 convolution to a tensor. */
+using CLSeparableConvolution5x5VertKernel = CLSeparableConvolutionVertKernel<5>;
+/** Interface for the kernel which applies a vertical pass of 7x7 convolution to a tensor. */
+using CLSeparableConvolution7x7VertKernel = CLSeparableConvolutionVertKernel<7>;
+/** Interface for the kernel which applies a vertical pass of 9x9 convolution to a tensor. */
+using CLSeparableConvolution9x9VertKernel = CLSeparableConvolutionVertKernel<9>;
+
+/****************************************************************************************\
+ * Rectangle Convolution *
+\****************************************************************************************/
+
+/** Kernel for the running convolution on a rectangle matrix.
+ *
+ * @note Supports combinations of 3,5,7 and 9.
+ */
+class CLConvolutionRectangleKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLConvolutionRectangleKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLConvolutionRectangleKernel(const CLConvolutionRectangleKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLConvolutionRectangleKernel &operator=(const CLConvolutionRectangleKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLConvolutionRectangleKernel(CLConvolutionRectangleKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLConvolutionRectangleKernel &operator=(CLConvolutionRectangleKernel &&) = default;
+ /** Initialise the kernel's input, output and border mode.
+ *
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[out] output Destination tensor, Data types supported: U8, S16.
+ * @param[in] conv Convolution matrix to apply to the input tensor.
+ * @param[in] width Width of convolution matrix (Number of columns)
+ * @param[in] height Height of convolution matrix (Number of rows)
+ * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t width, uint32_t height, uint32_t scale, bool border_undefined);
+ /** Initialise the kernel's input, output and border mode.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[out] output Destination tensor, Data types supported: U8, S16.
+ * @param[in] conv Convolution matrix to apply to the input tensor.
+ * @param[in] width Width of convolution matrix (Number of columns)
+ * @param[in] height Height of convolution matrix (Number of rows)
+ * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t width, uint32_t height, uint32_t scale, bool border_undefined);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+ BorderSize border_size() const override;
+
+private:
+ BorderSize _border_size; /**< Border size */
+ const ICLTensor *_input; /**< Source tensor */
+ ICLTensor *_output; /**< Destination tensor */
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLCONVOLUTIONKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLCopyKernel.h"
+#include "src/core/CL/kernels/CLCopyKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLCOPYKERNEL_H
+#define ARM_COMPUTE_CLCOPYKERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel to perform a copy between two tensors */
+class CLCopyKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLCopyKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ CLCopyKernel(const CLCopyKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ CLCopyKernel &operator=(const CLCopyKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLCopyKernel(CLCopyKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLCopyKernel &operator=(CLCopyKernel &&) = default;
+ /** Initialize the kernel's input, output.
+ *
+ * @param[in] input Source tensor. Data types supported: All.
+ * @param[out] output Destination tensor. Data types supported: same as @p input.
+ * @param[in] output_window (Optional) Window to be used in case only copying into part of a tensor. Default is nullptr.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, Window *output_window = nullptr);
+ /** Initialize the kernel's input, output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: All.
+ * @param[out] output Destination tensor. Data types supported: same as @p input.
+ * @param[in] output_window (Optional) Window to be used in case only copying into part of a tensor. Default is nullptr.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, Window *output_window = nullptr);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLCopyKernel
+ *
+ * @param[in] input Source tensor info. Data types supported: All.
+ * @param[in] output Destination tensor info. Data types supported: same as @p input.
+ * @param[in] output_window (Optional) Window to be used in case only copying into part of a tensor. Default is nullptr.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, Window *output_window = nullptr);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input; /**< Source tensor */
+ ICLTensor *_output; /**< Destination tensor */
+ Window _output_window; /**< Window used when only part of the output tensor is written */
+ bool _has_output_window; /**< True if an output window was provided at configure time */
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLCOPYKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLCropKernel.h"
+#include "src/core/CL/kernels/CLCropKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2019-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLCROPKERNEL_H
+#define ARM_COMPUTE_CLCROPKERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel to crop a 2D region of an image tensor, converting the result to F32 */
+class CLCropKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLCropKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ CLCropKernel(const CLCropKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ CLCropKernel &operator=(const CLCropKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLCropKernel(CLCropKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLCropKernel &operator=(CLCropKernel &&) = default;
+ /** Configure kernel
+ *
+ * @note Supported tensor rank: up to 4
+ *
+ * @param[in] input Source tensor. Data type supported: All. Data layouts supported: NHWC.
+ * @param[out] output Destination tensor. Data type supported: F32
+ * @param[in] start Coordinates of where to start cropping the image.
+ * @param[in] end Coordinates of where to end cropping the image.
+ * @param[in] batch_index Fourth dimension index of the 3D image to crop in @p input.
+ * @param[in] extrapolation_value Value to be used for values outside of the image. Default is 0.
+ * @param[in] output_window Output window to be used in case cropped image is being copied into a tensor. Default is nullptr.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value = 0, Window *output_window = nullptr);
+ /** Configure kernel
+ *
+ * @note Supported tensor rank: up to 4
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data type supported: All. Data layouts supported: NHWC.
+ * @param[out] output Destination tensor. Data type supported: F32
+ * @param[in] start Coordinates of where to start cropping the image.
+ * @param[in] end Coordinates of where to end cropping the image.
+ * @param[in] batch_index Fourth dimension index of the 3D image to crop in @p input.
+ * @param[in] extrapolation_value Value to be used for values outside of the image. Default is 0.
+ * @param[in] output_window Output window to be used in case cropped image is being copied into a tensor. Default is nullptr.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value = 0,
+ Window *output_window = nullptr);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref CLCropKernel
+ *
+ * @note Supported tensor rank: up to 4
+ *
+ * @param[in] input Source tensor info. Data type supported: All. Data layouts supported: NHWC.
+ * @param[in] output Destination tensor info. Data type supported: F32
+ * @param[in] start Coordinates of where to start cropping the image.
+ * @param[in] end Coordinates of where to end cropping the image.
+ * @param[in] batch_index Fourth dimension index of the 3D image to crop in @p input.
+ * @param[in] extrapolation_value Value to be used for values outside of the image. Default is 0.
+ * @param[in] output_window Output window to be used in case cropped image is being copied into a tensor. Default is nullptr.
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value = 0,
+ Window *output_window = nullptr);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input;
+ ICLTensor *_output;
+ Coordinates2D _start;
+ uint32_t _batch_index;
+ float _extrapolation_value;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLCROPKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h"
+#include "src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLDECONVOLUTIONLAYERUPSAMPLEKERNEL_H
+#define ARM_COMPUTE_CLDECONVOLUTIONLAYERUPSAMPLEKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the Deconvolution layer kernel on OpenCL.
+ */
+class CLDeconvolutionLayerUpsampleKernel : public ICLKernel
+{
+public:
+ /** Constructor */
+ CLDeconvolutionLayerUpsampleKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLDeconvolutionLayerUpsampleKernel(const CLDeconvolutionLayerUpsampleKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLDeconvolutionLayerUpsampleKernel &operator=(const CLDeconvolutionLayerUpsampleKernel &) = delete;
+ /** Default Move Constructor. */
+ CLDeconvolutionLayerUpsampleKernel(CLDeconvolutionLayerUpsampleKernel &&) = default;
+ /** Default move assignment operator */
+ CLDeconvolutionLayerUpsampleKernel &operator=(CLDeconvolutionLayerUpsampleKernel &&) = default;
+ /** Default destructor */
+ ~CLDeconvolutionLayerUpsampleKernel() = default;
+
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] input Source tensor. Data types supported: All.
+ * @param[out] output Destination tensor. Data types supported: same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
+ * @param[in] info Contains padding and stride information described in @ref PadStrideInfo.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const PadStrideInfo &info);
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: All.
+ * @param[out] output Destination tensor. Data types supported: same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
+ * @param[in] info Contains padding and stride information described in @ref PadStrideInfo.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PadStrideInfo &info);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLDeconvolutionLayerUpsample
+ *
+ * @param[in] input Source tensor info. Data types supported: All.
+ * @param[in] output Destination tensor info. Data types supported: same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
+ * @param[in] info Contains padding and stride information described in @ref PadStrideInfo.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PadStrideInfo &info);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input;
+ ICLTensor *_output;
+ PadStrideInfo _info;
+ DataLayout _data_layout;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLDECONVOLUTIONLAYERUPSAMPLEKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h"
+#include "src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2019-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLDECONVOLUTIONLAYERRESHAPEOUTPUTKERNEL_H
+#define ARM_COMPUTE_CLDECONVOLUTIONLAYERRESHAPEOUTPUTKERNEL_H
+
+#include "src/core/CL/ICLSimpleKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the OpenCL kernel to be used for reshaping the tensor before returning the result of deconvolution.
+ *
+ * The input tensor to this OpenCL kernel is expected to be the result of a @ref CLGEMM operation between the Deconvolution input and the Deconvolution filter.
+ *
+ * The input tensor should have the following shape: [filter_width * filter_height * ofms, width, height, batch_size]
+ *
+ * The output tensor should have the following shape: [stride_x * (input_width - 1) + filter_width - 2 * padx, stride_y * (input_height - 1) + filter_height - 2 * pady, ofms, batch_size]
+ *
+ * For example, given a tensor with dimensions [4, 2, 2] this function returns a tensor with dimensions [1, 4, 4].
+ *
+ */
+class CLDeconvolutionReshapeOutputKernel : public ICLSimpleKernel
+{
+public:
+ /** Default constructor */
+ CLDeconvolutionReshapeOutputKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLDeconvolutionReshapeOutputKernel(const CLDeconvolutionReshapeOutputKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLDeconvolutionReshapeOutputKernel &operator=(const CLDeconvolutionReshapeOutputKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLDeconvolutionReshapeOutputKernel(CLDeconvolutionReshapeOutputKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLDeconvolutionReshapeOutputKernel &operator=(CLDeconvolutionReshapeOutputKernel &&) = default;
+ /** Default destructor */
+ ~CLDeconvolutionReshapeOutputKernel() = default;
+
+ /** Initialise the kernel's source and destination.
+ *
+ * @param[in] input Input tensor. Supported data types: QASYMM8/QASYMM8_SIGNED/S32/F16/F32.
+ * @param[in] bias Bias tensor to be added directly during the reshape operation. Supported data types: same as @p input. Supported data layouts: same as @p input.
+ * @param[out] output Output tensor with the following shape: [stride_x * (input_width - 1) + filter_width - 2 * padx, stride_y * (input_height - 1) + filter_height - 2 * pady, ofms, batch_size]
+ * Supported data types: same as @p input. Supported data layouts: same as @p input.
+ * @param[in] input_info Deconvolution input tensor info. Supported data types: same as @p input. Supported data layouts: same as @p input.
+ * @param[in] weights_info Deconvolution weights tensor info. Supported data types: same as @p input. Supported data layouts: same as @p input.
+ * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo. This kernel supports only stride_x = weights.width && stride_y = weights.height. Moreover, padding is not supported.
+ */
+ void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const ITensorInfo *input_info, const ITensorInfo *weights_info, const PadStrideInfo &deconv_info);
+ /** Initialise the kernel's source and destination.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Input tensor. Supported data types: QASYMM8/QASYMM8_SIGNED/S32/F16/F32.
+ * @param[in] bias Bias tensor to be added directly during the reshape operation. Supported data types: same as @p input. Supported data layouts: same as @p input.
+ * @param[out] output Output tensor with the following shape: [stride_x * (input_width - 1) + filter_width - 2 * padx, stride_y * (input_height - 1) + filter_height - 2 * pady, ofms, batch_size]
+ * Supported data types: same as @p input. Supported data layouts: same as @p input.
+ * @param[in] input_info Deconvolution input tensor info. Supported data types: same as @p input. Supported data layouts: same as @p input.
+ * @param[in] weights_info Deconvolution weights tensor info. Supported data types: same as @p input. Supported data layouts: same as @p input.
+ * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo. This kernel supports only stride_x = weights.width && stride_y = weights.height. Moreover, padding is not supported.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const ITensorInfo *input_info, const ITensorInfo *weights_info,
+ const PadStrideInfo &deconv_info);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref CLDeconvolutionReshapeOutputKernel.
+ *
+ * @param[in] input GEMM output tensor info to be reshaped. Supported data types: QASYMM8/QASYMM8_SIGNED/S32/F16/F32.
+ * @param[in] bias (Optional) Optional bias tensor info to be added directly during the reshape operation. Supported data types: same as @p input. Supported data layouts: same as @p input.
+ * @param[in] output Reshaped output tensor info. Supported data types: same as @p input. Supported data layouts: same as @p input.
+ * @param[in] input_info Original input tensor info. Supported data types: same as @p input. Supported data layouts: same as @p input.
+ * @param[in] weights_info Original weights tensor info output. Supported data types: same as @p input. Supported data layouts: same as @p input.
+ * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo. This kernel supports only stride_x = weights.width && stride_y = weights.height. Moreover, padding is not supported.
+ *
+ * @return a Status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const ITensorInfo *input_info, const ITensorInfo *weights_info, const PadStrideInfo &deconv_info);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ bool _add_bias;
+ const ICLTensor *_bias;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLDECONVOLUTIONLAYERRESHAPEOUTPUTKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h"
+#include "src/core/CL/kernels/CLDepthConcatenateLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ARM_COMPUTE_CLDEPTHCONCATENATEKERNEL_H
+#define ARM_COMPUTE_CLDEPTHCONCATENATEKERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+/** Interface for the depth concatenate kernel.
+ * The input tensor will be concatenated into the output tensor.
+ */
+class CLDepthConcatenateLayerKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLDepthConcatenateLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLDepthConcatenateLayerKernel(const CLDepthConcatenateLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLDepthConcatenateLayerKernel &operator=(const CLDepthConcatenateLayerKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLDepthConcatenateLayerKernel(CLDepthConcatenateLayerKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLDepthConcatenateLayerKernel &operator=(CLDepthConcatenateLayerKernel &&) = default;
+ /** Default destructor */
+ ~CLDepthConcatenateLayerKernel() = default;
+ /** Initialise the kernel's inputs and output
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] depth_offset The offset on the Z axis.
+ * @param[in,out] output Output tensor. Data types supported: Same as @p input.
+ *
+ * @note The output tensor's two lowest dimensions can't be smaller than the input tensor's.
+ * @note The gaps between the two lowest dimensions of input and output need to be divisible by 2.
+ *
+ */
+ void configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int depth_offset, ITensorInfo *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLDepthConcatenateLayerKernel
+ *
+ * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
+ * @param[in] depth_offset The offset on the Z axis.
+ * @param[in] output Output tensor info. Data types supported: Same as @p input.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, unsigned int depth_offset, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ unsigned int _depth_offset;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLDEPTHCONCATENATEKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h"
+#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLDEPTHCONVERTKERNEL_H
+#define ARM_COMPUTE_CLDEPTHCONVERTKERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLSimple3DKernel.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the depth conversion kernel. */
+class CLDepthConvertLayerKernel : public ICLSimple3DKernel
+{
+public:
+ /** Set the input and output of the kernel.
+ *
+ * Valid conversions Input -> Output :
+ *
+ * - QSYMM8_PER_CHANNEL -> QASYMM8 (ATTENTION: it is the user's responsibility to keep track of the quantization info in the TensorInfo meta-data)
+ * - U8 -> S8, U16, S16, U32, S32, F16, F32
+ * - U16 -> U8, S8, S16, U32, S32, F16, F32
+ * - S16 -> U8, S8, U16, U32, S32, F16, F32
+ * - U32 -> U8, S8, U16, S16, S32, F16, F32
+ * - S32 -> U8, S8, U16, S16, U32, F16, F32
+ * - F16 -> U8, S8, U16, S16, U32, F32
+ * - F32 -> U8, S8, U16, S16, U32, F16
+ *
+ * @param[in] input The input tensor to convert. Data types supported: U8/S8/QSYMM8_PER_CHANNEL/U16/S16/U32/S32/F16/F32.
+ * @param[out] output The output tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
+ * @param[in] policy Conversion policy
+ * @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift);
+ /** Set the input and output of the kernel.
+ *
+ * Valid conversions Input -> Output :
+ *
+ * - QSYMM8_PER_CHANNEL -> QASYMM8 (ATTENTION: it is the user's responsibility to keep track of the quantization info in the TensorInfo meta-data)
+ * - U8 -> S8, U16, S16, U32, S32, F16, F32
+ * - U16 -> U8, S8, S16, U32, S32, F16, F32
+ * - S16 -> U8, S8, U16, U32, S32, F16, F32
+ * - U32 -> U8, S8, U16, S16, S32, F16, F32
+ * - S32 -> U8, S8, U16, S16, U32, F16, F32
+ * - F16 -> U8, S8, U16, S16, U32, F32
+ * - F32 -> U8, S8, U16, S16, U32, F16
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input The input tensor to convert. Data types supported: U8/S8/QSYMM8_PER_CHANNEL/U16/S16/U32/S32/F16/F32.
+ * @param[out] output The output tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
+ * @param[in] policy Conversion policy
+ * @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLDepthConvertLayerKernel
+ *
+ * @param[in] input Source tensor info. Data types supported: U8/S8/QSYMM8_PER_CHANNEL/U16/S16/U32/S32/F16/F32.
+ * @param[in] output Destination tensor info. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
+ * @param[in] policy Conversion policy
+ * @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift);
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLDEPTHCONVERTKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLDepthToSpaceLayerKernel.h"
+#include "src/core/CL/kernels/CLDepthToSpaceLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/ICLTensor.h"
--- /dev/null
+/*
+ * Copyright (c) 2019-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLDEPTHTOSPACELAYERKERNEL_H
+#define ARM_COMPUTE_CLDEPTHTOSPACELAYERKERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the depth to space kernel */
+class CLDepthToSpaceLayerKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLDepthToSpaceLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLDepthToSpaceLayerKernel(const CLDepthToSpaceLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLDepthToSpaceLayerKernel &operator=(const CLDepthToSpaceLayerKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLDepthToSpaceLayerKernel(CLDepthToSpaceLayerKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLDepthToSpaceLayerKernel &operator=(CLDepthToSpaceLayerKernel &&) = default;
+ /** Default destructor */
+ ~CLDepthToSpaceLayerKernel() = default;
+ /** Initialise the kernel's inputs and output.
+ *
+ * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
+ * @param[out] output Tensor output. Data types supported: same as @p input
+ * @param[in] block_shape Block shape value.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, int32_t block_shape);
+ /** Initialise the kernel's inputs and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
+ * @param[out] output Tensor output. Data types supported: same as @p input
+ * @param[in] block_shape Block shape value.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t block_shape);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLDepthToSpaceLayerKernel.
+ *
+ * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All.
+ * @param[in] output Tensor output info. Data types supported: same as @p input
+ * @param[in] block_shape Block shape value.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input; /**< Source tensor */
+ ICLTensor *_output; /**< Destination tensor */
+ int32_t _block_shape; /**< Block shape */
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLDEPTHTOSPACELAYERKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h"
+#include "src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "src/core/AccessWindowStatic.h"
#include "src/core/CL/CLValidate.h"
+#include "src/core/CL/ICLKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
--- /dev/null
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLDEPTHWISECONVOLUTIONNCHWKERNEL3x3_H
+#define ARM_COMPUTE_CLDEPTHWISECONVOLUTIONNCHWKERNEL3x3_H
+
+#include "src/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the kernel to run a 3x3 depthwise convolution on a tensor when the data layout is NCHW.
+ */
+class CLDepthwiseConvolutionLayer3x3NCHWKernel : public ICLDepthwiseConvolutionLayer3x3Kernel
+{
+public:
+ /** Default constructor */
+ CLDepthwiseConvolutionLayer3x3NCHWKernel();
+ /** Initialize the function's source, destination, conv and border_size.
+ *
+ * @param[in] input Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] weights Weights tensor. A 3D tensor with dimensions [3, 3, IFM].
+ * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
+ * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
+ * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
+ * @param[out] output Destination tensor. Data type supported: Same as @p input.
+ * @param[in] conv_info Padding and stride information to use for the convolution.
+ * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported for QASYMM8.
+ * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+ * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization,
+ * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
+ * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization,
+ * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
+ */
+ void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
+ unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U),
+ const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) override;
+ /** Initialize the function's source, destination, conv and border_size.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] weights Weights tensor. A 3D tensor with dimensions [3, 3, IFM].
+ * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
+ * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
+ * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
+ * @param[out] output Destination tensor. Data type supported: Same as @p input.
+ * @param[in] conv_info Padding and stride information to use for the convolution.
+ * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported for QASYMM8.
+ * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+ * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization,
+ * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
+ * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization,
+ * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
+ unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U),
+ const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) override;
+ /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer3x3NCHWKernel
+ *
+ * @param[in] input Source tensor info. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] weights Weights tensor info. A 3D tensor with dimensions [3, 3, IFM].
+ * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
+ * @param[in] biases Biases tensor info. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
+ * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
+ * @param[in] output Destination tensor. Data type supported: Same as @p input.
+ * @param[in] conv_info Padding and stride information to use for the convolution.
+ * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported.
+ * @param[in] gpu_target (Optional) GPU target to validate the kernel for. Defaults to midgard.
+ * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+ * @param[in] output_multipliers (Optional) Output multipliers tensor info for quantized computations. In case of per-channel quantization,
+ * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
+ * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization,
+ * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
+ unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), GPUTarget gpu_target = GPUTarget::MIDGARD,
+ const Size2D &dilation = Size2D(1U, 1U), const ITensorInfo *output_multipliers = nullptr, const ITensorInfo *output_shifts = nullptr);
+
+ void run(const Window &window, cl::CommandQueue &queue) override;
+ BorderSize border_size() const override;
+
+private:
+ unsigned int _conv_stride_x;
+ unsigned int _conv_pad_top;
+ unsigned int _conv_pad_left;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLDEPTHWISECONVOLUTIONNCHWKERNEL3x3_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h"
+#include "src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "src/core/AccessWindowStatic.h"
#include "src/core/CL/CLValidate.h"
+#include "src/core/CL/ICLKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
--- /dev/null
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLDEPTHWISECONVOLUTIONNHWCKERNEL3x3_H
+#define ARM_COMPUTE_CLDEPTHWISECONVOLUTIONNHWCKERNEL3x3_H
+
+#include "src/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the kernel to run a 3x3 depthwise convolution on a tensor when the data layout is NHWC.
+ */
+class CLDepthwiseConvolutionLayer3x3NHWCKernel : public ICLDepthwiseConvolutionLayer3x3Kernel
+{
+public:
+ /** Default constructor */
+ CLDepthwiseConvolutionLayer3x3NHWCKernel();
+ /** Default move assignment operator. */
+ /** Initialize the function's source, destination, conv and border_size.
+ *
+ * @param[in] input Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] weights Weights tensor. A 3D tensor with dimensions [IFM, 3, 3].
+ * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
+ * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
+ * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
+ * @param[out] output Destination tensor. Data type supported: Same as @p input.
+ * @param[in] conv_info Padding and stride information to use for the convolution.
+ * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported.
+ * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+ * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization,
+ * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
+ * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization,
+ * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
+ */
+ void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
+ unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U),
+ const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) override;
+ /** Initialize the function's source, destination, conv and border_size.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] weights Weights tensor. A 3D tensor with dimensions [IFM, 3, 3].
+ * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
+ * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
+ * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
+ * @param[out] output Destination tensor. Data type supported: Same as @p input.
+ * @param[in] conv_info Padding and stride information to use for the convolution.
+ * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported.
+ * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+ * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization,
+ * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
+ * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization,
+ * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
+ unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U),
+ const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) override;
+ /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer3x3NHWCKernel
+ *
+ * @param[in] input Source tensor info. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] weights Weights tensor info. A 3D tensor with dimensions [IFM, 3, 3].
+ * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
+ * @param[in] biases Biases tensor info. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
+ * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
+ * @param[in] output Destination tensor info. Data type supported: Same as @p input.
+ * @param[in] conv_info Padding and stride information to use for the convolution.
+ * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported.
+ * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+ * @param[in] output_multipliers (Optional) Output multipliers tensor info for quantized computations. In case of per-channel quantization,
+ * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
+ * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization,
+ * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
+ unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U),
+ const ITensorInfo *output_multipliers = nullptr, const ITensorInfo *output_shifts = nullptr);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+ BorderSize border_size() const override;
+
+private:
+ unsigned int _num_planes_processed_per_iteration;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLDEPTHWISECONVOLUTIONNHWCKERNEL3x3_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h"
+#include "src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "src/core/CL/CLValidate.h"
+#include "src/core/CL/ICLKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
--- /dev/null
+/*
+ * Copyright (c) 2019-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H
+#define ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+#include "arm_compute/core/KernelDescriptors.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the kernel to run a MxN depthwise convolution. M and N are respectively the rows and columns of the filter
+ This kernel assumes that tensor for the weights is NOT reshaped (Native version) */
+class CLDepthwiseConvolutionLayerNativeKernel : public ICLKernel
+{
+public:
+ /** Default Constructor */
+ CLDepthwiseConvolutionLayerNativeKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLDepthwiseConvolutionLayerNativeKernel(const CLDepthwiseConvolutionLayerNativeKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLDepthwiseConvolutionLayerNativeKernel &operator=(const CLDepthwiseConvolutionLayerNativeKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLDepthwiseConvolutionLayerNativeKernel(CLDepthwiseConvolutionLayerNativeKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLDepthwiseConvolutionLayerNativeKernel &operator=(CLDepthwiseConvolutionLayerNativeKernel &&) = default;
+ /** Initialize the function's source, destination and parameters
+ *
+ * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/FP32/FP16. Data layout supported: NHWC
+ * @param[in] weights Weights tensor. A 3D tensor with dimensions [IFM, N, M].
+ * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
+ * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
+ * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
+ * @param[out] output Destination tensor. Data type supported: Same as @p input.
+ * @param[in] dwc_weights_info Depthwise convolution layer weights info to retrieve the number of output elements processed by each thread
+ * @param[in] dwc_info Depthwise convolution layer info
+ * @param[in] conv_info Padding and stride information to use for the convolution.
+ * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+ * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+ * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization,
+ * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
+ * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization,
+ * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
+ */
+ void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const DWCWeightsKernelInfo &dwc_weights_info,
+ const DWCKernelInfo &dwc_info, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const Size2D &dilation = Size2D(1U, 1U),
+ const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr);
+ /** Initialize the function's source, destination and parameters
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/FP32/FP16. Data layout supported: NHWC
+ * @param[in] weights Weights tensor. A 3D tensor with dimensions [IFM, N, M].
+ * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
+ * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
+ * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
+ * @param[out] output Destination tensor. Data type supported: Same as @p input.
+ * @param[in] dwc_weights_info Depthwise convolution layer weights info to retrieve the number of output elements processed by each thread
+ * @param[in] dwc_info Depthwise convolution layer info
+ * @param[in] conv_info Padding and stride information to use for the convolution.
+ * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+ * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+ * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization,
+ * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
+ * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization,
+ * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const DWCWeightsKernelInfo &dwc_weights_info,
+ const DWCKernelInfo &dwc_info, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const Size2D &dilation = Size2D(1U, 1U),
+ const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayerNativeKernel
+ *
+ * @param[in] input Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/FP32/FP16. Data layout supported: NHWC
+ * @param[in] weights Weights tensor info. A 3D tensor with dimensions [IFM, N, M].
+ * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
+ * @param[in] biases Biases tensor info. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
+ * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
+ * @param[in] output Destination tensor info. Data type supported: Same as @p input.
+ * @param[in] dwc_weights_info Depthwise convolution layer weights info to retrieve the number of output elements processed by each thread
+ * @param[in] dwc_info Depthwise convolution layer info
+ * @param[in] conv_info Padding and stride information to use for the convolution.
+ * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+ * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+ * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization,
+ * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
+ * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization,
+ * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const DWCWeightsKernelInfo &dwc_weights_info,
+ const DWCKernelInfo &dwc_info, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const Size2D &dilation = Size2D(1U, 1U),
+ const ITensorInfo *output_multipliers = nullptr, const ITensorInfo *output_shifts = nullptr);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input;
+ const ICLTensor *_weights;
+ const ICLTensor *_biases;
+ ICLTensor *_output;
+ unsigned int _depth_multiplier;
+ const ICLTensor *_output_multipliers;
+ const ICLTensor *_output_shifts;
+ bool _is_quantized;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h"
+#include "src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "src/core/AccessWindowStatic.h"
#include "src/core/CL/CLValidate.h"
+#include "src/core/CL/ICLKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
--- /dev/null
+/*
+ * Copyright (c) 2019-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERRESHAPEWEIGHTSKERNEL_H
+#define ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERRESHAPEWEIGHTSKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the kernel to reshape the weights of depthwise convolution. */
+class CLDepthwiseConvolutionLayerReshapeWeightsKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLDepthwiseConvolutionLayerReshapeWeightsKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLDepthwiseConvolutionLayerReshapeWeightsKernel(const CLDepthwiseConvolutionLayerReshapeWeightsKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLDepthwiseConvolutionLayerReshapeWeightsKernel &operator=(const CLDepthwiseConvolutionLayerReshapeWeightsKernel &) = delete;
+ /** Default Move Constructor. */
+ CLDepthwiseConvolutionLayerReshapeWeightsKernel(CLDepthwiseConvolutionLayerReshapeWeightsKernel &&) = default;
+ /** Default move assignment operator */
+ CLDepthwiseConvolutionLayerReshapeWeightsKernel &operator=(CLDepthwiseConvolutionLayerReshapeWeightsKernel &&) = default;
+
+ /** Initialize the function's source and destination.
+ *
+ * @param[in] input The input tensor of dimension [IFM, W, H]. Data types supported: All. Data layouts supported: NHWC
+ * @param[out] output The output tensor of dimension [W*H*C0, ceil(IFM/C0)]. C0 is the number of channels read by each thread. Data types supported: same as @p input.
+ * @param[in] info Depthwise convolution information to reshape the input tensor.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const DepthwiseConvolutionReshapeInfo &info);
+ /** Initialize the function's source and destination.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input The input tensor of dimension [IFM, W, H]. Data types supported: All. Data layouts supported: NHWC
+ * @param[out] output The output tensor of dimension [W*H*C0, ceil(IFM/C0)]. C0 is the number of channels read by each thread. Data types supported: same as @p input.
+ * @param[in] info Depthwise convolution information to reshape the input tensor.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const DepthwiseConvolutionReshapeInfo &info);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayerReshapeWeightsKernel
+ *
+ * @param[in] input The input tensor info of dimension [IFM, W, H]. Data types supported: All. Data layouts supported: NHWC
+ * @param[in] output The output tensor info of dimension [W*H*C0, ceil(IFM/C0)]. C0 is the number of channels read by each thread. Data types supported: same as @p input.
+ * @param[in] info Depthwise convolution information to reshape the input tensor.
+ *
+ * @return a Status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const DepthwiseConvolutionReshapeInfo &info);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input;
+ ICLTensor *_output;
+
+ void configure_dot_product(const DepthwiseConvolutionReshapeInfo &info);
+ void configure_generic(const DepthwiseConvolutionReshapeInfo &info);
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERRESHAPEWEIGHTSKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h"
+#include "src/core/CL/kernels/CLDequantizationLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLDEQUANTIZATIONLAYERKERNEL_H
+#define ARM_COMPUTE_CLDEQUANTIZATIONLAYERKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the dequantization layer kernel. */
+class CLDequantizationLayerKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLDequantizationLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLDequantizationLayerKernel(const CLDequantizationLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLDequantizationLayerKernel &operator=(const CLDequantizationLayerKernel &) = delete;
+ /** Default Move Constructor. */
+ CLDequantizationLayerKernel(CLDequantizationLayerKernel &&) = default;
+ /** Default move assignment operator */
+ CLDequantizationLayerKernel &operator=(CLDequantizationLayerKernel &&) = default;
+ /** Default destructor */
+ ~CLDequantizationLayerKernel() = default;
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
+ * @param[out] output Destination tensor. Data types supported: F16/F32.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output);
+ /** Set the input and output tensors.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
+ * @param[out] output Destination tensor. Data types supported: F16/F32.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLDequantizationLayerKernel
+ *
+ * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
+ * @param[in] output Output tensor info. Data types supported: F16/F32.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input;
+ ICLTensor *_output;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLDEQUANTIZATIONLAYERKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLDerivativeKernel.h"
+#include "src/core/CL/kernels/CLDerivativeKernel.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLDERIVATIVEKERNEL_H
+#define ARM_COMPUTE_CLDERIVATIVEKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the derivative kernel. */
+class CLDerivativeKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLDerivativeKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLDerivativeKernel(const CLDerivativeKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLDerivativeKernel &operator=(const CLDerivativeKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLDerivativeKernel(CLDerivativeKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLDerivativeKernel &operator=(CLDerivativeKernel &&) = default;
+ /** Default destructor */
+ ~CLDerivativeKernel() = default;
+ /** Initialise the kernel's sources, destination and border
+ *
+ * @note At least one of output_x or output_y must be set
+ *
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16.
+ * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
+ /** Initialise the kernel's sources, destination and border
+ *
+ * @note At least one of output_x or output_y must be set
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16.
+ * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+ BorderSize border_size() const override;
+
+private:
+ const ICLTensor *_input; /**< Input tensor */
+ ICLTensor *_output_x; /**< Output tensor - Derivative along the X direction */
+ ICLTensor *_output_y; /**< Output tensor - Derivative along the Y direction */
+ bool _run_derivative_x; /**< Do we need to run Derivative X ? */
+ bool _run_derivative_y; /**< Do we need to run Derivative Y ? */
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLDERIVATIVEKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLDilateKernel.h"
+#include "src/core/CL/kernels/CLDilateKernel.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLDILATEKERNEL_H
+#define ARM_COMPUTE_CLDILATEKERNEL_H
+
+#include "src/core/CL/ICLSimple2DKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the dilate kernel.
+ *
+ */
+class CLDilateKernel : public ICLSimple2DKernel
+{
+public:
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] input An input tensor. Data types supported: U8
+ * @param[out] output The output tensor. Data types supported: U8.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input An input tensor. Data types supported: U8
+ * @param[out] output The output tensor. Data types supported: U8.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined);
+
+ // Inherited methods overridden:
+ BorderSize border_size() const override;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLDILATEKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h"
+#include "src/core/CL/kernels/CLDirectConvolutionLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLDIRECTCONVOLUTIONLAYERKERNEL_H
+#define ARM_COMPUTE_CLDIRECTCONVOLUTIONLAYERKERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the direct convolution kernel.
+ */
+class CLDirectConvolutionLayerKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLDirectConvolutionLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLDirectConvolutionLayerKernel(const CLDirectConvolutionLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLDirectConvolutionLayerKernel &operator=(const CLDirectConvolutionLayerKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLDirectConvolutionLayerKernel(CLDirectConvolutionLayerKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLDirectConvolutionLayerKernel &operator=(CLDirectConvolutionLayerKernel &&) = default;
+ /** Default destructor */
+ ~CLDirectConvolutionLayerKernel() = default;
+ /** Set the input, weights, biases and output tensors.
+ *
+ * @note: DirectConvolution only works in the following configurations:
+ * 1x1 convolution with stride_x = 1/2/3, stride_y = 1/2/3
+ * 3x3 convolution with stride_x = 1/2, stride_y = 1/2
+ * 5x5 convolution with stride_x = 1/2, stride_y = 1/2
+ * 9x9 convolution with stride_x = 1/2, stride_y = 1/2
+ *
+ * @param[in] input The input tensor to convolve. 3 lower dimensions represent a single input [width, height, IFM],
+ * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
+ * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
+ * The 3rd dimension must be the same as the input's volume 3rd dimension.
+ * Data type supported: Same as @p input.
+ * @param[in] biases Biases tensor. Biases are 1D tensor with dimension [OFM].
+ * Data type supported: Should match @p input data type, except for input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type
+ * @param[out] output Output tensor.
+ * The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: Same as @p input.
+ * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
+ */
+ void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info);
+ /** Set the input, weights, biases and output tensors.
+ *
+ * @note: DirectConvolution only works in the following configurations:
+ * 1x1 convolution with stride_x = 1/2/3, stride_y = 1/2/3
+ * 3x3 convolution with stride_x = 1/2, stride_y = 1/2
+ * 5x5 convolution with stride_x = 1/2, stride_y = 1/2
+ * 9x9 convolution with stride_x = 1/2, stride_y = 1/2
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input The input tensor to convolve. 3 lower dimensions represent a single input [width, height, IFM],
+ * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
+ * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
+ * The 3rd dimension must be the same as the input's volume 3rd dimension.
+ * Data type supported: Same as @p input.
+ * @param[in] biases Biases tensor. Biases are 1D tensor with dimension [OFM].
+ * Data type supported: Should match @p input data type, except for input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type
+ * @param[out] output Output tensor.
+ * The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: Same as @p input.
+ * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLDirectConvolutionLayerKernel
+ *
+ * @param[in] input The input tensor to convolve. 3 lower dimensions represent a single input [width, height, IFM],
+ * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
+ * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
+ * The 3rd dimension must be the same as the input's volume 3rd dimension.
+ * Data type supported: Same as @p input.
+ * @param[in] biases Biases tensor. Biases are 1D tensor with dimension [OFM].
+ * Data type supported: Should match @p input data type, except for input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type.
+ * @param[in] output Output tensor.
+ * The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: Same as @p input.
+ * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
+ * @param[in] target Target GPU architecture.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, const GPUTarget target);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+ BorderSize border_size() const override;
+
+public: // NOTE(review): data members are deliberately left in a public section — presumably accessed externally; confirm before tightening to private
+ const ICLTensor *_input;
+ const ICLTensor *_biases;
+ const ICLTensor *_weights;
+ ICLTensor *_output;
+ DataLayout _data_layout;
+ BorderSize _border_size;
+ int _conv_stride_x;
+ int _conv_stride_y;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLDIRECTCONVOLUTIONLAYERKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLElementWiseUnaryLayerKernel.h"
+#include "src/core/CL/kernels/CLElementWiseUnaryLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/ICLTensor.h"
--- /dev/null
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLELEMENTWISEUNARYLAYERKERNEL_H
+#define ARM_COMPUTE_CLELEMENTWISEUNARYLAYERKERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLSimpleKernel.h"
+
+namespace arm_compute
+{
+/** Interface for the elementwise unary operator */
+class CLElementWiseUnaryLayerKernel : public ICLKernel
+{
+public:
+ /** Initialise the kernel's inputs, output.
+ *
+ * @param[in] input Input tensor info. Data types supported: F16/F32.
+ * @param[out] output Output tensor info. Data types supported: Same as @p input.
+ * @param[in] op Element wise unary operation to perform.
+ */
+ void configure(const ITensorInfo *input, ITensorInfo *output, const ElementWiseUnary &op);
+ /** Initialise the kernel's inputs, output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Input tensor info. Data types supported: F16/F32.
+ * @param[out] output Output tensor info. Data types supported: Same as @p input.
+ * @param[in] op Element wise unary operation to perform.
+ */
+ void configure(const CLCompileContext &compile_context, const ITensorInfo *input, ITensorInfo *output, const ElementWiseUnary &op);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLElementWiseUnaryLayerKernel
+ *
+ * @param[in] input Input tensor info. Data types supported: F16/F32.
+ * @param[in] output Output tensor info. Data types supported: Same as @p input.
+ * @param[in] op Element wise unary operation to perform.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ElementWiseUnary &op);
+
+ // Inherited methods overridden:
+ void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLELEMENTWISEUNARYLAYERKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h"
+#include "src/core/CL/kernels/CLElementwiseOperationKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/ICLTensor.h"
--- /dev/null
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLELEMENTWISEOPERATIONKERNEL_H
+#define ARM_COMPUTE_CLELEMENTWISEOPERATIONKERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for an element-wise operation kernel
+ *
+ * Element-wise operation is computed by:
+ * @f[ output(x,y) = OP(input1(x,y), input2(x,y))@f]
+ *
+ */
+class CLElementwiseOperationKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLElementwiseOperationKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLElementwiseOperationKernel(const CLElementwiseOperationKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLElementwiseOperationKernel &operator=(const CLElementwiseOperationKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLElementwiseOperationKernel(CLElementwiseOperationKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLElementwiseOperationKernel &operator=(CLElementwiseOperationKernel &&) = default;
+ /** Default destructor */
+ ~CLElementwiseOperationKernel() = default;
+
+ // Inherited methods overridden:
+ void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
+
+protected:
+ /** The name of the operation */
+ virtual std::string name() = 0;
+
+ /** Validate the inputs and configure the kernel's window.
+ *
+ * @param[in] input1 First tensor input info. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/F16/U32/S32/F32.
+ * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
+ * @param[in] output Output tensor info. Data types supported: Same as @p input1.
+ *
+ * @return a pair of Status and Window
+ */
+ virtual std::pair<Status, Window> validate_and_configure_window(ITensorInfo &input1, ITensorInfo &input2, ITensorInfo &output) = 0;
+
+ /** Generate the build options for the specific kernel
+ *
+ * @return a CLBuildOptions struct
+ */
+ virtual CLBuildOptions generate_build_options(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output) = 0;
+
+ /** Generate the identifier for tuning
+ *
+ * @return a string
+ */
+ virtual std::string generate_id_for_tuning(const std::string &kernel_name, const ITensorInfo &input1, const ITensorInfo &output) = 0;
+
+ /** Common configure function for element-wise operators with no additional options (e.g., Div, Min, Max, SquaredDiff)
+ *
+ */
+ void configure_common(ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output);
+ /** Common configure function for element-wise operators with no additional options (e.g., Div, Min, Max, SquaredDiff)
+ *
+ */
+ void configure_common(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output);
+
+ ActivationLayerInfo _act_info;
+
+private:
+ const ITensorInfo *_input1; /**< Source tensor info 1 */
+ const ITensorInfo *_input2; /**< Source tensor info 2 */
+ ITensorInfo *_output; /**< Destination tensor info */
+};
+
+/** Addition operation */
+class CLSaturatedArithmeticOperationKernel : public CLElementwiseOperationKernel
+{
+public:
+ CLSaturatedArithmeticOperationKernel()
+ : CLElementwiseOperationKernel(), _policy(), _op()
+ {
+ }
+
+ /** Initialise the kernel's inputs, output and conversion policy.
+ *
+ * @param[in] op Arithmetic operation to be executed.
+ * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32.
+ * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
+ * @param[in] output Output tensor info. Data types supported: Same as @p input1.
+ * @param[in] policy Policy to use to handle overflow.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ */
+ void configure(ArithmeticOperation op, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, const ConvertPolicy &policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ /** Initialise the kernel's inputs, output and conversion policy.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] op Arithmetic operation to be executed.
+ * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32.
+ * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
+ * @param[in] output Output tensor info. Data types supported: Same as @p input1.
+ * @param[in] policy Policy to use to handle overflow.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ */
+ void configure(const CLCompileContext &compile_context, ArithmeticOperation op, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, const ConvertPolicy &policy,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
+
+ /** Static function to check if given info will lead to a valid configuration of @ref CLSaturatedArithmeticOperationKernel
+ *
+ * @param[in] op Arithmetic operation to be executed.
+ * @param[in] input1 First tensor input info info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32.
+ * @param[in] input2 Second tensor input info info. Data types supported: Same as @p input1.
+ * @param[in] output Output tensor info info. Data types supported: Same as @p input1.
+ * @param[in] policy Policy to use to handle overflow.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ *
+ * @return a Status
+ */
+ static Status validate(ArithmeticOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ConvertPolicy &policy,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
+
+protected:
+ // Inherited methods overridden:
+ std::string name() override;
+ std::pair<Status, Window> validate_and_configure_window(ITensorInfo &input1, ITensorInfo &input2, ITensorInfo &output) override;
+ CLBuildOptions generate_build_options(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output) override;
+ std::string generate_id_for_tuning(const std::string &kernel_name, const ITensorInfo &input1, const ITensorInfo &output) override;
+
+private:
+ ConvertPolicy _policy;
+ ArithmeticOperation _op;
+};
+
+class CLArithmeticOperationKernel : public CLElementwiseOperationKernel
+{
+public:
+ CLArithmeticOperationKernel()
+ : CLElementwiseOperationKernel(), _op()
+ {
+ }
+
+ /** Initialise the kernel's inputs and output.
+ *
+ * @param[in] op Arithmetic operation to be executed.
+ * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32.
+ * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
+ * @param[in] output Output tensor info. Data types supported: Same as @p input1.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ */
+ void configure(ArithmeticOperation op, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ /** Initialise the kernel's inputs and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] op Arithmetic operation to be executed.
+ * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32.
+ * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
+ * @param[in] output Output tensor info. Data types supported: Same as @p input1.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ */
+ void configure(const CLCompileContext &compile_context, ArithmeticOperation op, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
+
+ /** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticOperationKernel
+ *
+ * @param[in] op Arithmetic operation to be executed.
+ * @param[in] input1 First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32.
+ * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
+ * @param[in] output Output tensor info. Data types supported: Same as @p input1.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ *
+ * @return a Status
+ */
+ static Status validate(ArithmeticOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+
+protected:
+ // Inherited methods overridden:
+ std::string name() override;
+ std::pair<Status, Window> validate_and_configure_window(ITensorInfo &input1, ITensorInfo &input2, ITensorInfo &output) override;
+ CLBuildOptions generate_build_options(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output) override;
+ std::string generate_id_for_tuning(const std::string &kernel_name, const ITensorInfo &input1, const ITensorInfo &output) override;
+
+private:
+ ArithmeticOperation _op;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLELEMENTWISEOPERATIONKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLErodeKernel.h"
+#include "src/core/CL/kernels/CLErodeKernel.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLERODEKERNEL_H
+#define ARM_COMPUTE_CLERODEKERNEL_H
+
+#include "src/core/CL/ICLSimple2DKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the erode kernel.
+ *
+ */
+class CLErodeKernel : public ICLSimple2DKernel
+{
+public:
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] input An input tensor. Data types supported: U8.
+ * @param[out] output The output tensor. Data types supported: U8.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input An input tensor. Data types supported: U8.
+ * @param[out] output The output tensor. Data types supported: U8.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined);
+
+ // Inherited methods overridden:
+ BorderSize border_size() const override;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLERODEKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLFFTDigitReverseKernel.h"
+#include "src/core/CL/kernels/CLFFTDigitReverseKernel.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
--- /dev/null
+/*
+ * Copyright (c) 2019-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLFFTDIGITREVERSEKERNEL_H
+#define ARM_COMPUTE_CLFFTDIGITREVERSEKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+#include "arm_compute/core/KernelDescriptors.h"
+
+namespace arm_compute
+{
+// Forward declarations
+class ICLTensor;
+
+/** Interface for the digit reverse operation kernel. */
+class CLFFTDigitReverseKernel : public ICLKernel
+{
+public:
+ /** Constructor */
+ CLFFTDigitReverseKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLFFTDigitReverseKernel(const CLFFTDigitReverseKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLFFTDigitReverseKernel &operator=(const CLFFTDigitReverseKernel &) = delete;
+ /** Default Move Constructor. */
+ CLFFTDigitReverseKernel(CLFFTDigitReverseKernel &&) = default;
+ /** Default move assignment operator */
+ CLFFTDigitReverseKernel &operator=(CLFFTDigitReverseKernel &&) = default;
+ /** Default destructor */
+ ~CLFFTDigitReverseKernel() = default;
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Source tensor. Data types supported: F32.
+ * @param[out] output Destination tensor. Data type supported: same as @p input
+ * @param[in] idx Digit reverse index tensor. Data type supported: U32
+ * @param[in] config Kernel configuration.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *idx, const FFTDigitReverseKernelInfo &config);
+ /** Set the input and output tensors.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: F32.
+ * @param[out] output Destination tensor. Data type supported: same as @p input
+ * @param[in] idx Digit reverse index tensor. Data type supported: U32
+ * @param[in] config Kernel configuration.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *idx, const FFTDigitReverseKernelInfo &config);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLFFTDigitReverseKernel
+ *
+ * @param[in] input Source tensor info. Data types supported: F32.
+ * @param[in] output Destination tensor info. Data type supported: same as @p input
+ * @param[in] idx Digit reverse index tensor info. Data type supported: U32
+ * @param[in] config Kernel configuration.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *idx, const FFTDigitReverseKernelInfo &config);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input; /**< Source tensor */
+ ICLTensor *_output; /**< Destination tensor */
+ const ICLTensor *_idx; /**< Digit reverse index tensor */
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLFFTDIGITREVERSEKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLFFTRadixStageKernel.h"
+#include "src/core/CL/kernels/CLFFTRadixStageKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2019-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLFFTRADIXSTAGEKERNEL_H
+#define ARM_COMPUTE_CLFFTRADIXSTAGEKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+#include "arm_compute/core/KernelDescriptors.h"
+
+#include <set>
+
+namespace arm_compute
+{
+// Forward declarations
+class ICLTensor;
+
+/** Interface for the FFT radix stage kernel. */
+class CLFFTRadixStageKernel : public ICLKernel
+{
+public:
+ /** Constructor */
+ CLFFTRadixStageKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLFFTRadixStageKernel(const CLFFTRadixStageKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLFFTRadixStageKernel &operator=(const CLFFTRadixStageKernel &) = delete;
+ /** Default Move Constructor. */
+ CLFFTRadixStageKernel(CLFFTRadixStageKernel &&) = default;
+ /** Default move assignment operator */
+ CLFFTRadixStageKernel &operator=(CLFFTRadixStageKernel &&) = default;
+ /** Default destructor */
+ ~CLFFTRadixStageKernel() = default;
+ /** Set the input and output tensors.
+ *
+ * @note If the output tensor is nullptr, the FFT will be performed in-place
+ *
+ * @param[in,out] input Source tensor. Data types supported: F32.
+ * @param[out] output Destination tensor. Can be nullptr. Data type supported: same as @p input
+ * @param[in] config FFT descriptor metadata.
+ */
+ void configure(ICLTensor *input, ICLTensor *output, const FFTRadixStageKernelInfo &config);
+ /** Set the input and output tensors.
+ *
+ * @note If the output tensor is nullptr, the FFT will be performed in-place
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in,out] input Source tensor. Data types supported: F32.
+ * @param[out] output Destination tensor. Can be nullptr. Data type supported: same as @p input
+ * @param[in] config FFT descriptor metadata.
+ */
+ void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const FFTRadixStageKernelInfo &config);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLFFTRadixStageKernel
+ *
+ * @param[in] input Source tensor info. Data types supported: F32.
+ * @param[in] output Destination tensor info. Can be nullptr. Data type supported: same as @p input
+ * @param[in] config FFT descriptor metadata.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const FFTRadixStageKernelInfo &config);
+ /** Returns the radixes that are supported by the FFT kernel
+ *
+ * @return A set of supported radixes
+ */
+ static std::set<unsigned int> supported_radix();
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ ICLTensor *_input; /**< Source tensor */
+ ICLTensor *_output; /**< Destination tensor. Can be nullptr (in-place computation) */
+ bool _run_in_place; /**< True when the FFT is performed in-place (output is nullptr) */
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLFFTRADIXSTAGEKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLFFTScaleKernel.h"
+#include "src/core/CL/kernels/CLFFTScaleKernel.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
--- /dev/null
+/*
+ * Copyright (c) 2019-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLFFTSCALEKERNEL_H
+#define ARM_COMPUTE_CLFFTSCALEKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+#include "arm_compute/core/KernelDescriptors.h"
+
+namespace arm_compute
+{
+// Forward declarations
+class ICLTensor;
+
+/** Interface for the inverse fft scale kernel. */
+class CLFFTScaleKernel : public ICLKernel
+{
+public:
+ /** Constructor */
+ CLFFTScaleKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLFFTScaleKernel(const CLFFTScaleKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLFFTScaleKernel &operator=(const CLFFTScaleKernel &) = delete;
+ /** Default Move Constructor. */
+ CLFFTScaleKernel(CLFFTScaleKernel &&) = default;
+ /** Default move assignment operator */
+ CLFFTScaleKernel &operator=(CLFFTScaleKernel &&) = default;
+ /** Default destructor */
+ ~CLFFTScaleKernel() = default;
+ /** Set the input and output tensors.
+ *
+ * @param[in,out] input Source tensor. Data types supported: F32.
+ * @param[out] output Destination tensor. Data type supported: same as @p input
+ * @param[in] config Kernel configuration
+ */
+ void configure(ICLTensor *input, ICLTensor *output, const FFTScaleKernelInfo &config);
+ /** Set the input and output tensors.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in,out] input Source tensor. Data types supported: F32.
+ * @param[out] output Destination tensor. Data type supported: same as @p input
+ * @param[in] config Kernel configuration
+ */
+ void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const FFTScaleKernelInfo &config);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLFFTScaleKernel
+ *
+ * @param[in] input Source tensor info. Data types supported: F32.
+ * @param[in] output Destination tensor info. Data type supported: same as @p input
+ * @param[in] config Kernel configuration
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const FFTScaleKernelInfo &config);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ ICLTensor *_input; /**< Source tensor (input is in,out in configure, so may also be written) */
+ ICLTensor *_output; /**< Destination tensor */
+ bool _run_in_place; /**< NOTE(review): presumably true when scaling runs in-place — confirm against the kernel implementation */
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLFFTSCALEKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLFastCornersKernel.h"
+#include "src/core/CL/kernels/CLFastCornersKernel.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLFASTCORNERSKERNEL_H
+#define ARM_COMPUTE_CLFASTCORNERSKERNEL_H
+
+#include "arm_compute/core/CL/ICLArray.h"
+#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
+
+#include <cstdint>
+
+namespace cl
+{
+class Buffer;
+}
+
+namespace arm_compute
+{
+class ICLTensor;
+using ICLImage = ICLTensor;
+
+/** CL kernel to perform fast corners */
+class CLFastCornersKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLFastCornersKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLFastCornersKernel(const CLFastCornersKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLFastCornersKernel &operator=(const CLFastCornersKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLFastCornersKernel(CLFastCornersKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLFastCornersKernel &operator=(CLFastCornersKernel &&) = default;
+ /** Default destructor */
+ ~CLFastCornersKernel() = default;
+
+ /** Initialise the kernel.
+ *
+ * @param[in] input Source image. Data types supported: U8.
+ * @param[out] output Output image. Data types supported: U8.
+ * @param[in] threshold Threshold on difference between intensity of the central pixel and pixels on Bresenham's circle of radius 3.
+ * @param[in] non_max_suppression True if non-maxima suppression is applied, false otherwise.
+ * @param[in] border_mode Strategy to use for borders.
+ */
+ void configure(const ICLImage *input, ICLImage *output, float threshold, bool non_max_suppression, BorderMode border_mode);
+ /** Initialise the kernel.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source image. Data types supported: U8.
+ * @param[out] output Output image. Data types supported: U8.
+ * @param[in] threshold Threshold on difference between intensity of the central pixel and pixels on Bresenham's circle of radius 3.
+ * @param[in] non_max_suppression True if non-maxima suppression is applied, false otherwise.
+ * @param[in] border_mode Strategy to use for borders.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLImage *input, ICLImage *output, float threshold, bool non_max_suppression, BorderMode border_mode);
+
+ // Inherited methods overridden
+ void run(const Window &window, cl::CommandQueue &queue) override;
+ BorderSize border_size() const override;
+
+private:
+ const ICLImage *_input; /**< source image */
+ ICLImage *_output; /**< destination image */
+};
+
+/** CL kernel to copy keypoints information to ICLKeyPointArray and counts the number of key points */
+class CLCopyToArrayKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLCopyToArrayKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLCopyToArrayKernel(const CLCopyToArrayKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLCopyToArrayKernel &operator=(const CLCopyToArrayKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLCopyToArrayKernel(CLCopyToArrayKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLCopyToArrayKernel &operator=(CLCopyToArrayKernel &&) = default;
+ /** Default destructor */
+ ~CLCopyToArrayKernel() = default;
+
+ /** Initialise the kernel.
+ *
+ * @param[in] input Source image. Data types supported: U8.
+ * @param[in] update_number Flag to indicate whether we need to update the number of corners
+ * @param[out] corners Array of keypoints to store the results.
+ * @param[out] num_buffers Buffer used to store the number of detected key points.
+ */
+ void configure(const ICLImage *input, bool update_number, ICLKeyPointArray *corners, cl::Buffer *num_buffers);
+ /** Initialise the kernel.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source image. Data types supported: U8.
+ * @param[in] update_number Flag to indicate whether we need to update the number of corners
+ * @param[out] corners Array of keypoints to store the results.
+ * @param[out] num_buffers Buffer used to store the number of detected key points.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLImage *input, bool update_number, ICLKeyPointArray *corners, cl::Buffer *num_buffers);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLImage *_input; /**< source image */
+ ICLKeyPointArray *_corners; /**< destination array */
+ cl::Buffer *_num_buffer; /**< CL memory to record number of key points in the array */
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLFASTCORNERSKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLFILLBORDERKERNEL_H
+#define ARM_COMPUTE_CLFILLBORDERKERNEL_H
+
+#include "arm_compute/core/PixelValue.h"
+#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for filling the border of a kernel */
+class CLFillBorderKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLFillBorderKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLFillBorderKernel(const CLFillBorderKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLFillBorderKernel &operator=(const CLFillBorderKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLFillBorderKernel(CLFillBorderKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLFillBorderKernel &operator=(CLFillBorderKernel &&) = default;
+ /** Default destructor */
+ ~CLFillBorderKernel() = default;
+
+ /** Initialise the kernel's input, output and border mode.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in,out] tensor Tensor to process Data types supported: U8/QASYMM8/S8/QASYMM8_SIGNED/U16/S16/U32/S32/F16/F32.
+ * @param[in] border_size Size of the border to fill in elements.
+ * @param[in] border_mode Border mode to use for the convolution.
+ * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+ */
+ void configure(const CLCompileContext &compile_context, ICLTensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue());
+ /** Initialise the kernel's input, output and border mode.
+ *
+ * @param[in,out] tensor Tensor to process Data types supported: U8/QASYMM8/S8/QASYMM8_SIGNED/U16/S16/U32/S32/F16/F32.
+ * @param[in] border_size Size of the border to fill in elements.
+ * @param[in] border_mode Border mode to use for the convolution.
+ * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+ */
+ void configure(ICLTensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue());
+ /** Initialise the kernel's input, output and border mode.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in,out] tensor Tensor info to process Data types supported: U8/QASYMM8/S8/QASYMM8_SIGNED/U16/S16/U32/S32/F16/F32.
+ * @param[in] border_size Size of the border to fill in elements.
+ * @param[in] border_mode Border mode to use for the convolution.
+ * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+ */
+ void configure(const CLCompileContext &compile_context, ITensorInfo *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue());
+
+ /** Function to set the constant value on fill border kernel depending on type.
+ *
+ * @param[in] idx Index of the kernel argument to set.
+ * @param[in] constant_border_value Constant value to use for borders if border_mode is set to CONSTANT.
+ */
+ template <class T>
+ void set_constant_border(unsigned int idx, const PixelValue &constant_border_value);
+
+ // Inherited methods overridden:
+ void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
+ void run(const Window &window, cl::CommandQueue &queue) override;
+ bool is_parallelisable() const override;
+
+private:
+ ICLTensor *_tensor; /**< Tensor whose border is being filled */
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLFILLBORDERKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLFlattenLayerKernel.h"
+#include "src/core/CL/kernels/CLFlattenLayerKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
--- /dev/null
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLFLATTENLAYERKERNEL_H
+#define ARM_COMPUTE_CLFLATTENLAYERKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL interface for the flatten kernel.*/
+class CLFlattenLayerKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLFlattenLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLFlattenLayerKernel(const CLFlattenLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLFlattenLayerKernel &operator=(const CLFlattenLayerKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLFlattenLayerKernel(CLFlattenLayerKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLFlattenLayerKernel &operator=(CLFlattenLayerKernel &&) = default;
+ /** Set the input and output of the kernel.
+ *
+ * @param[in] input First input tensor to flatten with at least 3 dimensions.
+ * The dimensions above the third will be interpreted as batches. Data types supported: All.
+ * @param[out] output Output tensor with shape [w*h*d, input_batches] where:
+ * w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input
+ */
+ void configure(const ICLTensor *input, ICLTensor *output);
+ /** Set the input and output of the kernel.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input First input tensor to flatten with at least 3 dimensions.
+ * The dimensions above the third will be interpreted as batches. Data types supported: All.
+ * @param[out] output Output tensor with shape [w*h*d, input_batches] where:
+ * w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLFlattenLayerKernel
+ *
+ * @param[in] input First input tensor to flatten with at least 3 dimensions.
+ * The dimensions above the third will be interpreted as batches. Data types supported: All.
+ * @param[in] output Output tensor info with shape [w*h*d, input_batches] where:
+ * w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+public:
+ // NOTE(review): these members are publicly accessible, unlike sibling kernels in this file which keep them private — confirm whether external code relies on this before tightening.
+ const ICLTensor *_input; /**< Source tensor */
+ ICLTensor *_output; /**< Destination tensor */
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLFLATTENLAYERKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLFloorKernel.h"
+#include "src/core/CL/kernels/CLFloorKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLFLOORKERNEL_H
+#define ARM_COMPUTE_CLFLOORKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel to perform a floor operation */
+class CLFloorKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLFloorKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLFloorKernel(const CLFloorKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLFloorKernel &operator=(const CLFloorKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLFloorKernel(CLFloorKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLFloorKernel &operator=(CLFloorKernel &&) = default;
+ /** Default destructor */
+ ~CLFloorKernel() = default;
+ /** Set the source, destination of the kernel
+ *
+ * @param[in] input Source tensor. Data type supported: F16/F32.
+ * @param[out] output Destination tensor. Same as @p input
+ */
+ void configure(const ICLTensor *input, ICLTensor *output);
+
+ /** Set the source, destination of the kernel
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data type supported: F16/F32.
+ * @param[out] output Destination tensor. Same as @p input
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref CLFloorKernel
+ *
+ * @param[in] input Source tensor info. Data type supported: F16/F32.
+ * @param[in] output Destination tensor info. Same as @p input
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input;
+ ICLTensor *_output;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLFLOORKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLFuseBatchNormalizationKernel.h"
+#include "src/core/CL/kernels/CLFuseBatchNormalizationKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLFUSEBATCHNORMALIZATIONKERNEL_H
+#define ARM_COMPUTE_CLFUSEBATCHNORMALIZATIONKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+// Forward declarations
+class ICLTensor;
+
+/** OpenCL kernel to fuse the batch normalization node to a preceding convolution node */
+class CLFuseBatchNormalizationKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLFuseBatchNormalizationKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLFuseBatchNormalizationKernel(const CLFuseBatchNormalizationKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLFuseBatchNormalizationKernel &operator=(const CLFuseBatchNormalizationKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLFuseBatchNormalizationKernel(CLFuseBatchNormalizationKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLFuseBatchNormalizationKernel &operator=(CLFuseBatchNormalizationKernel &&) = default;
+ /** Default destructor */
+ ~CLFuseBatchNormalizationKernel() = default;
+ /** Set the source, destination of the kernel
+ *
+ * @param[in] input_weights Input weights tensor for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC
+ * @param[in] bn_mean Batch normalization layer mean tensor. Same as @p input_weights
+ * @param[in] bn_var Batch normalization layer variance tensor. Same as @p input_weights
+ * @param[out] fused_weights Output fused weights tensor. It can be a nullptr in case of in-place computation. Same as @p input_weights
+ * @param[out] fused_bias Output fused bias tensor. It can be a nullptr in case of in-place computation and input_bias != nullptr. Same as @p input_weights
+ * @param[in] input_bias (Optional) Input bias tensor for convolution or depthwise convolution layer. It can be a nullptr in case the bias tensor is not required. Same as @p input_weights
+ * @param[in] bn_beta (Optional) Batch normalization layer beta tensor. It can be a nullptr in case the beta tensor is not required. Same as @p input_weights
+ * @note if nullptr, bn_beta is set to 0.0
+ * @param[in] bn_gamma (Optional) Batch normalization layer gamma tensor. It can be a nullptr in case the gamma tensor is not required. Same as @p input_weights
+ * @note if nullptr, bn_gamma is set to 1.0
+ * @param[in] epsilon (Optional) Batch normalization layer epsilon parameter. Defaults to 0.001f.
+ * @param[in] fbn_type (Optional) Fused batch normalization type. Defaults to CONVOLUTION.
+ */
+ void configure(const ICLTensor *input_weights, const ICLTensor *bn_mean, const ICLTensor *bn_var, ICLTensor *fused_weights, ICLTensor *fused_bias,
+ const ICLTensor *input_bias = nullptr, const ICLTensor *bn_beta = nullptr, const ICLTensor *bn_gamma = nullptr,
+ float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION);
+ /** Set the source, destination of the kernel
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input_weights Input weights tensor for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC
+ * @param[in] bn_mean Batch normalization layer mean tensor. Same as @p input_weights
+ * @param[in] bn_var Batch normalization layer variance tensor. Same as @p input_weights
+ * @param[out] fused_weights Output fused weights tensor. It can be a nullptr in case of in-place computation. Same as @p input_weights
+ * @param[out] fused_bias Output fused bias tensor. It can be a nullptr in case of in-place computation and input_bias != nullptr. Same as @p input_weights
+ * @param[in] input_bias (Optional) Input bias tensor for convolution or depthwise convolution layer. It can be a nullptr in case the bias tensor is not required. Same as @p input_weights
+ * @param[in] bn_beta (Optional) Batch normalization layer beta tensor. It can be a nullptr in case the beta tensor is not required. Same as @p input_weights
+ * @note if nullptr, bn_beta is set to 0.0
+ * @param[in] bn_gamma (Optional) Batch normalization layer gamma tensor. It can be a nullptr in case the gamma tensor is not required. Same as @p input_weights
+ * @note if nullptr, bn_gamma is set to 1.0
+ * @param[in] epsilon (Optional) Batch normalization layer epsilon parameter. Defaults to 0.001f.
+ * @param[in] fbn_type (Optional) Fused batch normalization type. Defaults to CONVOLUTION.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input_weights, const ICLTensor *bn_mean, const ICLTensor *bn_var, ICLTensor *fused_weights, ICLTensor *fused_bias,
+ const ICLTensor *input_bias = nullptr, const ICLTensor *bn_beta = nullptr, const ICLTensor *bn_gamma = nullptr,
+ float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLFuseBatchNormalizationKernel
+ *
+ * @param[in] input_weights Input weights tensor info for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC
+ * @param[in] bn_mean Batch normalization layer mean tensor info. Same as @p input_weights
+ * @param[in] bn_var Batch normalization layer variance tensor info. Same as @p input_weights
+ * @param[in] fused_weights Output fused weights tensor info. It can be a nullptr in case of in-place computation. Same as @p input_weights
+ * @param[in] fused_bias Output fused bias tensor info. It can be a nullptr in case of in-place computation and input_bias != nullptr. Same as @p input_weights
+ * @param[in] input_bias (Optional) Input bias tensor info for convolution or depthwise convolution layer. It can be a nullptr in case the bias tensor is not required. Same as @p input_weights
+ * @param[in] bn_beta (Optional) Batch normalization layer beta tensor info. It can be a nullptr in case the beta tensor is not required. Same as @p input_weights
+ * @note if nullptr, bn_beta is set to 0.0
+ * @param[in] bn_gamma (Optional) Batch normalization layer gamma tensor info. It can be a nullptr in case the gamma tensor is not required. Same as @p input_weights
+ * @note if nullptr, bn_gamma is set to 1.0
+ * @param[in] epsilon (Optional) Batch normalization layer epsilon parameter. Defaults to 0.001f.
+ * @param[in] fbn_type (Optional) Fused batch normalization type. Defaults to CONVOLUTION.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input_weights, const ITensorInfo *bn_mean, const ITensorInfo *bn_var,
+ const ITensorInfo *fused_weights, const ITensorInfo *fused_bias,
+ const ITensorInfo *input_bias = nullptr, const ITensorInfo *bn_beta = nullptr, const ITensorInfo *bn_gamma = nullptr,
+ float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input_weights;
+ const ICLTensor *_input_bias;
+ const ICLTensor *_bn_mean;
+ const ICLTensor *_bn_var;
+ const ICLTensor *_bn_gamma;
+ const ICLTensor *_bn_beta;
+ ICLTensor *_fused_weights;
+ ICLTensor *_fused_bias;
+ float _epsilon;
+ bool _run_in_place_weights;
+ bool _run_in_place_bias;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLFUSEBATCHNORMALIZATIONKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2019-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYNATIVEKERNEL_H
+#define ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYNATIVEKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel to multiply matrices with QASYMM8/QASYMM8_SIGNED data type */
+class CLGEMMLowpMatrixMultiplyNativeKernel : public ICLKernel
+{
+public:
+ /** Default Constructor */
+ CLGEMMLowpMatrixMultiplyNativeKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGEMMLowpMatrixMultiplyNativeKernel(const CLGEMMLowpMatrixMultiplyNativeKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGEMMLowpMatrixMultiplyNativeKernel &operator=(const CLGEMMLowpMatrixMultiplyNativeKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLGEMMLowpMatrixMultiplyNativeKernel(CLGEMMLowpMatrixMultiplyNativeKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLGEMMLowpMatrixMultiplyNativeKernel &operator=(CLGEMMLowpMatrixMultiplyNativeKernel &&) = default;
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: QASYMM8/QASYMM8_SIGNED
+ * @param[in] input1 Input tensor containing the RHS matrix. Data type supported: same as @p input0
+ * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: S32
+ * @param[in] lhs_info LHS matrix information used to retrieve the number of rows to be processed by each thread
+ * lhs_info.m0: 2,3,4,5,6,7,8
+ * lhs_info.k0: 2,3,4,8,16
+ * @param[in] rhs_info RHS matrix information used to retrieve the number of columns to be processed by each thread
+ * rhs_info.n0: 2,3,4,8,16
+ * rhs_info.k0: same as lhs_info.k0
+ * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices
+ */
+ void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const GEMMReshapeInfo &gemm_info);
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: QASYMM8/QASYMM8_SIGNED
+ * @param[in] input1 Input tensor containing the RHS matrix. Data type supported: same as @p input0
+ * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: S32
+ * @param[in] lhs_info LHS matrix information used to retrieve the number of rows to be processed by each thread
+ * lhs_info.m0: 2,3,4,5,6,7,8
+ * lhs_info.k0: 2,3,4,8,16
+ * @param[in] rhs_info RHS matrix information used to retrieve the number of columns to be processed by each thread
+ * rhs_info.n0: 2,3,4,8,16
+ * rhs_info.k0: same as lhs_info.k0
+ * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
+ const GEMMReshapeInfo &gemm_info);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixMultiplyNativeKernel
+ *
+ * @param[in] input0 Input tensor info for the LHS matrix. Data type supported: QASYMM8/QASYMM8_SIGNED
+ * @param[in] input1 Input tensor info for the RHS matrix. Data type supported: same as @p input0
+ * @param[in] output Output tensor info. Data type supported: S32
+ * @param[in] lhs_info LHS matrix information used to retrieve the number of rows to be processed by each thread
+ * lhs_info.m0: 2,3,4,5,6,7,8
+ * lhs_info.k0: 2,3,4,8,16
+ * @param[in] rhs_info RHS matrix information used to retrieve the number of columns to be processed by each thread
+ * rhs_info.n0: 2,3,4,8,16
+ * rhs_info.k0: same as lhs_info.k0
+ * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
+ const GEMMReshapeInfo &gemm_info);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input0;
+ const ICLTensor *_input1;
+ ICLTensor *_output;
+ bool _slide_matrix_b;
+ bool _reinterpret_input_as_3d;
+ bool _reinterpret_output_as_3d;
+ bool _use_dummy_work_items;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYNATIVEKERNEL_H*/
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2019-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYRESHAPEDKERNEL_H
+#define ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYRESHAPEDKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel to multiply matrices when both the input matrices LHS (input0) and RHS (input1) have been reshaped
+ *
+ * @note The input matrices @p input0 and @p input1 must be reshaped through @ref CLGEMMReshapeLHSMatrixKernel and @ref CLGEMMReshapeRHSMatrixKernel
+ */
+class CLGEMMLowpMatrixMultiplyReshapedKernel : public ICLKernel
+{
+public:
+ /** Default Constructor */
+ CLGEMMLowpMatrixMultiplyReshapedKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGEMMLowpMatrixMultiplyReshapedKernel(const CLGEMMLowpMatrixMultiplyReshapedKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGEMMLowpMatrixMultiplyReshapedKernel &operator=(const CLGEMMLowpMatrixMultiplyReshapedKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLGEMMLowpMatrixMultiplyReshapedKernel(CLGEMMLowpMatrixMultiplyReshapedKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLGEMMLowpMatrixMultiplyReshapedKernel &operator=(CLGEMMLowpMatrixMultiplyReshapedKernel &&) = default;
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] input0 Input tensor containing the LHS reshaped matrix. Data type supported: QASYMM8/QASYMM8_SIGNED. The number of dimensions for the LHS matrix must be less or equal than 4.
+ * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3.
+ * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: S32
+ * @param[in] lhs_info LHS matrix information used for reshaping the input0 tensor. Only the following values are supported:
+ * lhs_info.m0: 2,3,4,5,6,7,8
+ * lhs_info.k0: 2,3,4,8,16
+ * lhs_info.transpose: false
+ * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported:
+ * rhs_info.n0: 2,3,4,8,16
+ * rhs_info.k0: same as lhs_info.k0
+ * rhs_info.transpose: true
+ * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices
+ *
+ * @note lhs_info.k0 must be equal to rhs_info.k0
+ */
+ void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const GEMMReshapeInfo &gemm_info);
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input0 Input tensor containing the LHS reshaped matrix. Data type supported: QASYMM8/QASYMM8_SIGNED. The number of dimensions for the LHS matrix must be less or equal than 4.
+ * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3.
+ * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: S32
+ * @param[in] lhs_info LHS matrix information used for reshaping the input0 tensor. Only the following values are supported:
+ * lhs_info.m0: 2,3,4,5,6,7,8
+ * lhs_info.k0: 2,3,4,8,16
+ * lhs_info.transpose: false
+ * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported:
+ * rhs_info.n0: 2,3,4,8,16
+ * rhs_info.k0: same as lhs_info.k0
+ * rhs_info.transpose: true
+ * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices
+ *
+ * @note lhs_info.k0 must be equal to rhs_info.k0
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
+ const GEMMReshapeInfo &gemm_info);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixMultiplyReshapedKernel
+ *
+ * @param[in] input0 Input tensor info containing the LHS reshaped matrix. Data type supported: QASYMM8/QASYMM8_SIGNED. The number of dimensions for the LHS matrix must be less or equal than 4.
+ * @param[in] input1 Input tensor info containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3.
+ * @param[in] output Output tensor info. Data type supported: S32
+ * @param[in] lhs_info LHS matrix information used for reshaping the input0 tensor. Only the following values are supported:
+ * lhs_info.m0: 2,3,4,5,6,7,8
+ * lhs_info.k0: 2,3,4,8,16
+ * lhs_info.transpose: false
+ * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported:
+ * rhs_info.n0: 2,3,4,8,16
+ * rhs_info.k0: 2,3,4,8,16
+ * rhs_info.transpose: true
+ * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices
+ *
+ * @note lhs_info.k0 must be equal to rhs_info.k0
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
+ const GEMMReshapeInfo &gemm_info);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input0;
+ const ICLTensor *_input1;
+ ICLTensor *_output;
+ bool _slide_matrix_b;
+ bool _reinterpret_output_as_3d;
+ unsigned int _k;
+ bool _use_dummy_work_items;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYRESHAPEDKERNEL_H*/
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
{
ARM_COMPUTE_UNUSED(vector_sum_row, vector_sum_col, output_multipliers, bias, output_shifts);
- const GEMMLowpOutputStageInfo output_stage = gemm_info.output_stage;
- unsigned int &num_elems_processed_per_iteration_x = num_elements_processed[0];
- unsigned int &num_elems_processed_per_iteration_y = num_elements_processed[1];
- bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d;
- bool reinterpret_output_as_3d = (gemm_info.depth_output_gemm3d != 0);
+ const GEMMLowpOutputStageInfo output_stage = gemm_info.output_stage;
+ unsigned int &num_elems_processed_per_iteration_x = num_elements_processed[0];
+ unsigned int &num_elems_processed_per_iteration_y = num_elements_processed[1];
+ bool reinterpret_input_as_3d = gemm_info.reinterpret_input_as_3d;
+ bool reinterpret_output_as_3d = (gemm_info.depth_output_gemm3d != 0);
Window win{};
Window win_out{};
output_multipliers != nullptr ? output_multipliers->info() : nullptr,
output_shifts != nullptr ? output_shifts->info() : nullptr));
- auto padding_info = get_padding_info({ input0, input1, output, vector_sum_col, vector_sum_row, bias, output_multipliers, output_shifts });
+ auto padding_info = get_padding_info({ input0, input1, output, vector_sum_col, vector_sum_row, bias, output_multipliers, output_shifts });
const GEMMRHSMatrixInfo rhs_info = gemm_info.rhs_info;
const GEMMLHSMatrixInfo lhs_info = gemm_info.lhs_info;
const GEMMLowpOutputStageInfo output_stage = gemm_info.output_stage;
// we will dispatch a batched-GEMM to reduce the complexity of the address calculation within the OpenCL kernel.
// This means that the actual m used by the kernel is given by output->info()->dimension(1) and not by gemm_info.m
const unsigned int internal_m = _reinterpret_output_as_3d ? gemm_info.m : output->info()->dimension(1);
- // Calculate partial (store instead of load) M0 and partial N0 for the partial blocks at the end of a row/column if any. This is to avoid padding.
+ // Calculate partial (store instead of load) M0 and partial N0 for the partial blocks at the end of a row/column if any. This is to avoid padding.
const unsigned int partial_store_m0 = internal_m % lhs_info.m0;
const unsigned int partial_store_n0 = gemm_info.n % rhs_info.n0;
--- /dev/null
+/*
+ * Copyright (c) 2019-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYRESHAPEDONLYRHSKERNEL_H
+#define ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYRESHAPEDONLYRHSKERNEL_H
+
+#include "arm_compute/core/KernelDescriptors.h"
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel to multiply matrices with QASYMM8 data type when only the input matrix RHS (input1) has been reshaped
+ *
+ * @note The input matrix input1 must be reshaped through @ref CLGEMMReshapeRHSMatrixKernel
+ * @note For fused output stage, only GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT type is supported
+ */
+class CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel : public ICLKernel
+{
+public:
+ /** Default Constructor */
+ CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel(const CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel &operator=(const CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel(CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel &operator=(CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel &&) = default;
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: QASYMM8/QASYMM8_SIGNED
+ * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL
+ * @param[out] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/S32.
+ * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices, output stage information and RHS/LHS info.
+ * Only the following values are supported for LHS info:
+ * lhs_info.m0: 2,3,4,5,6,7,8
+ * lhs_info.k0: 2,3,4,8,16
+ * Only the following values are supported for RHS info:
+ * rhs_info.n0: 2,3,4,8,16
+ * rhs_info.k0: same as lhs_info.k0
+ * rhs_info.transpose: true
+ * @param[in] vector_sum_col (Optional) Input row-vector of sums of all the entries in each column of matrix B.
+ * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: S32
+ * @param[in] vector_sum_row (Optional) Input row-vector of sums of all the entries in each row of matrix A.
+ * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: S32
+ * @param[in] bias (Optional) Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
+ * Biases are 1D tensor with dimensions [OFM]. Data type supported: S32.
+ * @param[in] output_multipliers (Optional) Output multipliers tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM).
+ * Supported data types: S32.
+ * @param[in] output_shifts (Optional) Output shifts tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM).
+ * Supported data types: S32.
+ */
+ void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMKernelInfo &gemm_info, const ICLTensor *vector_sum_col = nullptr,
+ const ICLTensor *vector_sum_row = nullptr, const ICLTensor *bias = nullptr, const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr);
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: QASYMM8/QASYMM8_SIGNED
+ * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0
+ * @param[out] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/S32.
+ * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices, output stage information and RHS/LHS info.
+ * Only the following values are supported for LHS info:
+ * lhs_info.m0: 2,3,4,5,6,7,8
+ * lhs_info.k0: 2,3,4,8,16
+ * Only the following values are supported for RHS info:
+ * rhs_info.n0: 2,3,4,8,16
+ * rhs_info.k0: same as lhs_info.k0
+ * rhs_info.transpose: true
+ * @param[in] vector_sum_col (Optional) Input row-vector of sums of all the entries in each column of matrix B.
+ * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: S32
+ * @param[in] vector_sum_row (Optional) Input row-vector of sums of all the entries in each row of matrix A.
+ * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: S32
+ * @param[in] bias (Optional) Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
+ * Biases are 1D tensor with dimensions [OFM]. Data type supported: S32.
+ * @param[in] output_multipliers (Optional) Output multipliers tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM).
+ * Supported data types: S32.
+ * @param[in] output_shifts (Optional) Output shifts tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM).
+ * Supported data types: S32.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMKernelInfo &gemm_info, const ICLTensor *vector_sum_col = nullptr,
+ const ICLTensor *vector_sum_row = nullptr, const ICLTensor *bias = nullptr, const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel
+ *
+ * @param[in] input0 Input tensor info for the LHS matrix. Data type supported: QASYMM8/QASYMM8_SIGNED
+ * @param[in] input1 Input tensor info for the RHS reshaped matrix. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL
+ * @param[in] output Output tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/S32.
+ * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices, output stage information and RHS/LHS info.
+ * Only the following values are supported for LHS info:
+ * lhs_info.m0: 2,3,4,5,6,7,8
+ * lhs_info.k0: 2,3,4,8,16
+ * Only the following values are supported for RHS info:
+ * rhs_info.n0: 2,3,4,8,16
+ * rhs_info.k0: same as lhs_info.k0
+ * rhs_info.transpose: true
+ * @param[in] vector_sum_col (Optional) Input row-vector info of sums of all the entries in each column of matrix B.
+ * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: S32
+ * @param[in] vector_sum_row (Optional) Input row-vector info of sums of all the entries in each row of matrix A.
+ * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: S32
+ * @param[in] bias (Optional) Biases tensor info. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
+ * Biases are 1D tensor with dimensions [OFM]. Data type supported: S32.
+ * @param[in] output_multipliers (Optional) Output multipliers tensor info. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM).
+ * Supported data types: S32.
+ * @param[in] output_shifts (Optional) Output shifts tensor info. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM).
+ * Supported data types: S32.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output, const GEMMKernelInfo &gemm_info, const ITensorInfo *vector_sum_col = nullptr,
+ const ITensorInfo *vector_sum_row = nullptr, const ITensorInfo *bias = nullptr, const ITensorInfo *output_multipliers = nullptr,
+ const ITensorInfo *output_shifts = nullptr);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input0;
+ const ICLTensor *_input1;
+ ICLTensor *_output;
+ const ICLTensor *_vector_sum_col;
+ const ICLTensor *_vector_sum_row;
+ const ICLTensor *_bias;
+ const ICLTensor *_output_multipliers;
+ const ICLTensor *_output_shifts;
+ bool _slide_matrix_b;
+ bool _reinterpret_input_as_3d;
+ bool _reinterpret_output_as_3d;
+ bool _use_dummy_work_items;
+ bool _is_quantized_per_channel;
+ bool _fuse_output_stage;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYRESHAPEDONLYRHSKERNEL_H */
\ No newline at end of file
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Helpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLGEMMLOWPOFFSETCONTRIBUTIONKERNEL_H
+#define ARM_COMPUTE_CLGEMMLOWPOFFSETCONTRIBUTIONKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel used to add the offset contribution after the matrix multiplication. The computation is performed in-place
+ *
+ * This kernel takes a final int32 accumulator value (the output of the matrix multiplication),
+ * and adds to it the offset contribution of matrix A and matrix B in-place.
+ *
+ * The final result is:
+ *
+ * mm_result[i][k] = mm_result[i][k] +
+ * (vector_sum_col[k] * a_offset) +
+ * (vector_sum_row[i] * b_offset) +
+ * (a_offset * b_offset * k)
+ *
+ */
+class CLGEMMLowpOffsetContributionKernel : public ICLKernel
+{
+public:
+ /** Constructor */
+ CLGEMMLowpOffsetContributionKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers)*/
+ CLGEMMLowpOffsetContributionKernel(const CLGEMMLowpOffsetContributionKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers)*/
+ CLGEMMLowpOffsetContributionKernel &operator=(const CLGEMMLowpOffsetContributionKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLGEMMLowpOffsetContributionKernel(CLGEMMLowpOffsetContributionKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLGEMMLowpOffsetContributionKernel &operator=(CLGEMMLowpOffsetContributionKernel &&) = default;
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in, out] mm_result Input tensor containing the result of the matrix multiplication. Data type supported: S32
+ * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B.
+ * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result
+ * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A.
+ * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result
+ * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
+ * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+ * @param[in] k Number of matrix A columns or Matrix B rows
+ * @param[in] a_offset Offset to be added to each element of the matrix A.
+ * @param[in] b_offset Offset to be added to each element of the matrix B.
+ */
+ void configure(ICLTensor *mm_result, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, const ICLTensor *bias, int32_t k, int32_t a_offset, int32_t b_offset);
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in, out] mm_result Input tensor containing the result of the matrix multiplication. Data type supported: S32
+ * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B.
+ * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result
+ * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A.
+ * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result
+ * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
+ * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+ * @param[in] k Number of matrix A columns or Matrix B rows
+ * @param[in] a_offset Offset to be added to each element of the matrix A.
+ * @param[in] b_offset Offset to be added to each element of the matrix B.
+ */
+ void configure(const CLCompileContext &compile_context, ICLTensor *mm_result, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, const ICLTensor *bias, int32_t k, int32_t a_offset,
+ int32_t b_offset);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpOffsetContributionKernel
+ *
+ * @param[in] mm_result Input tensor containing the result of @ref CLGEMMLowpOffsetContributionKernel. Data type supported: S32
+ * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B.
+ * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result
+ * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A.
+ * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result
+ * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
+ * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+ * @param[in] a_offset Offset to be added to each element of the matrix A.
+ * @param[in] b_offset Offset to be added to each element of the matrix B.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *mm_result, const ITensorInfo *vector_sum_col, const ITensorInfo *vector_sum_row, const ITensorInfo *bias, int32_t a_offset, int32_t b_offset);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_vector_sum_col;
+ const ICLTensor *_vector_sum_row;
+ ICLTensor *_mm_result;
+ const ICLTensor *_bias;
+};
+} // namespace arm_compute
+
+#endif /* ARM_COMPUTE_CLGEMMLOWPOFFSETCONTRIBUTIONKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/ICLTensor.h"
--- /dev/null
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLGEMMLOWPOFFSETCONTRIBUTIONOUTPUTSTAGEKERNEL_H
+#define ARM_COMPUTE_CLGEMMLOWPOFFSETCONTRIBUTIONOUTPUTSTAGEKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel used to add the offset contribution after the matrix multiplication and perform the output stage.
+ *
+ * This kernel takes a final int32 accumulator value (the output of the matrix multiplication), adds to it the offset contribution
+ * of matrix A and matrix B and performs the output stage defined by the output_stage argument
+ *
+ * @note For quantized computations the output data type for auto-initialization must be passed as part of the @ref GEMMLowpOutputStageInfo.
+ */
+class CLGEMMLowpOffsetContributionOutputStageKernel : public ICLKernel
+{
+public:
+ /** Constructor */
+ CLGEMMLowpOffsetContributionOutputStageKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers)*/
+ CLGEMMLowpOffsetContributionOutputStageKernel(const CLGEMMLowpOffsetContributionOutputStageKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers)*/
+ CLGEMMLowpOffsetContributionOutputStageKernel &operator=(const CLGEMMLowpOffsetContributionOutputStageKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLGEMMLowpOffsetContributionOutputStageKernel(CLGEMMLowpOffsetContributionOutputStageKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLGEMMLowpOffsetContributionOutputStageKernel &operator=(CLGEMMLowpOffsetContributionOutputStageKernel &&) = default;
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] mm_result Input tensor containing the result of the matrix multiplication. Data type supported: S32
+ * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B.
+ * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result
+ * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A.
+ * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result
+ * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
+ * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+ * @param[out] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED.
+ * @param[in] k Number of matrix A columns or Matrix B rows
+ * @param[in] a_offset Offset to be added to each element of the matrix A.
+ * @param[in] b_offset Offset to be added to each element of the matrix B.
+ * @param[in] output_stage GEMMLowp output stage info
+ * @param[in] output_multipliers Output multipliers tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM).
+ * Supported data types: S32
+ * @param[in] output_shifts Output shifts tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM).
+ * Supported data types: S32
+ */
+ void configure(const ICLTensor *mm_result, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, const ICLTensor *bias, ICLTensor *output, int32_t k, int32_t a_offset, int32_t b_offset,
+ const GEMMLowpOutputStageInfo &output_stage, const ICLTensor *output_multipliers, const ICLTensor *output_shifts);
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] mm_result Input tensor containing the result of the matrix multiplication. Data type supported: S32
+ * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B.
+ * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result
+ * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A.
+ * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result
+ * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
+ * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+ * @param[out] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED.
+ * @param[in] k Number of matrix A columns or Matrix B rows
+ * @param[in] a_offset Offset to be added to each element of the matrix A.
+ * @param[in] b_offset Offset to be added to each element of the matrix B.
+ * @param[in] output_stage GEMMLowp output stage info
+ * @param[in] output_multipliers Output multipliers tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM).
+ * Supported data types: S32
+ * @param[in] output_shifts Output shifts tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM).
+ * Supported data types: S32
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *mm_result, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, const ICLTensor *bias, ICLTensor *output,
+ int32_t k,
+ int32_t a_offset, int32_t b_offset,
+ const GEMMLowpOutputStageInfo &output_stage, const ICLTensor *output_multipliers, const ICLTensor *output_shifts);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpOffsetContributionKernel
+ *
+ * @param[in] mm_result Input tensor containing the result of @ref CLGEMMLowpOffsetContributionKernel. Data type supported: S32
+ * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B.
+ * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result
+ * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A.
+ * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result
+ * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
+ * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+ * @param[in] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED.
+ * @param[in] a_offset Offset to be added to each element of the matrix A.
+ * @param[in] b_offset Offset to be added to each element of the matrix B.
+ * @param[in] output_stage GEMMLowp output stage info
+ * @param[in] output_multipliers Output multipliers tensor info. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM).
+ * Supported data types: S32
+ * @param[in] output_shifts Output shifts tensor info. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM).
+ * Supported data types: S32
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *mm_result, const ITensorInfo *vector_sum_col, const ITensorInfo *vector_sum_row, const ITensorInfo *bias, const ITensorInfo *output, int32_t a_offset,
+ int32_t b_offset, const GEMMLowpOutputStageInfo &output_stage, const ITensorInfo *output_multipliers, const ITensorInfo *output_shifts);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_mm_result;
+ const ICLTensor *_vector_sum_col;
+ const ICLTensor *_vector_sum_row;
+ const ICLTensor *_bias;
+ ICLTensor *_output;
+ const ICLTensor *_output_multipliers;
+ const ICLTensor *_output_shifts;
+ bool _is_quantized_per_channel;
+};
+} // namespace arm_compute
+
+#endif /* ARM_COMPUTE_CLGEMMLOWPOFFSETCONTRIBUTIONOUTPUTSTAGEKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/ICLTensor.h"
--- /dev/null
+/*
+ * Copyright (c) 2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEBYFIXEDPOINTKERNEL_H
+#define ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEBYFIXEDPOINTKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8/QASYMM8_SIGNED/QSYMM16
+ *
+ * This kernel takes a final int32 accumulator value (the output of the matrix multiplication), and processes it to obtain the final quantized value.
+ * The following computations will be performed by the kernel:
+ *
+ * -# Compute fixed point multiplication between each entry of input by gemmlowp_multiplier
+ * -# Add bias to final result if bias tensor is not a nullptr
+ * -# Round to nearest division by a power-of-two using result_shift
+ * -# Add offset to each result
+ * -# Clamp the value between the specified min and max bounds
+ * -# Clamp the resulting int32 values to the proper quantized range and cast to QASYMM8/QASYMM8_SIGNED/QSYMM16.
+ */
+class CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel : public ICLKernel
+{
+public:
+ /** Constructor */
+ CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers)*/
+ CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel(const CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers)*/
+ CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel &operator=(const CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel(CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel &operator=(CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel &&) = default;
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Input tensor. Data type supported: S32
+ * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
+ * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+ * @param[out] output Output tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM16.
+ * @param[in] info Output stage info. Used to pass the quantized output data type
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo *info);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel
+ *
+ * @param[in] input Input tensor. Data type supported: S32
+ * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
+ * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+ * @param[in] output Output tensor. Data type supported: Data type supported: QSYMM8/QASYMM8_SIGNED/QSYMM16.
+ * @param[in] info Output stage info. Used to pass the quantized output data type
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo *info);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input;
+ const ICLTensor *_bias;
+ ICLTensor *_output;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEBYFIXEDPOINTKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/ICLTensor.h"
--- /dev/null
+/*
+ * Copyright (c) 2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEBYFLOATKERNEL_H
+#define ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEBYFLOATKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+// Forward declarations
+class ICLTensor;
+
+/** OpenCL kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8/QASYMM8_SIGNED
+ *
+ * This kernel takes a final int32 accumulator value (the output of the matrix multiplication), and processes it to obtain the final QASYMM8/QASYMM8_SIGNED value.
+ * The following computations will be performed by the kernel:
+ *
+ * -# Compute fixed point multiplication between each entry of input by result_fixedpoint_multiplier
+ * -# Add bias to final result if bias tensor is not a nullptr
+ * -# Requantize
+ * -# Add offset to each result
+ * -# Clamp the value between the specified min and max bounds
+ * -# Clamp the resulting int32 values:
+ * - to the [0..255] range and cast to QASYMM8.
+ * - to the [-128..127] range and cast to QASYMM8_SIGNED.
+ */
+class CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel : public ICLKernel
+{
+public:
+ /** Constructor */
+ CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers)*/
+ CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel(const CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers)*/
+ CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel &operator=(const CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel(CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel &operator=(CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel &&) = default;
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] input Input tensor. Data type supported: S32
+ * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
+ * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+ * @param[out] output          Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED
+ * @param[in] info Output stage info. Used to pass the quantized output data type
+ */
+ void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo *info);
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Input tensor. Data type supported: S32
+ * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
+ * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+ * @param[out] output          Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED
+ * @param[in] info Output stage info. Used to pass the quantized output data type
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo *info);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel
+ *
+ * @param[in] input Input tensor. Data type supported: S32
+ * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
+ * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+ * @param[in] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED
+ * @param[in] info Output stage info. Used to pass the quantized output data type
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo *info);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input;
+ const ICLTensor *_bias;
+ ICLTensor *_output;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEBYFLOATKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/ICLTensor.h"
--- /dev/null
+/*
+ * Copyright (c) 2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEKERNEL_H
+#define ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8/QASYMM8_SIGNED
+ *
+ * This kernel takes a final int32 accumulator value (the output of the matrix multiplication), and processes it to obtain the final QASYMM8/QASYMM8_SIGNED value.
+ * The following computations will be performed by the kernel:
+ *
+ * -# Add offset terms to final result
+ * -# Multiply each entry of result by result_mult_int
+ * -# Add bias to final result if bias tensor is not a nullptr
+ * -# Shift the int32 accumulator by result_shift
+ * -# Clamp the value between the specified min and max bounds
+ * -# Clamp the resulting int32 values:
+ * - to the [0..255] range and cast to QASYMM8.
+ * - to the [-128..127] range and cast to QASYMM8_SIGNED.
+ *
+ */
+class CLGEMMLowpQuantizeDownInt32ScaleKernel : public ICLKernel
+{
+public:
+ /** Constructor */
+ CLGEMMLowpQuantizeDownInt32ScaleKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers)*/
+ CLGEMMLowpQuantizeDownInt32ScaleKernel(const CLGEMMLowpQuantizeDownInt32ScaleKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers)*/
+ CLGEMMLowpQuantizeDownInt32ScaleKernel &operator=(const CLGEMMLowpQuantizeDownInt32ScaleKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLGEMMLowpQuantizeDownInt32ScaleKernel(CLGEMMLowpQuantizeDownInt32ScaleKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLGEMMLowpQuantizeDownInt32ScaleKernel &operator=(CLGEMMLowpQuantizeDownInt32ScaleKernel &&) = default;
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] input Input tensor. Data type supported: S32
+ * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
+ * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+ * @param[out] output       Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED
+ * @param[in] output_stage GEMMLowp output stage metadata.
+ */
+ void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo *output_stage);
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Input tensor. Data type supported: S32
+ * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
+ * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+ * @param[out] output          Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED
+ * @param[in] output_stage GEMMLowp output stage metadata.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo *output_stage);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ScaleKernel
+ *
+ * @param[in] input Input tensor. Data type supported: S32
+ * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
+ * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+ * @param[in] output       Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED
+ * @param[in] output_stage GEMMLowp output stage metadata.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo *output_stage);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input;
+ const ICLTensor *_bias;
+ ICLTensor *_output;
+};
+} // namespace arm_compute
+
+#endif /* ARM_COMPUTE_CLGEMMLOWPQUANTIZEDOWNINT32SCALEKERNEL_H */
\ No newline at end of file
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpReductionKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/ICLTensor.h"
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLGEMMLOWREDUCTIONKERNEL_H
+#define ARM_COMPUTE_CLGEMMLOWREDUCTIONKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+struct GEMMLowpReductionKernelInfo;
+
+/** Common interface for all OpenCL reduction kernels */
+class ICLGEMMLowpReductionKernel : public ICLKernel
+{
+public:
+ /** Constructor */
+ ICLGEMMLowpReductionKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers)*/
+ ICLGEMMLowpReductionKernel(const ICLGEMMLowpReductionKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers)*/
+ ICLGEMMLowpReductionKernel &operator=(const ICLGEMMLowpReductionKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ ICLGEMMLowpReductionKernel(ICLGEMMLowpReductionKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ ICLGEMMLowpReductionKernel &operator=(ICLGEMMLowpReductionKernel &&) = default;
+
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] input Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8.
+ * @param[out] output Output row-vector of sums of all the entries in each row/col of input tensor. Data type supported: S32
+ * @param[in] info Kernel metadata:
+ * - k Number of matrix columns/rows depending on the type of reduction.
+ * - is_reshaped True if the matrix has been reshaped.
+ * - scalar Scalar value to multiply each reduced column/row by.
+ * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value.
+ */
+ virtual void configure(const ICLTensor *input, ICLTensor *output, const GEMMLowpReductionKernelInfo &info) = 0;
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8.
+ * @param[out] output Output row-vector of sums of all the entries in each row/col of input tensor. Data type supported: S32
+ * @param[in] info Kernel metadata:
+ * - k Number of matrix columns/rows depending on the type of reduction.
+ * - is_reshaped True if the matrix has been reshaped.
+ * - scalar Scalar value to multiply each reduced column/row by.
+ * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value.
+ */
+ virtual void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const GEMMLowpReductionKernelInfo &info) = 0;
+
+protected:
+ const ICLTensor *_input;
+ ICLTensor *_output;
+};
+
+/** OpenCL kernel used to compute the row-vectors of sums of all the entries in each row of Matrix A.
+ *
+ * @note This stage is needed to handle the offset of matrix product
+ * https://github.com/google/gemmlowp/blob/master/doc/low-precision.md
+ */
+class CLGEMMLowpMatrixAReductionKernel : public ICLGEMMLowpReductionKernel
+{
+public:
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] mtx_a Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8.
+ * @param[out] vector_sum_row Output row-vector of sums of all the entries in each row of mtx_a. Data type supported: S32
+ * @param[in] info Kernel metadata:
+ * - k Number of matrix columns/rows depending on the type of reduction.
+ * - is_reshaped True if the matrix has been reshaped.
+ * - scalar Scalar value to multiply each reduced column/row by.
+ * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value.
+ */
+ void configure(const ICLTensor *mtx_a, ICLTensor *vector_sum_row, const GEMMLowpReductionKernelInfo &info) override;
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] mtx_a Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8.
+ * @param[out] vector_sum_row Output row-vector of sums of all the entries in each row of mtx_a. Data type supported: S32
+ * @param[in] info Kernel metadata:
+ * - k Number of matrix columns/rows depending on the type of reduction.
+ * - is_reshaped True if the matrix has been reshaped.
+ * - scalar Scalar value to multiply each reduced column/row by.
+ * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *mtx_a, ICLTensor *vector_sum_row, const GEMMLowpReductionKernelInfo &info) override;
+ /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixAReductionKernel
+ *
+ * @param[in] mtx_a Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8.
+ * @param[in] vector_sum_row Output row-vector of sums of all the entries in each row of mtx_a. Data type supported: S32
+ * @param[in] info Kernel metadata:
+ * - k Number of matrix columns/rows depending on the type of reduction.
+ * - is_reshaped True if the matrix has been reshaped.
+ * - scalar Scalar value to multiply each reduced column/row by.
+ * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *mtx_a, const ITensorInfo *vector_sum_row, const GEMMLowpReductionKernelInfo &info);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+};
+
+/** OpenCL kernel used to compute the row-vectors of sums of all the entries in each column of Matrix B.
+ *
+ * @note This stage is needed to handle the offset of matrix product
+ * https://github.com/google/gemmlowp/blob/master/doc/low-precision.md
+ */
+class CLGEMMLowpMatrixBReductionKernel : public ICLGEMMLowpReductionKernel
+{
+public:
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] mtx_b          Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL.
+ * @param[out] vector_sum_col Output row-vector of sums of all the entries in each column of mtx_b. Data type supported: S32
+ * @param[in] info Kernel metadata:
+ * - k Number of matrix columns/rows depending on the type of reduction.
+ * - is_reshaped True if the matrix has been reshaped.
+ * - scalar Scalar value to multiply each reduced column/row by.
+ * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value.
+ */
+ void configure(const ICLTensor *mtx_b, ICLTensor *vector_sum_col, const GEMMLowpReductionKernelInfo &info) override;
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] mtx_b           Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL.
+ * @param[out] vector_sum_col Output row-vector of sums of all the entries in each column of mtx_b. Data type supported: S32
+ * @param[in] info Kernel metadata:
+ * - k Number of matrix columns/rows depending on the type of reduction.
+ * - is_reshaped True if the matrix has been reshaped.
+ * - scalar Scalar value to multiply each reduced column/row by.
+ * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *mtx_b, ICLTensor *vector_sum_col, const GEMMLowpReductionKernelInfo &info) override;
+ /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixBReductionKernel
+ *
+ * @param[in] mtx_b          Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL.
+ * @param[in] vector_sum_col Output row-vector of sums of all the entries in each column of mtx_b. Data type supported: S32
+ * @param[in] info Kernel metadata:
+ * - k Number of matrix columns/rows depending on the type of reduction.
+ * - is_reshaped True if the matrix has been reshaped.
+ * - scalar Scalar value to multiply each reduced column/row by.
+ * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *mtx_b, const ITensorInfo *vector_sum_col, const GEMMLowpReductionKernelInfo &info);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLGEMMLOWREDUCTIONKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLGEMMMATRIXMULTIPLYKERNEL_H
+#define ARM_COMPUTE_CLGEMMMATRIXMULTIPLYKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel to multiply two input matrices "A" and "B" and add a matrix "C" if provided. All elements of the output matrix will be multiplied by alpha. In case matrix C is passed, it will be added to the previous result.
+ * For the matrix C, the broadcast addition is supported if the flag "broadcast_bias" is set in the GEMMReshapeInfo object
+ *
+ * @note If the input tensors @p input0 and @p input1 have been reshaped respectively with @ref CLGEMMReshapeLHSMatrixKernel" and @ref CLGEMMReshapeRHSMatrixKernel,
+ * the flag @p is_interleaved_transposed must be set to true
+ *
+ * @attention @p input1 tensor must have at least 2 dimensions (matrix)
+ *
+ */
+class CLGEMMMatrixMultiplyKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLGEMMMatrixMultiplyKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGEMMMatrixMultiplyKernel(const CLGEMMMatrixMultiplyKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGEMMMatrixMultiplyKernel &operator=(const CLGEMMMatrixMultiplyKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLGEMMMatrixMultiplyKernel(CLGEMMMatrixMultiplyKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLGEMMMatrixMultiplyKernel &operator=(CLGEMMMatrixMultiplyKernel &&) = default;
+ /** Initialise the kernel's input, output and alpha
+ *
+ * @param[in] input0 Input tensor containing the Matrix A. Data types supported: F16/F32
+ * @param[in] input1 Input tensor containing the Matrix B. Data type supported: same as @p input0
+ * @param[in] input2 Input tensor containing the Matrix C (bias). Can be nullptr. Data type supported: same as @p input0
+ * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0
+ * @param[in] alpha Weight of the matrix product
+ * @param[in] beta (Optional) Weight of vector C. Default value is 0. Only beta = 1 is currently supported.
+ * @param[in] is_interleaved_transposed (Optional) True if input0 and input1 have been reshaped respectively using @ref CLGEMMReshapeLHSMatrixKernel and @ref CLGEMMReshapeRHSMatrixKernel
+ * @param[in] reshape_info (Optional) GEMM reshape info. If is_interleaved_transposed = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped
+ * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy
+ * @param[in] activation_info (Optional) Activation to apply after the matrix multiplication
+ *
+ */
+ void configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta = 0.f,
+ bool is_interleaved_transposed = true, const GEMMReshapeInfo &reshape_info = GEMMReshapeInfo(), bool fp_mixed_precision = false, const ActivationLayerInfo &activation_info = ActivationLayerInfo());
+ /** Initialise the kernel's input, output and alpha
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input0 Input tensor containing the Matrix A. Data types supported: F16/F32
+ * @param[in] input1 Input tensor containing the Matrix B. Data type supported: same as @p input0
+ * @param[in] input2 Input tensor containing the Matrix C (bias). Can be nullptr. Data type supported: same as @p input0
+ * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0
+ * @param[in] alpha Weight of the matrix product
+ * @param[in] beta (Optional) Weight of vector C. Default value is 0. Only beta = 1 is currently supported.
+ * @param[in] is_interleaved_transposed (Optional) True if input0 and input1 have been reshaped respectively using @ref CLGEMMReshapeLHSMatrixKernel and @ref CLGEMMReshapeRHSMatrixKernel
+ * @param[in] reshape_info (Optional) GEMM reshape info. If is_interleaved_transposed = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped
+ * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy
+ * @param[in] activation_info (Optional) Activation to apply after the matrix multiplication
+ *
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta = 0.f,
+ bool is_interleaved_transposed = true, const GEMMReshapeInfo &reshape_info = GEMMReshapeInfo(), bool fp_mixed_precision = false, const ActivationLayerInfo &activation_info = ActivationLayerInfo());
+ /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixMultiplyKernel
+ *
+ * @param[in] input0 Input tensor containing the Matrix A info. Data types supported: F16/F32
+ * @param[in] input1 Input tensor containing the Matrix B info. Data type supported: same as @p input0
+ * @param[in] input2 Input tensor containing the Matrix C (bias) info. Can be nullptr. Data type supported: same as @p input0
+ * @param[in] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0
+ * @param[in] alpha Weight of the matrix product
+ * @param[in] beta Weight of vector C. Default value is 0. Only beta = 1 is currently supported.
+ * @param[in] is_interleaved_transposed True if input0 and input1 have been reshaped respectively using @ref CLGEMMReshapeLHSMatrixKernel and @ref CLGEMMReshapeRHSMatrixKernel
+ * @param[in] reshape_info GEMM reshape info. If is_interleaved_transposed = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped
+ * @param[in] gpu_target GPU Target
+ * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy
+ * @param[in] activation_info (Optional) Activation to apply after the matrix multiplication
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float alpha, float beta,
+ bool is_interleaved_transposed, const GEMMReshapeInfo &reshape_info, GPUTarget gpu_target, bool fp_mixed_precision = false, const ActivationLayerInfo &activation_info = ActivationLayerInfo());
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+public:
+ const ICLTensor *_input0;
+ const ICLTensor *_input1;
+ const ICLTensor *_input2;
+ ICLTensor *_output;
+ bool _slide_matrix_b;
+ bool _reinterpret_input_as_3d;
+ bool _reinterpret_output_as_3d;
+ bool _add_bias;
+ bool _broadcast_bias;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLGEMMMATRIXMULTIPLYKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2019-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLGEMMMATRIXMULTIPLYNATIVEKERNEL_H
+#define ARM_COMPUTE_CLGEMMMATRIXMULTIPLYNATIVEKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+#include "arm_compute/core/KernelDescriptors.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel to multiply matrices when neither of the input matrices have been reshaped */
+class CLGEMMMatrixMultiplyNativeKernel : public ICLKernel
+{
+public:
+ /** Default Constructor */
+ CLGEMMMatrixMultiplyNativeKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGEMMMatrixMultiplyNativeKernel(const CLGEMMMatrixMultiplyNativeKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGEMMMatrixMultiplyNativeKernel &operator=(const CLGEMMMatrixMultiplyNativeKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLGEMMMatrixMultiplyNativeKernel(CLGEMMMatrixMultiplyNativeKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLGEMMMatrixMultiplyNativeKernel &operator=(CLGEMMMatrixMultiplyNativeKernel &&) = default;
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] input0 Input tensor for the LHS matrix. Data type supported: F32. The number of dimensions for the LHS matrix must be less or equal than 4.
+ * @param[in] input1 Input tensor for the RHS matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3.
+ * @param[in] input2 Input tensor containing the bias matrix. Data type supported: same as @p input0.
+ * @param[out] output    Output tensor. Data type supported: same as @p input0
+ * @param[in] alpha Weight of the matrix product
+ * @param[in] beta Weight of the matrix bias
+ * @param[in] lhs_info LHS matrix information used to retrieve the number of rows and accumulations to be processed by each thread. Only the following values are supported:
+ * lhs_info.m0: 1,2,3,4,5,6,7,8
+ * lhs_info.k0: 2,3,4,8,16
+ * @param[in] rhs_info RHS matrix information used to retrieve the number of columns and accumulations to be processed by each thread. Only the following values are supported:
+ * rhs_info.n0: 2,3,4,8,16
+ *                                                  rhs_info.k0: same as lhs_info.k0
+ * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices
+ */
+ void configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info,
+ const GEMMRHSMatrixInfo &rhs_info,
+ const GEMMKernelInfo &gemm_info);
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input0 Input tensor for the LHS matrix. Data type supported: F32. The number of dimensions for the LHS matrix must be less or equal than 4.
+ * @param[in] input1 Input tensor for the RHS matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3.
+ * @param[in] input2 Input tensor containing the bias matrix. Data type supported: same as @p input0.
+ * @param[out] output          Output tensor. Data type supported: same as @p input0
+ * @param[in] alpha Weight of the matrix product
+ * @param[in] beta Weight of the matrix bias
+ * @param[in] lhs_info LHS matrix information used to retrieve the number of rows and accumulations to be processed by each thread. Only the following values are supported:
+ * lhs_info.m0: 1,2,3,4,5,6,7,8
+ * lhs_info.k0: 2,3,4,8,16
+ * @param[in] rhs_info RHS matrix information used to retrieve the number of columns and accumulations to be processed by each thread. Only the following values are supported:
+ * rhs_info.n0: 2,3,4,8,16
+ *                                                  rhs_info.k0: same as lhs_info.k0
+ * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta,
+ const GEMMLHSMatrixInfo &lhs_info,
+ const GEMMRHSMatrixInfo &rhs_info,
+ const GEMMKernelInfo &gemm_info);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixMultiplyNativeKernel
+ *
+ * @param[in] input0 Input tensor info for the LHS matrix. Data type supported: F32. The number of dimensions for the LHS matrix must be less or equal than 4.
+ * @param[in] input1 Input tensor info for the RHS matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3.
+ * @param[in] input2 Input tensor info containing the bias matrix. Data type supported: same as @p input0.
+ * @param[in] output Output tensor info. Data type supported: same as @p input0
+ * @param[in] alpha Weight of the matrix product
+ * @param[in] beta Weight of the matrix bias
+ * @param[in] lhs_info LHS matrix information used to retrieve the number of rows and accumulations to be processed by each thread. Only the following values are supported:
+ * lhs_info.m0: 1,2,3,4,5,6,7,8
+ * lhs_info.k0: 2,3,4,8,16
+ * @param[in] rhs_info RHS matrix information used to retrieve the number of columns and accumulations to be processed by each thread. Only the following values are supported:
+ * rhs_info.n0: 2,3,4,8,16
+ *                                                  rhs_info.k0: same as lhs_info.k0
+ * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info,
+ const GEMMRHSMatrixInfo &rhs_info,
+ const GEMMKernelInfo &gemm_info);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input0;
+ const ICLTensor *_input1;
+ const ICLTensor *_input2;
+ ICLTensor *_output;
+ bool _slide_matrix_b;
+ bool _reinterpret_input_as_3d;
+ bool _reinterpret_output_as_3d;
+ bool _use_dummy_work_items;
+ bool _add_bias;
+ bool _broadcast_bias;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLGEMMMATRIXMULTIPLYNATIVEKERNEL_H*/
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLGEMMMATRIXMULTIPLYRESHAPEDKERNEL_H
+#define ARM_COMPUTE_CLGEMMMATRIXMULTIPLYRESHAPEDKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+#include "arm_compute/core/KernelDescriptors.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel to multiply matrices when both the input matrices LHS (input0) and RHS (input1) have been reshaped
+ *
+ * @note The input matrices @p input0 and @p input1 must be reshaped through @ref CLGEMMReshapeLHSMatrixKernel and @ref CLGEMMReshapeRHSMatrixKernel
+ */
+class CLGEMMMatrixMultiplyReshapedKernel : public ICLKernel
+{
+public:
+ /** Default Constructor */
+ CLGEMMMatrixMultiplyReshapedKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGEMMMatrixMultiplyReshapedKernel(const CLGEMMMatrixMultiplyReshapedKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGEMMMatrixMultiplyReshapedKernel &operator=(const CLGEMMMatrixMultiplyReshapedKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLGEMMMatrixMultiplyReshapedKernel(CLGEMMMatrixMultiplyReshapedKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLGEMMMatrixMultiplyReshapedKernel &operator=(CLGEMMMatrixMultiplyReshapedKernel &&) = default;
+ /** Initialise the kernel's input and output.
+ *
+ * @note The F16 computation also supports mixed precision through the gemm_info.fp_mixed_precision flag.
+ * Mixed precision combines different floating precisions during the computation, in particular, F32 for the accumulations and F16 for the
+ * multiplications. i.e. float c = (half)a * (half)b
+ *
+ * @note If rhs_info.export_to_cl_image = true, this OpenCL kernel will fetch the RHS data using the OpenCL read_image built-in function.
+ * Reading from the OpenCL image object can increase the performance. However, since the OpenCL image object is created importing the OpenCL buffer,
+ * the following conditions are required:
+ * -# rhs_info.n0 can only be 4, 8 and 16
+ * -# rhs_info.k0 can only be 4, 8 and 16
+ * -# Data type can only be F32
+ * -# The platform should support the OpenCL cl_khr_image2d_from_buffer extension
+ * -# The stride Y for the input1 should satisfy the OpenCL pitch alignment requirement
+ * -# input1 width should be less or equal to (CL_DEVICE_IMAGE2D_MAX_WIDTH * 4)
+ * -# input1 (height * depth) should be less or equal to CL_DEVICE_IMAGE2D_MAX_HEIGHT
+ *
+ * @param[in] input0 Input tensor containing the LHS reshaped matrix. Data type supported: F16/F32 (only F32 if rhs_info.export_to_cl_image = true). The number of dimensions for the LHS matrix must be less or equal than 4
+ * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3
+ * @param[in] input2 Input tensor containing the bias matrix. Data type supported: same as @p input0.
+ * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0
+ * @param[in] alpha Weight of the matrix product
+ * @param[in] beta Weight of the matrix bias
+ * @param[in] lhs_info LHS matrix information used for reshaping the input0 tensor. Only the following values are supported:
+ * lhs_info.m0: 2,3,4,5,6,7,8
+ * lhs_info.k0: 2,3,4,8,16
+ * lhs_info.transpose: false
+ * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported:
+ * rhs_info.n0: 2,3,4,8,16 (only 4, 8 and 16 if rhs_info.export_to_cl_image = true)
+ * rhs_info.k0: 2,3,4,8,16 (only 4, 8 and 16 if rhs_info.export_to_cl_image = true)
+ * rhs_info.transpose: true
+ * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices
+ *
+ * @note lhs_info.k0 must be equal to rhs_info.k0
+ */
+ void configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info,
+ const GEMMRHSMatrixInfo &rhs_info,
+ const GEMMKernelInfo &gemm_info);
+ /** Initialise the kernel's input and output.
+ *
+ * @note The F16 computation also supports mixed precision through the gemm_info.fp_mixed_precision flag.
+ * Mixed precision combines different floating precisions during the computation, in particular, F32 for the accumulations and F16 for the
+ * multiplications. i.e. float c = (half)a * (half)b
+ *
+ * @note If rhs_info.export_to_cl_image = true, this OpenCL kernel will fetch the RHS data using the OpenCL read_image built-in function.
+ * Reading from the OpenCL image object can increase the performance. However, since the OpenCL image object is created importing the OpenCL buffer,
+ * the following conditions are required:
+ * -# rhs_info.n0 can only be 4, 8 and 16
+ * -# rhs_info.k0 can only be 4, 8 and 16
+ * -# Data type can only be F32
+ * -# The platform should support the OpenCL cl_khr_image2d_from_buffer extension
+ * -# The stride Y for the input1 should satisfy the OpenCL pitch alignment requirement
+ * -# input1 width should be less or equal to (CL_DEVICE_IMAGE2D_MAX_WIDTH * 4)
+ * -# input1 (height * depth) should be less or equal to CL_DEVICE_IMAGE2D_MAX_HEIGHT
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input0 Input tensor containing the LHS reshaped matrix. Data type supported: F16/F32 (only F32 if rhs_info.export_to_cl_image = true). The number of dimensions for the LHS matrix must be less or equal than 4
+ * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3
+ * @param[in] input2 Input tensor containing the bias matrix. Data type supported: same as @p input0.
+ * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0
+ * @param[in] alpha Weight of the matrix product
+ * @param[in] beta Weight of the matrix bias
+ * @param[in] lhs_info LHS matrix information used for reshaping the input0 tensor. Only the following values are supported:
+ * lhs_info.m0: 2,3,4,5,6,7,8
+ * lhs_info.k0: 2,3,4,8,16
+ * lhs_info.transpose: false
+ * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported:
+ * rhs_info.n0: 2,3,4,8,16 (only 4, 8 and 16 if rhs_info.export_to_cl_image = true)
+ * rhs_info.k0: 2,3,4,8,16 (only 4, 8 and 16 if rhs_info.export_to_cl_image = true)
+ * rhs_info.transpose: true
+ * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices
+ *
+ * @note lhs_info.k0 must be equal to rhs_info.k0
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta,
+ const GEMMLHSMatrixInfo &lhs_info,
+ const GEMMRHSMatrixInfo &rhs_info,
+ const GEMMKernelInfo &gemm_info);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixMultiplyReshapedKernel
+ *
+ * @note The F16 computation also supports mixed precision through the gemm_info.fp_mixed_precision flag.
+ * Mixed precision combines different floating precisions during the computation, in particular, F32 for the accumulations and F16 for the
+ * multiplications. i.e. float c = (half)a * (half)b
+ *
+ * @note If rhs_info.export_to_cl_image = true, this OpenCL kernel will fetch the RHS data using the OpenCL read_image built-in function.
+ * Reading from the OpenCL image object can increase the performance. However, since the OpenCL image object is created importing the OpenCL buffer,
+ * the following conditions are required:
+ * -# rhs_info.n0 can only be 4, 8 and 16
+ * -# rhs_info.k0 can only be 4, 8 and 16
+ * -# Data type can only be F32
+ * -# The platform should support the OpenCL cl_khr_image2d_from_buffer extension
+ * -# The stride Y for the input1 should satisfy the OpenCL pitch alignment requirement
+ * -# input1 width should be less or equal to (CL_DEVICE_IMAGE2D_MAX_WIDTH * 4)
+ * -# input1 (height * depth) should be less or equal to CL_DEVICE_IMAGE2D_MAX_HEIGHT
+ *
+ * @param[in]  input0    Input tensor info containing the LHS reshaped matrix. Data type supported: F16/F32 (only F32 if rhs_info.export_to_cl_image = true). The number of dimensions for the LHS matrix must be less or equal than 4
+ * @param[in]  input1    Input tensor info containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3
+ * @param[in] input2 Input tensor info containing the bias matrix. Data type supported: same as @p input0.
+ * @param[in]  output    Output tensor info to store the result of matrix multiplication. Data type supported: same as @p input0
+ * @param[in] alpha Weight of the matrix product
+ * @param[in] beta Weight of the matrix bias
+ * @param[in] lhs_info LHS matrix information used for reshaping the input0 tensor. Only the following values are supported:
+ * lhs_info.m0: 2,3,4,5,6,7,8
+ * lhs_info.k0: 2,3,4,8,16
+ * lhs_info.transpose: false
+ * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported:
+ * rhs_info.n0: 2,3,4,8,16 (only 4, 8 and 16 if rhs_info.export_to_cl_image = true)
+ * rhs_info.k0: 2,3,4,8,16 (only 4, 8 and 16 if rhs_info.export_to_cl_image = true)
+ * rhs_info.transpose: true
+ * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices
+ *
+ * @note lhs_info.k0 must be equal to rhs_info.k0
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info,
+ const GEMMRHSMatrixInfo &rhs_info,
+ const GEMMKernelInfo &gemm_info);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input0;
+ const ICLTensor *_input1;
+ const ICLTensor *_input2;
+ ICLTensor *_output;
+ bool _slide_matrix_b;
+ bool _reinterpret_output_as_3d;
+ bool _use_dummy_work_items;
+ bool _add_bias;
+ bool _broadcast_bias;
+ bool _export_to_cl_image;
+ unsigned int _k;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLGEMMMATRIXMULTIPLYRESHAPEDKERNEL_H*/
\ No newline at end of file
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Helpers.h"
ARM_COMPUTE_ERROR_ON(_input1->info()->strides_in_bytes()[3] != 0);
}
- const size_t lhs_idx_batch_size = _reinterpret_input_as_3d && !_has_pad_y? 3u : 2u;
+ const size_t lhs_idx_batch_size = _reinterpret_input_as_3d && !_has_pad_y ? 3u : 2u;
const size_t rhs_idx_batch_size = 2u;
const size_t bia_idx_batch_size = 2u;
- const size_t out_idx_batch_size = _reinterpret_output_as_3d && !_has_pad_y? 3u : 2u;
+ const size_t out_idx_batch_size = _reinterpret_output_as_3d && !_has_pad_y ? 3u : 2u;
Window slice = window.first_slice_window_3D();
Window slice_matrix_b = slice;
--- /dev/null
+/*
+ * Copyright (c) 2019-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLGEMMMATRIXMULTIPLYRESHAPEDONLYRHSKERNEL_H
+#define ARM_COMPUTE_CLGEMMMATRIXMULTIPLYRESHAPEDONLYRHSKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+#include "arm_compute/core/KernelDescriptors.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel to multiply matrices when only the input matrix RHS (input1) has been reshaped
+ *
+ * @note The input matrix input1 must be reshaped through @ref CLGEMMReshapeRHSMatrixKernel
+ */
+class CLGEMMMatrixMultiplyReshapedOnlyRHSKernel : public ICLKernel
+{
+public:
+ /** Default Constructor */
+ CLGEMMMatrixMultiplyReshapedOnlyRHSKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGEMMMatrixMultiplyReshapedOnlyRHSKernel(const CLGEMMMatrixMultiplyReshapedOnlyRHSKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGEMMMatrixMultiplyReshapedOnlyRHSKernel &operator=(const CLGEMMMatrixMultiplyReshapedOnlyRHSKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLGEMMMatrixMultiplyReshapedOnlyRHSKernel(CLGEMMMatrixMultiplyReshapedOnlyRHSKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLGEMMMatrixMultiplyReshapedOnlyRHSKernel &operator=(CLGEMMMatrixMultiplyReshapedOnlyRHSKernel &&) = default;
+ /** Initialise the kernel's input and output.
+ *
+ * @note If rhs_info.export_to_cl_image = true, this OpenCL kernel will fetch the RHS data using the OpenCL read_image built-in function.
+ * Reading from the OpenCL image object can increase the performance. However, since the OpenCL image object is created importing the OpenCL buffer,
+ * the following conditions are required:
+ * -# rhs_info.n0 can only be 4, 8 and 16
+ * -# rhs_info.k0 can only be 4, 8 and 16
+ * -# Data type can only be F32
+ * -# The platform should support the OpenCL cl_khr_image2d_from_buffer extension
+ * -# The stride Y for the input1 should satisfy the OpenCL pitch alignment requirement
+ * -# input1 width should be less or equal to (CL_DEVICE_IMAGE2D_MAX_WIDTH * 4)
+ * -# input1 (height * depth) should be less or equal to CL_DEVICE_IMAGE2D_MAX_HEIGHT
+ *
+ * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: F16/F32 (only F32 if rhs_info.export_to_cl_image = true).
+ * The number of dimensions for the LHS matrix must be less or equal than 4.
+ * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3.
+ * @param[in] input2 Input tensor containing the bias matrix. Data type supported: same as @p input0.
+ * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0
+ * @param[in] alpha Weight of the matrix product
+ * @param[in] beta Weight of the matrix bias
+ * @param[in] lhs_info LHS matrix information used to retrieve the number of rows to be processed by each thread. Only the following values are supported:
+ * lhs_info.m0: 1,2,3,4,5,6,7,8
+ * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported:
+ * rhs_info.k0: 2,3,4,8,16
+ * rhs_info.n0: 2,3,4,8,16
+ * rhs_info.transpose: true,false
+ * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices
+ */
+ void configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info,
+ const GEMMRHSMatrixInfo &rhs_info,
+ const GEMMKernelInfo &gemm_info);
+ /** Initialise the kernel's input and output.
+ *
+ * @note If rhs_info.export_to_cl_image = true, this OpenCL kernel will fetch the RHS data using the OpenCL read_image built-in function.
+ * Reading from the OpenCL image object can increase the performance. However, since the OpenCL image object is created importing the OpenCL buffer,
+ * the following conditions are required:
+ * -# rhs_info.n0 can only be 4, 8 and 16
+ * -# rhs_info.k0 can only be 4, 8 and 16
+ * -# Data type can only be F32
+ * -# The platform should support the OpenCL cl_khr_image2d_from_buffer extension
+ * -# The stride Y for the input1 should satisfy the OpenCL pitch alignment requirement
+ * -# input1 width should be less or equal to (CL_DEVICE_IMAGE2D_MAX_WIDTH * 4)
+ * -# input1 (height * depth) should be less or equal to CL_DEVICE_IMAGE2D_MAX_HEIGHT
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: F16/F32 (only F32 if rhs_info.export_to_cl_image = true).
+ * The number of dimensions for the LHS matrix must be less or equal than 4.
+ * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3.
+ * @param[in] input2 Input tensor containing the bias matrix. Data type supported: same as @p input0.
+ * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0
+ * @param[in] alpha Weight of the matrix product
+ * @param[in] beta Weight of the matrix bias
+ * @param[in] lhs_info LHS matrix information used to retrieve the number of rows to be processed by each thread. Only the following values are supported:
+ * lhs_info.m0: 1,2,3,4,5,6,7,8
+ * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported:
+ * rhs_info.k0: 2,3,4,8,16
+ * rhs_info.n0: 2,3,4,8,16
+ * rhs_info.transpose: true,false
+ * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta,
+ const GEMMLHSMatrixInfo &lhs_info,
+ const GEMMRHSMatrixInfo &rhs_info,
+ const GEMMKernelInfo &gemm_info);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixMultiplyReshapedOnlyRHSKernel
+ *
+ * @note If rhs_info.export_to_cl_image = true, this OpenCL kernel will fetch the RHS data using the OpenCL read_image built-in function.
+ * Reading from the OpenCL image object can increase the performance. However, since the OpenCL image object is created importing the OpenCL buffer,
+ * the following conditions are required:
+ * -# rhs_info.n0 can only be 4, 8 and 16
+ * -# rhs_info.k0 can only be 4, 8 and 16
+ * -# Data type can only be F32
+ * -# The platform should support the OpenCL cl_khr_image2d_from_buffer extension
+ * -# The stride Y for the input1 should satisfy the OpenCL pitch alignment requirement
+ * -# input1 width should be less or equal to (CL_DEVICE_IMAGE2D_MAX_WIDTH * 4)
+ * -# input1 (height * depth) should be less or equal to CL_DEVICE_IMAGE2D_MAX_HEIGHT
+ *
+ * @param[in] input0 Input tensor info for the LHS matrix. Data type supported: F16/F32 (only F32 if rhs_info.export_to_cl_image = true).
+ * The number of dimensions for the LHS matrix must be less or equal than 4.
+ * @param[in] input1 Input tensor info for the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less or equal than 3.
+ * @param[in] input2 Input tensor info containing the bias matrix. Data type supported: same as @p input0.
+ * @param[in] output Output tensor info. Data type supported: same as @p input0
+ * @param[in] alpha Weight of the matrix product
+ * @param[in] beta Weight of the matrix bias
+ * @param[in] lhs_info LHS matrix information used to retrieve the number of rows to be processed by each thread. Only the following values are supported:
+ * lhs_info.m0: 1,2,3,4,5,6,7,8
+ * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported:
+ * rhs_info.k0: 2,3,4,8,16
+ * rhs_info.n0: 2,3,4,8,16
+ * rhs_info.transpose: true,false
+ * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info,
+ const GEMMRHSMatrixInfo &rhs_info,
+ const GEMMKernelInfo &gemm_info);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input0;
+ const ICLTensor *_input1;
+ const ICLTensor *_input2;
+ ICLTensor *_output;
+ bool _slide_matrix_b;
+ bool _reinterpret_input_as_3d;
+ bool _reinterpret_output_as_3d;
+ bool _use_dummy_work_items;
+ bool _add_bias;
+ bool _broadcast_bias;
+ bool _export_to_cl_image;
+ bool _has_pad_y;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLGEMMMATRIXMULTIPLYRESHAPEDONLYRHSKERNEL_H*/
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLGEMMMATRIXVECTORMULTIPLYKERNEL_H
+#define ARM_COMPUTE_CLGEMMMATRIXVECTORMULTIPLYKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the GEMM matrix vector multiply kernel. **/
+class CLGEMMMatrixVectorMultiplyKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLGEMMMatrixVectorMultiplyKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGEMMMatrixVectorMultiplyKernel(const CLGEMMMatrixVectorMultiplyKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGEMMMatrixVectorMultiplyKernel &operator=(const CLGEMMMatrixVectorMultiplyKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLGEMMMatrixVectorMultiplyKernel(CLGEMMMatrixVectorMultiplyKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLGEMMMatrixVectorMultiplyKernel &operator=(CLGEMMMatrixVectorMultiplyKernel &&) = default;
+ /** Set the input and output of the kernel.
+ *
+ * @param[in] input0 The reshaped input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
+ * @param[in] input1 The 2D reshaped weights tensor. Data type supported: Same as @p input.
+ * @param[out] output The output 2D tensor. Data types supported: Same as @p input, S32 for QASYMM8/QASYMM8_SIGNED.
+ */
+ void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output);
+ /** Set the input and output of the kernel.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input0 The reshaped input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
+ * @param[in] input1 The 2D reshaped weights tensor. Data type supported: Same as @p input.
+ * @param[out] output The output 2D tensor. Data types supported: Same as @p input, S32 for QASYMM8/QASYMM8_SIGNED.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixVectorMultiplyKernel
+ *
+ * @param[in] input0 The reshaped input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
+ * @param[in] input1 The 2D reshaped weights tensor info. Data type supported: Same as @p input.
+ * @param[in] output The output 2D tensor info. Data types supported: Same as @p input, S32 for QASYMM8/QASYMM8_SIGNED.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+ BorderSize border_size() const override;
+
+private:
+ const ICLTensor *_input0;
+ const ICLTensor *_input1;
+ ICLTensor *_output;
+ int _num_rows_read_per_iteration;
+ BorderSize _border_size;
+};
+} // arm_compute
+#endif /*ARM_COMPUTE_CLGEMMMATRIXVECTORMULTIPLYKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLGEMMRESHAPELHSMATRIXKERNEL_H
+#define ARM_COMPUTE_CLGEMMRESHAPELHSMATRIXKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel to reshape the LHS matrix when performing the matrix multiplication.
+ * In particular, this function splits the input matrix in blocks of size M0xK0 (defined through GEMMLHSInfo) and
+ * stores each one in the output matrix unrolling the values
+ */
+class CLGEMMReshapeLHSMatrixKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLGEMMReshapeLHSMatrixKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGEMMReshapeLHSMatrixKernel(const CLGEMMReshapeLHSMatrixKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGEMMReshapeLHSMatrixKernel &operator=(const CLGEMMReshapeLHSMatrixKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLGEMMReshapeLHSMatrixKernel(CLGEMMReshapeLHSMatrixKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLGEMMReshapeLHSMatrixKernel &operator=(CLGEMMReshapeLHSMatrixKernel &&) = default;
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] input Input tensor. Data types supported: All
+ * @param[out] output Output tensor. Data type supported: same as @p input
+ * @param[in] lhs_info LHS matrix information to be used for reshaping. This object contains all the necessary
+ * information to reshape the input tensor. Only the following values are supported:
+ * lhs_info.m0: 2,3,4,5,6,7,8
+ * lhs_info.k0: 2,3,4,8,16
+ * lhs_info.v0: greater than 0
+ * lhs_info.transpose: true, false
+ * lhs_info.interleave: true, false
+ * @param[in] reinterpret_input_as_3d (Optional) True if the input has to be reinterpreted as 3D tensor
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, bool reinterpret_input_as_3d = false);
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Input tensor. Data types supported: All
+ * @param[out] output Output tensor. Data type supported: same as @p input
+ * @param[in] lhs_info LHS matrix information to be used for reshaping. This object contains all the necessary
+ * information to reshape the input tensor. Only the following values are supported:
+ * lhs_info.m0: 2,3,4,5,6,7,8
+ * lhs_info.k0: 2,3,4,8,16
+ * lhs_info.v0: greater than 0
+ * lhs_info.transpose: true, false
+ * lhs_info.interleave: true, false
+ * @param[in] reinterpret_input_as_3d (Optional) True if the input has to be reinterpreted as 3D tensor
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, bool reinterpret_input_as_3d = false);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMReshapeLHSMatrixKernel
+ *
+ * @param[in] input Input tensor info. Data types supported: All
+ * @param[in] output Output tensor info which stores the interleaved matrix. Data type supported: same as @p input.
+ * @param[in] lhs_info LHS matrix information to be used for reshaping. This object contains all the necessary
+ * information to reshape the input tensor. Only the following values are supported:
+ * lhs_info.m0: 2,3,4,5,6,7,8
+ * lhs_info.k0: 2,3,4,8,16
+ * lhs_info.v0: greater than 0
+ * lhs_info.transpose: true, false
+ * lhs_info.interleave: true, false
+ * @param[in] reinterpret_input_as_3d True if the input has to be reinterpreted as 3D tensor
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const GEMMLHSMatrixInfo &lhs_info, bool reinterpret_input_as_3d);
+
+ // Inherited methods overridden
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input;
+ ICLTensor *_output;
+ bool _reinterpret_input_as_3d;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLGEMMRESHAPELHSMATRIXKERNEL_H */
\ No newline at end of file
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLGEMMRESHAPERHSMATRIXKERNEL_H
+#define ARM_COMPUTE_CLGEMMRESHAPERHSMATRIXKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel to reshape the RHS matrix when performing the matrix multiplication
+ * In particular, this kernel splits the input matrix in blocks of size K0xN0 and stores each one in
+ * the output matrix unrolling the values */
+class CLGEMMReshapeRHSMatrixKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLGEMMReshapeRHSMatrixKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGEMMReshapeRHSMatrixKernel(const CLGEMMReshapeRHSMatrixKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGEMMReshapeRHSMatrixKernel &operator=(const CLGEMMReshapeRHSMatrixKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLGEMMReshapeRHSMatrixKernel(CLGEMMReshapeRHSMatrixKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLGEMMReshapeRHSMatrixKernel &operator=(CLGEMMReshapeRHSMatrixKernel &&) = default;
+ /** Default destructor */
+ ~CLGEMMReshapeRHSMatrixKernel() = default;
+ /** Initialise the kernel's input and output.
+ *
+ * @note If rhs_info.export_to_cl_image = true, this OpenCL kernel will guarantee the OpenCL pitch alignment for the output tensor,
+ * required to create a OpenCL image object from buffer in @ref CLGEMMMatrixMultiplyReshapedKernel and in @ref CLGEMMMatrixMultiplyReshapedOnlyRHSKernel
+ * Since the OpenCL image object is created importing the OpenCL buffer, the following conditions are required:
+ * -# rhs_info.n0 can only be 4, 8 and 16
+ * -# rhs_info.k0 can only be 4, 8 and 16
+ * -# Data type can only be F32, F16
+ * -# The platform should support the OpenCL cl_khr_image2d_from_buffer extension
+ * -# output width should be less or equal to (CL_DEVICE_IMAGE2D_MAX_WIDTH * 4)
+ * -# output (height * depth) should be less or equal to CL_DEVICE_IMAGE2D_MAX_HEIGHT
+ * -# The output tensor should be only consumed by @ref CLGEMMMatrixMultiplyReshapedKernel or @ref CLGEMMMatrixMultiplyReshapedOnlyRHSKernel
+ *
+ * @param[in] input Input tensor. Data types supported: All
+ * @param[out] output Output tensor. Data type supported: same as @p input
+ * @param[in] rhs_info RHS matrix information to be used for reshaping. This object contains all the necessary
+ * information to reshape the input tensor. Only the following values are supported:
+ * rhs_info.n0: 2,3,4,8,16 (only 4, 8 and 16 if rhs_info.export_to_cl_image == true)
+ * rhs_info.k0: 1,2,3,4,8,16 (k0 = 1 only if rhs_info.transpose = false), (only 4, 8 and 16 if rhs_info.export_to_cl_image == true)
+ * rhs_info.h0: greater than 0
+ * rhs_info.transpose: true, false
+ * rhs_info.interleave: true, false
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const GEMMRHSMatrixInfo &rhs_info);
+ /** Initialise the kernel's input and output.
+ *
+ * @note If rhs_info.export_to_cl_image = true, this OpenCL kernel will guarantee the OpenCL pitch alignment for the output tensor,
+ * required to create a OpenCL image object from buffer in @ref CLGEMMMatrixMultiplyReshapedKernel and in @ref CLGEMMMatrixMultiplyReshapedOnlyRHSKernel
+ * Since the OpenCL image object is created importing the OpenCL buffer, the following conditions are required:
+ * -# rhs_info.n0 can only be 4, 8 and 16
+ * -# rhs_info.k0 can only be 4, 8 and 16
+ * -# Data type can only be F32, F16
+ * -# The platform should support the OpenCL cl_khr_image2d_from_buffer extension
+ * -# output width should be less or equal to (CL_DEVICE_IMAGE2D_MAX_WIDTH * 4)
+ * -# output (height * depth) should be less or equal to CL_DEVICE_IMAGE2D_MAX_HEIGHT
+ * -# The output tensor should be only consumed by @ref CLGEMMMatrixMultiplyReshapedKernel or @ref CLGEMMMatrixMultiplyReshapedOnlyRHSKernel
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Input tensor. Data types supported: All
+ * @param[out] output Output tensor. Data type supported: same as @p input
+ * @param[in] rhs_info RHS matrix information to be used for reshaping. This object contains all the necessary
+ * information to reshape the input tensor. Only the following values are supported:
+ * rhs_info.n0: 2,3,4,8,16 (only 4, 8 and 16 if rhs_info.export_to_cl_image == true)
+ * rhs_info.k0: 1,2,3,4,8,16 (k0 = 1 only if rhs_info.transpose = false), (only 4, 8 and 16 if rhs_info.export_to_cl_image == true)
+ * rhs_info.h0: greater than 0
+ * rhs_info.transpose: true, false
+ * rhs_info.interleave: true, false
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const GEMMRHSMatrixInfo &rhs_info);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMReshapeRHSMatrixKernel
+ *
+ * @note If rhs_info.export_to_cl_image = true, this OpenCL kernel will guarantee the OpenCL pitch alignment for the output tensor,
+ * required to create a OpenCL image object from buffer in @ref CLGEMMMatrixMultiplyReshapedKernel and in @ref CLGEMMMatrixMultiplyReshapedOnlyRHSKernel
+ * Since the OpenCL image object is created importing the OpenCL buffer, the following conditions are required:
+ * -# rhs_info.n0 can only be 4, 8 and 16
+ * -# rhs_info.k0 can only be 4, 8 and 16
+ * -# Data type can only be F32, F16
+ * -# The platform should support the OpenCL cl_khr_image2d_from_buffer extension
+ * -# output width should be less or equal to (CL_DEVICE_IMAGE2D_MAX_WIDTH * 4)
+ * -# output (height * depth) should be less or equal to CL_DEVICE_IMAGE2D_MAX_HEIGHT
+ * -# The output tensor should be only consumed by @ref CLGEMMMatrixMultiplyReshapedKernel or @ref CLGEMMMatrixMultiplyReshapedOnlyRHSKernel
+ *
+ * @param[in] input Input tensor info. Data types supported: All
+ * @param[in] output Output tensor info which stores the interleaved matrix. Data type supported: same as @p input.
+ * @param[in] rhs_info RHS matrix information to be used for reshaping. This object contains all the necessary
+ * information to reshape the input tensor. Only the following values are supported:
+ * rhs_info.n0: 2,3,4,8,16 (only 4, 8 and 16 if rhs_info.export_to_cl_image == true)
+ * rhs_info.k0: 1,2,3,4,8,16 (k0 = 1 only if rhs_info.transpose = false),(only 4, 8 and 16 if rhs_info.export_to_cl_image == true)
+ * rhs_info.h0: greater than 0
+ * rhs_info.transpose: true, false
+ * rhs_info.interleave: true, false
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const GEMMRHSMatrixInfo &rhs_info);
+
+ // Inherited methods overridden
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input;
+ ICLTensor *_output;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLGEMMRESHAPERHSMATRIXKERNEL_H */
\ No newline at end of file
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGatherKernel.h"
+#include "src/core/CL/kernels/CLGatherKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "src/core/helpers/AutoConfiguration.h"
--- /dev/null
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLGATHERKERNEL_H
+#define ARM_COMPUTE_CLGATHERKERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the kernel to perform tensor reshaping */
+class CLGatherKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLGatherKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGatherKernel(const CLGatherKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGatherKernel &operator=(const CLGatherKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLGatherKernel(CLGatherKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLGatherKernel &operator=(CLGatherKernel &&) = default;
+ /** Default destructor */
+ ~CLGatherKernel() = default;
+ /** Initialise the kernel's inputs and outputs
+ *
+ * @param[in] input Source tensor. Supported tensor rank: up to 4. Data type supported: All.
+ * @param[in] indices Indices tensor. Supported tensor rank: up to 1. Must be one of the following types: U32/S32. Each value must be in range [0, input.shape[@p axis])
+ * @param[out] output Destination tensor. Data type supported: Same as @p input
+ * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative values wrap around. Defaults to 0
+ */
+ void configure(const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, int axis = 0);
+ /** Initialise the kernel's inputs and outputs
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Supported tensor rank: up to 4. Data type supported: All.
+ * @param[in] indices Indices tensor. Supported tensor rank: up to 1. Must be one of the following types: U32/S32. Each value must be in range [0, input.shape[@p axis])
+ * @param[out] output Destination tensor. Data type supported: Same as @p input
+ * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative values wrap around. Defaults to 0
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, int axis = 0);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref CLGatherKernel
+ *
+ * @param[in] input Source tensor info. Supported tensor rank: up to 4. Data type supported: All.
+ * @param[in] indices Indices tensor info. Supported tensor rank: up to 4. Must be one of the following types: U32/S32. Each value must be in range [0, input.shape[@p axis])
+ * @param[in] output Destination tensor info. Data type supported: Same as @p input
+ * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative values wrap around. Defaults to 0
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, int axis = 0);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input; /**< Source tensor */
+ const ICLTensor *_indices; /**< Indices tensor */
+ ICLTensor *_output; /**< Destination tensor */
+ int _axis; /**< Axis index */
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLGATHERKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h"
+#include "src/core/CL/kernels/CLGaussian3x3Kernel.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLGAUSSIAN3X3KERNEL_H
+#define ARM_COMPUTE_CLGAUSSIAN3X3KERNEL_H
+
+#include "src/core/CL/ICLSimple2DKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the Gaussian 3x3 filter kernel.
+ *
+ */
+class CLGaussian3x3Kernel : public ICLSimple2DKernel
+{
+public:
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] input An input tensor. Data types supported: U8
+ * @param[out] output The output tensor. Data types supported: U8.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input An input tensor. Data types supported: U8
+ * @param[out] output The output tensor. Data types supported: U8.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined);
+
+ // Inherited methods overridden:
+ BorderSize border_size() const override;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLGAUSSIAN3X3KERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h"
+#include "src/core/CL/kernels/CLGaussian5x5Kernel.h"
#include <cstdint>
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLGAUSSIAN5X5KERNEL_H
+#define ARM_COMPUTE_CLGAUSSIAN5X5KERNEL_H
+
+#include "src/core/CL/kernels/CLConvolutionKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the kernel to run the horizontal pass of 5x5 Gaussian filter on a tensor. */
+class CLGaussian5x5HorKernel : public CLSeparableConvolution5x5HorKernel
+{
+public:
+ /** Initialise the kernel's source, destination and border.
+ *
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[out] output Destination tensor. Data types supported: S16.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
+ /** Initialise the kernel's source, destination and border.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[out] output Destination tensor. Data types supported: S16.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined);
+
+private:
+ //Make the configure method of the parent class private
+ using CLSeparableConvolution5x5HorKernel::configure;
+};
+
+/** Interface for the kernel to run the vertical pass of 5x5 Gaussian filter on a tensor. */
+class CLGaussian5x5VertKernel : public CLSeparableConvolution5x5VertKernel
+{
+public:
+ /** Initialise the kernel's source, destination and border.
+ *
+ * @param[in] input Input tensor(output of horizontal pass). Data types supported: S16.
+ * @param[out] output Destination tensor. Data types supported: U8.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
+ /** Initialise the kernel's source, destination and border.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Input tensor(output of horizontal pass). Data types supported: S16.
+ * @param[out] output Destination tensor. Data types supported: U8.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined);
+
+private:
+ //Make the configure method of the parent class private
+ using CLSeparableConvolution5x5VertKernel::configure;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLGAUSSIAN5X5KERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h"
+#include "src/core/CL/kernels/CLGaussianPyramidKernel.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLGAUSSIANPYRAMIDKERNEL_H
+#define ARM_COMPUTE_CLGAUSSIANPYRAMIDKERNEL_H
+
+#include "src/core/CL/ICLSimpleKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel to perform a Gaussian filter and half scaling across width (horizontal pass) */
+class CLGaussianPyramidHorKernel : public ICLSimpleKernel
+{
+public:
+ /** Default constructor */
+ CLGaussianPyramidHorKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGaussianPyramidHorKernel(const CLGaussianPyramidHorKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGaussianPyramidHorKernel &operator=(const CLGaussianPyramidHorKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLGaussianPyramidHorKernel(CLGaussianPyramidHorKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLGaussianPyramidHorKernel &operator=(CLGaussianPyramidHorKernel &&) = default;
+ /** Default destructor */
+ ~CLGaussianPyramidHorKernel() = default;
+
+ /** Initialise the kernel's source, destination and border mode.
+ *
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[out] output Destination tensor. Output should have half the input width. Data types supported: U16.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output);
+ /** Initialise the kernel's source, destination and border mode.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[out] output Destination tensor. Output should have half the input width. Data types supported: U16.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+ BorderSize border_size() const override;
+
+private:
+ int _l2_load_offset; /**< X load offset set by configure() and consumed by run(); presumably the left-2 column offset of the 5-tap filter window — confirm against the kernel implementation */
+};
+
+/** OpenCL kernel to perform a Gaussian filter and half scaling across height (vertical pass) */
+class CLGaussianPyramidVertKernel : public ICLSimpleKernel
+{
+public:
+ /** Default constructor */
+ CLGaussianPyramidVertKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGaussianPyramidVertKernel(const CLGaussianPyramidVertKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLGaussianPyramidVertKernel &operator=(const CLGaussianPyramidVertKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLGaussianPyramidVertKernel(CLGaussianPyramidVertKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLGaussianPyramidVertKernel &operator=(CLGaussianPyramidVertKernel &&) = default;
+ /** Default destructor */
+ ~CLGaussianPyramidVertKernel() = default;
+
+ /** Initialise the kernel's source, destination and border mode.
+ *
+ * @param[in] input Source tensor. Data types supported: U16.
+ * @param[out] output Destination tensor. Output should have half the input height. Data types supported: U8.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output);
+ /** Initialise the kernel's source, destination and border mode.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: U16.
+ * @param[out] output Destination tensor. Output should have half the input height. Data types supported: U8.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+ BorderSize border_size() const override;
+
+private:
+ int _t2_load_offset; /**< Y load offset set by configure() and consumed by run(); presumably the top-2 row offset of the 5-tap filter window — confirm against the kernel implementation */
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLGAUSSIANPYRAMIDKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGenerateProposalsLayerKernel.h"
+#include "src/core/CL/kernels/CLGenerateProposalsLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2019-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLGENERATEPROPOSALSLAYERKERNEL_H
+#define ARM_COMPUTE_CLGENERATEPROPOSALSLAYERKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for Compute All Anchors kernel */
+class CLComputeAllAnchorsKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLComputeAllAnchorsKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLComputeAllAnchorsKernel(const CLComputeAllAnchorsKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLComputeAllAnchorsKernel &operator=(const CLComputeAllAnchorsKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLComputeAllAnchorsKernel(CLComputeAllAnchorsKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLComputeAllAnchorsKernel &operator=(CLComputeAllAnchorsKernel &&) = default;
+ /** Default destructor */
+ ~CLComputeAllAnchorsKernel() = default;
+
+ /** Set the input and output tensors.
+ *
+ * @param[in] anchors Source tensor. Original set of anchors of size (4, A), where A is the number of anchors. Data types supported: QSYMM16/F16/F32
+ * @param[out] all_anchors Destination tensor. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. Data types supported: Same as @p input
+ * @param[in] info Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo
+ *
+ */
+ void configure(const ICLTensor *anchors, ICLTensor *all_anchors, const ComputeAnchorsInfo &info);
+ /** Set the input and output tensors.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] anchors Source tensor. Original set of anchors of size (4, A), where A is the number of anchors. Data types supported: QSYMM16/F16/F32
+ * @param[out] all_anchors Destination tensor. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. Data types supported: Same as @p input
+ * @param[in] info Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo
+ *
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *anchors, ICLTensor *all_anchors, const ComputeAnchorsInfo &info);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref CLComputeAllAnchorsKernel
+ *
+ * @param[in] anchors Source tensor info. Original set of anchors of size (4, A), where A is the number of anchors. Data types supported: QSYMM16/F16/F32
+ * @param[in] all_anchors Destination tensor info. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. Data types supported: Same as @p input
+ * @param[in] info Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo
+ *
+ * @return a Status
+ */
+ static Status validate(const ITensorInfo *anchors, const ITensorInfo *all_anchors, const ComputeAnchorsInfo &info);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_anchors; /**< Source tensor holding the original set of anchors, size (4, A) */
+ ICLTensor *_all_anchors; /**< Destination tensor receiving the anchors computed for every feature-map location, size (4, H*W*A) */
+};
+} // namespace arm_compute
+#endif // ARM_COMPUTE_CLGENERATEPROPOSALSLAYERKERNEL_H
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLHOGDescriptorKernel.h"
+#include "src/core/CL/kernels/CLHOGDescriptorKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLHOGDESCRIPTORKERNEL_H
+#define ARM_COMPUTE_CLHOGDESCRIPTORKERNEL_H
+
+#include "arm_compute/core/IHOG.h"
+#include "arm_compute/core/Size2D.h"
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** OpenCL kernel to perform HOG Orientation Binning */
+class CLHOGOrientationBinningKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLHOGOrientationBinningKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLHOGOrientationBinningKernel(const CLHOGOrientationBinningKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLHOGOrientationBinningKernel &operator=(const CLHOGOrientationBinningKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLHOGOrientationBinningKernel(CLHOGOrientationBinningKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLHOGOrientationBinningKernel &operator=(CLHOGOrientationBinningKernel &&) = default;
+ /** Default destructor */
+ ~CLHOGOrientationBinningKernel() = default;
+
+ /** Initialise the kernel's inputs, output and HOG's metadata
+ *
+ * @param[in] input_magnitude Input tensor which stores the magnitude of the gradient for each pixel. Data type supported: S16.
+ * @param[in] input_phase Input tensor which stores the phase of the gradient for each pixel. Data type supported: U8
+ * @param[out] output Output tensor which stores the local HOG for each cell. DataType supported: F32. Number of channels supported: equal to the number of histogram bins per cell
+ * @param[in] hog_info HOG's metadata
+ */
+ void configure(const ICLTensor *input_magnitude, const ICLTensor *input_phase, ICLTensor *output, const HOGInfo *hog_info);
+ /** Initialise the kernel's inputs, output and HOG's metadata
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input_magnitude Input tensor which stores the magnitude of the gradient for each pixel. Data type supported: S16.
+ * @param[in] input_phase Input tensor which stores the phase of the gradient for each pixel. Data type supported: U8
+ * @param[out] output Output tensor which stores the local HOG for each cell. DataType supported: F32. Number of channels supported: equal to the number of histogram bins per cell
+ * @param[in] hog_info HOG's metadata
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input_magnitude, const ICLTensor *input_phase, ICLTensor *output, const HOGInfo *hog_info);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input_magnitude; /**< Input tensor with the gradient magnitude of each pixel (S16) */
+ const ICLTensor *_input_phase; /**< Input tensor with the gradient phase of each pixel (U8) */
+ ICLTensor *_output; /**< Output tensor storing the local HOG of each cell (F32) */
+ Size2D _cell_size; /**< Cell size, taken from the HOG metadata passed to configure() */
+};
+
+/** OpenCL kernel to perform HOG block normalization */
+class CLHOGBlockNormalizationKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLHOGBlockNormalizationKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLHOGBlockNormalizationKernel(const CLHOGBlockNormalizationKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLHOGBlockNormalizationKernel &operator=(const CLHOGBlockNormalizationKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLHOGBlockNormalizationKernel(CLHOGBlockNormalizationKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLHOGBlockNormalizationKernel &operator=(CLHOGBlockNormalizationKernel &&) = default;
+ /** Default destructor */
+ ~CLHOGBlockNormalizationKernel() = default;
+
+ /** Initialise the kernel's input, output and HOG's metadata
+ *
+ * @param[in] input Input tensor which stores the local HOG for each cell. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per cell
+ * @param[out] output Output tensor which stores the normalised blocks. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block
+ * @param[in] hog_info HOG's metadata
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const HOGInfo *hog_info);
+ /** Initialise the kernel's input, output and HOG's metadata
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Input tensor which stores the local HOG for each cell. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per cell
+ * @param[out] output Output tensor which stores the normalised blocks. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block
+ * @param[in] hog_info HOG's metadata
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const HOGInfo *hog_info);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input; /**< Input tensor with the local HOG of each cell (F32) */
+ ICLTensor *_output; /**< Output tensor with the normalised blocks (F32) */
+ Size2D _num_cells_per_block_stride; /**< Number of cells per block stride, derived from the HOG metadata in configure() */
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLHOGDESCRIPTORKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLHOGDetectorKernel.h"
+#include "src/core/CL/kernels/CLHOGDetectorKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLHOGDETECTORKERNEL_H
+#define ARM_COMPUTE_CLHOGDETECTORKERNEL_H
+
+#include "arm_compute/core/CL/ICLArray.h"
+#include "arm_compute/core/CL/ICLHOG.h"
+#include "arm_compute/core/CL/OpenCL.h"
+#include "src/core/CL/ICLKernel.h"
+
+namespace cl
+{
+class Buffer;
+}
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel to perform HOG detector kernel using linear SVM */
+class CLHOGDetectorKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLHOGDetectorKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLHOGDetectorKernel(const CLHOGDetectorKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLHOGDetectorKernel &operator=(const CLHOGDetectorKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLHOGDetectorKernel(CLHOGDetectorKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLHOGDetectorKernel &operator=(CLHOGDetectorKernel &&) = default;
+ /** Default destructor */
+ ~CLHOGDetectorKernel() = default;
+
+ /** Initialise the kernel's input, HOG data-object, detection window, the stride of the detection window, the threshold and index of the object to detect
+ *
+ * @param[in] input Input tensor which stores the HOG descriptor obtained with @ref CLHOGOrientationBinningKernel. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block
+ * @param[in] hog HOG data object used by @ref CLHOGOrientationBinningKernel and @ref CLHOGBlockNormalizationKernel
+ * @param[out] detection_windows Array of @ref DetectionWindow. This array stores all the detected objects
+ * @param[in] num_detection_windows Number of detected objects
+ * @param[in] detection_window_stride Distance in pixels between 2 consecutive detection windows in x and y directions.
+ * It must be multiple of the hog->info()->block_stride()
+ * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane
+ * @param[in] idx_class (Optional) Index of the class used for evaluating which class the detection window belongs to
+ */
+ void configure(const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, cl::Buffer *num_detection_windows, const Size2D &detection_window_stride, float threshold = 0.0f,
+ uint16_t idx_class = 0);
+ /** Initialise the kernel's input, HOG data-object, detection window, the stride of the detection window, the threshold and index of the object to detect
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Input tensor which stores the HOG descriptor obtained with @ref CLHOGOrientationBinningKernel. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block
+ * @param[in] hog HOG data object used by @ref CLHOGOrientationBinningKernel and @ref CLHOGBlockNormalizationKernel
+ * @param[out] detection_windows Array of @ref DetectionWindow. This array stores all the detected objects
+ * @param[in] num_detection_windows Number of detected objects
+ * @param[in] detection_window_stride Distance in pixels between 2 consecutive detection windows in x and y directions.
+ * It must be multiple of the hog->info()->block_stride()
+ * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane
+ * @param[in] idx_class (Optional) Index of the class used for evaluating which class the detection window belongs to
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, cl::Buffer *num_detection_windows,
+ const Size2D &detection_window_stride, float threshold = 0.0f,
+ uint16_t idx_class = 0);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input; /**< Input tensor holding the HOG descriptor (F32) */
+ ICLDetectionWindowArray *_detection_windows; /**< Output array collecting the detected objects */
+ cl::Buffer *_num_detection_windows; /**< OpenCL buffer holding the number of detected objects */
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLHOGDETECTORKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLHarrisCornersKernel.h"
+#include "src/core/CL/kernels/CLHarrisCornersKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLHARRISCORNERSKERNEL_H
+#define ARM_COMPUTE_CLHARRISCORNERSKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+using ICLImage = ICLTensor;
+
+/** Interface for the harris score kernel.
+ *
+ * @note The implementation supports 3, 5, and 7 for the block_size.
+ */
+class CLHarrisScoreKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLHarrisScoreKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLHarrisScoreKernel(const CLHarrisScoreKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLHarrisScoreKernel &operator=(const CLHarrisScoreKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLHarrisScoreKernel(CLHarrisScoreKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLHarrisScoreKernel &operator=(CLHarrisScoreKernel &&) = default;
+ /** Default destructor */
+ ~CLHarrisScoreKernel() = default;
+
+ /** Setup the kernel parameters
+ *
+ * @param[in] input1 Source image (gradient X). Data types supported S16, S32. (Must be the same as input2)
+ * @param[in] input2 Source image (gradient Y). Data types supported S16, S32. (Must be the same as input1)
+ * @param[out] output Destination image (harris score). Data types supported F32
+ * @param[in] block_size The block window size used to compute the Harris Corner score. Supports: 3, 5 and 7
+ * @param[in] norm_factor Normalization factor to use accordingly with the gradient size (Must be different from 0)
+ * @param[in] strength_thresh Minimum threshold with which to eliminate Harris Corner scores (computed using the normalized Sobel kernel).
+ * @param[in] sensitivity Sensitivity threshold k from the Harris-Stephens equation.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ICLImage *input1, const ICLImage *input2, ICLImage *output,
+ int32_t block_size, float norm_factor, float strength_thresh, float sensitivity,
+ bool border_undefined);
+ /** Setup the kernel parameters
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input1 Source image (gradient X). Data types supported S16, S32. (Must be the same as input2)
+ * @param[in] input2 Source image (gradient Y). Data types supported S16, S32. (Must be the same as input1)
+ * @param[out] output Destination image (harris score). Data types supported F32
+ * @param[in] block_size The block window size used to compute the Harris Corner score. Supports: 3, 5 and 7
+ * @param[in] norm_factor Normalization factor to use accordingly with the gradient size (Must be different from 0)
+ * @param[in] strength_thresh Minimum threshold with which to eliminate Harris Corner scores (computed using the normalized Sobel kernel).
+ * @param[in] sensitivity Sensitivity threshold k from the Harris-Stephens equation.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLImage *input1, const ICLImage *input2, ICLImage *output,
+ int32_t block_size, float norm_factor, float strength_thresh, float sensitivity,
+ bool border_undefined);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+ BorderSize border_size() const override;
+
+protected:
+ const ICLImage *_input1; /**< Source image - Gx component */
+ const ICLImage *_input2; /**< Source image - Gy component */
+ ICLImage *_output; /**< Destination image - Harris score */
+ float _sensitivity; /**< Sensitivity value */
+ float _strength_thresh; /**< Threshold value */
+ float _norm_factor; /**< Normalization factor */
+ BorderSize _border_size; /**< Border size */
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLHARRISCORNERSKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h"
+#include "src/core/CL/kernels/CLHeightConcatenateLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2019-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ARM_COMPUTE_CLHEIGHTCONCATENATELAYERKERNEL_H
+#define ARM_COMPUTE_CLHEIGHTCONCATENATELAYERKERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+/** Interface for the height concatenate kernel.
+ * The input tensor will be concatenated into the output tensor.
+ */
+class CLHeightConcatenateLayerKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLHeightConcatenateLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLHeightConcatenateLayerKernel(const CLHeightConcatenateLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLHeightConcatenateLayerKernel &operator=(const CLHeightConcatenateLayerKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLHeightConcatenateLayerKernel(CLHeightConcatenateLayerKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLHeightConcatenateLayerKernel &operator=(CLHeightConcatenateLayerKernel &&) = default;
+ /** Default destructor */
+ ~CLHeightConcatenateLayerKernel() = default;
+ /** Initialise the kernel's inputs and output
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Input tensor. Data types supported: All.
+ * @param[in] height_offset The starting offset on the Y axis for the output tensor.
+ * @param[out] output Output tensor. Data types supported: Same as @p input.
+ *
+ */
+ void configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int height_offset, ITensorInfo *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLHeightConcatenateLayerKernel
+ *
+ * @param[in] input Input tensor info. Data types supported: All.
+ * @param[in] height_offset The starting offset on the Y axis for the output tensor.
+ * @param[in] output Output tensor info. Data types supported: Same as @p input.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, unsigned int height_offset, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ unsigned int _height_offset; /**< Starting offset on the Y axis at which the input is written into the output */
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLHEIGHTCONCATENATELAYERKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLHistogramKernel.h"
+#include "src/core/CL/kernels/CLHistogramKernel.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLDistribution1D.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLHISTOGRAMKERNEL_H
+#define ARM_COMPUTE_CLHISTOGRAMKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLDistribution1D;
+class ICLTensor;
+using ICLImage = ICLTensor;
+
+/** Interface to run the histogram kernel. This kernel processes the part of image with width can be divided by 16.
+ * If the image width is not a multiple of 16, remaining pixels have to be processed with the @ref CLHistogramBorderKernel
+ */
+class CLHistogramKernel : public ICLKernel
+{
+public:
+ /** Constructor */
+ CLHistogramKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLHistogramKernel(const CLHistogramKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLHistogramKernel &operator=(const CLHistogramKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLHistogramKernel(CLHistogramKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLHistogramKernel &operator=(CLHistogramKernel &&) = default;
+ /** Initialise the kernel's input, output and border mode.
+ *
+ * @param[in] input Source image. Data types supported: U8.
+ * @param[out] output Destination distribution.
+ */
+ void configure(const ICLImage *input, ICLDistribution1D *output);
+ /** Initialise the kernel's input, output and border mode.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source image. Data types supported: U8.
+ * @param[out] output Destination distribution.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLImage *input, ICLDistribution1D *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLImage *_input;
+ ICLDistribution1D *_output;
+};
+
+/** Interface to run the histogram kernel to handle the leftover part of image
+ *
+ */
+class CLHistogramBorderKernel : public ICLKernel
+{
+public:
+ /** Constructor */
+ CLHistogramBorderKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLHistogramBorderKernel(const CLHistogramBorderKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLHistogramBorderKernel &operator=(const CLHistogramBorderKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLHistogramBorderKernel(CLHistogramBorderKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLHistogramBorderKernel &operator=(CLHistogramBorderKernel &&) = default;
+ /** Initialise the kernel's input, output and border mode.
+ *
+ * @param[in] input Source image. Data types supported: U8.
+ * @param[out] output Destination distribution.
+ */
+ void configure(const ICLImage *input, ICLDistribution1D *output);
+ /** Initialise the kernel's input, output and border mode.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source image. Data types supported: U8.
+ * @param[out] output Destination distribution.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLImage *input, ICLDistribution1D *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLImage *_input;
+ ICLDistribution1D *_output;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLHISTOGRAMKERNEL_H*/
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLIm2ColKernel.h"
+#include "src/core/CL/kernels/CLIm2ColKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLIM2COLKERNEL_H
+#define ARM_COMPUTE_CLIM2COLKERNEL_H
+
+#include "arm_compute/core/Size2D.h"
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the im2col reshape kernel.
+ *
+ * Rearranges image blocks into columns. It is used to strip out each convolution block to a single column.
+ * It is used to transform a convolution to a plain matrix multiplication.
+ *
+ * For example taking into account the image below and assuming 3x3 image blocks with stride of 1 we have:
+ * @f[
+ * \left( \begin{array}{cccc}
+ * a00 & a01 & a02 & a03 \\
+ * a10 & a11 & a12 & a13 \\
+ * a20 & a21 & a22 & a23 \\
+ * a30 & a31 & a32 & a33 \\
+ * \end{array} \right)
+ * =
+ * \left( \begin{array}{ccccccccc}
+ * a00 & a01 & a02 & a10 & a11 & a12 & a20 & a21 & a22 \\
+ * a01 & a02 & a03 & a11 & a12 & a13 & a21 & a22 & a23 \\
+ * a10 & a11 & a12 & a20 & a21 & a22 & a30 & a31 & a32 \\
+ * a11 & a12 & a13 & a21 & a22 & a23 & a31 & a32 & a33 \\
+ * \end{array} \right)
+ * @f]
+ */
+class CLIm2ColKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLIm2ColKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLIm2ColKernel(const CLIm2ColKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLIm2ColKernel &operator=(const CLIm2ColKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLIm2ColKernel(CLIm2ColKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLIm2ColKernel &operator=(CLIm2ColKernel &&) = default;
+ /** Set the input and output of the kernel.
+ *
+ * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
+ * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
+ * @param[out] output The output tensor. First 2 lower dimensions represent a transform of each 3D input,
+ * while every dimension above represents a batch. Data types supported: Same as @p input
+ * @param[in] kernel_dims The kernel dimensions (width and height).
+ * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
+ * @param[in] has_bias In case biases are provided expands the matrix with 1.
+ * This is valid only for non-quantized inputs.
+ * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+ * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution.
+ * Number of groups other than 1 is only supported for NCHW data layout.
+ * Number of groups should be multiple to the number of channels.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation = Size2D(1U, 1U),
+ unsigned int num_groups = 1);
+ /** Set the input and output of the kernel.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
+ * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
+ * @param[out] output The output tensor. First 2 lower dimensions represent a transform of each 3D input,
+ * while every dimension above represents a batch. Data types supported: Same as @p input
+ * @param[in] kernel_dims The kernel dimensions (width and height).
+ * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
+ * @param[in] has_bias In case biases are provided expands the matrix with 1.
+ * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+ * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias,
+ const Size2D &dilation = Size2D(1U, 1U),
+ unsigned int num_groups = 1);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLIm2ColKernel
+ *
+ * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
+ * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
+ * @param[in] output The output tensor. First 2 lower dimensions represent a transform of each 3D input,
+ * while every dimension above represents a batch. Data types supported: Same as @p input
+ * @param[in] kernel_dims The kernel dimensions (width and height).
+ * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
+ * @param[in] has_bias In case biases are provided expands the matrix with 1.
+ * This is valid only for non-quantized inputs.
+ * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+ * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution.
+ * Number of groups other than 1 is only supported for NCHW data layout.
+ * Number of groups should be multiple to the number of channels.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation = Size2D(1U, 1U),
+ unsigned int num_groups = 1);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+public:
+ const ICLTensor *_input;
+ ICLTensor *_output;
+ DataLayout _data_layout;
+ std::pair<unsigned int, unsigned int> _convolved_dims;
+ unsigned int _num_elems_processed_per_iteration;
+ Size2D _kernel_dims;
+ PadStrideInfo _conv_info;
+ unsigned int _num_groups;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLIM2COLKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernel.h"
+#include "src/core/CL/kernels/CLInstanceNormalizationLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2019-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYERKERNEL_H
+#define ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYERKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+#include "arm_compute/core/KernelDescriptors.h"
+
+namespace arm_compute
+{
+// Forward declarations
+class ICLTensor;
+
+/** Interface for performing an instance normalization */
+class CLInstanceNormalizationLayerKernel : public ICLKernel
+{
+public:
+ /** Constructor */
+ CLInstanceNormalizationLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLInstanceNormalizationLayerKernel(const CLInstanceNormalizationLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLInstanceNormalizationLayerKernel &operator=(const CLInstanceNormalizationLayerKernel &) = delete;
+ /** Default Move Constructor. */
+ CLInstanceNormalizationLayerKernel(CLInstanceNormalizationLayerKernel &&) = default;
+ /** Default move assignment operator */
+ CLInstanceNormalizationLayerKernel &operator=(CLInstanceNormalizationLayerKernel &&) = default;
+ /** Default destructor */
+ ~CLInstanceNormalizationLayerKernel() = default;
+
+ /** Set the input and output tensors.
+ *
+ * @param[in, out] input Source tensor. Data types supported: F16/F32. Data layout supported: NCHW, NHWC
+ * In case of @p output tensor = nullptr this tensor will store the result of the normalization.
+ * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input.
+ * @param[in] info Kernel meta-data descriptor
+ */
+ void configure(ICLTensor *input, ICLTensor *output, const InstanceNormalizationLayerKernelInfo &info);
+ /** Set the input and output tensors.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in, out] input Source tensor. Data types supported: F16/F32. Data layout supported: NCHW, NHWC
+ * In case of @p output tensor = nullptr this tensor will store the result of the normalization.
+ * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input.
+ * @param[in] info Kernel meta-data descriptor
+ */
+ void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const InstanceNormalizationLayerKernelInfo &info);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref CLInstanceNormalizationLayer.
+ *
+ * @param[in] input Source tensor info. Data types supported: F16/F32. Data layout supported: NHWC, NCHW
+ * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p input.
+ * @param[in] info Kernel meta-data descriptor
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const InstanceNormalizationLayerKernelInfo &info);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ ICLTensor *_input;
+ ICLTensor *_output;
+ bool _run_in_place;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLINSTANCENORMALIZATIONLAYERKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLIntegralImageKernel.h"
+#include "src/core/CL/kernels/CLIntegralImageKernel.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLINTEGRALIMAGEKERNEL_H
+#define ARM_COMPUTE_CLINTEGRALIMAGEKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+#include "src/core/CL/ICLSimple2DKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface to run the horizontal pass of the integral image kernel. */
+class CLIntegralImageHorKernel : public ICLSimple2DKernel
+{
+public:
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] input An input tensor. Data types supported: U8
+ * @param[out] output Destination tensor, Data types supported: U32.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output);
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input An input tensor. Data types supported: U8
+ * @param[out] output Destination tensor, Data types supported: U32.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
+};
+
+/** Interface to run the vertical pass of the integral image kernel. */
+class CLIntegralImageVertKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLIntegralImageVertKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLIntegralImageVertKernel(const CLIntegralImageVertKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLIntegralImageVertKernel &operator=(const CLIntegralImageVertKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLIntegralImageVertKernel(CLIntegralImageVertKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLIntegralImageVertKernel &operator=(CLIntegralImageVertKernel &&) = default;
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in,out] in_out The input/output tensor. Data types supported: U32
+ */
+ void configure(ICLTensor *in_out);
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in,out] in_out The input/output tensor. Data types supported: U32
+ */
+ void configure(const CLCompileContext &compile_context, ICLTensor *in_out);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ ICLTensor *_in_out;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLINTEGRALIMAGEKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h"
+#include "src/core/CL/kernels/CLL2NormalizeLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLL2NORMALIZELAYERKERNEL_H
+#define ARM_COMPUTE_CLL2NORMALIZELAYERKERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for performing a L2 normalize on a given axis given the square sum of it in this axis */
+class CLL2NormalizeLayerKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLL2NormalizeLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLL2NormalizeLayerKernel(const CLL2NormalizeLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLL2NormalizeLayerKernel &operator=(const CLL2NormalizeLayerKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLL2NormalizeLayerKernel(CLL2NormalizeLayerKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLL2NormalizeLayerKernel &operator=(CLL2NormalizeLayerKernel &&) = default;
+ /** Default destructor */
+ ~CLL2NormalizeLayerKernel() = default;
+
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Source tensor. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC.
+ * @param[in] sum Sum values tensor. Data types supported: same as @p input.
+ * Sum will have the same number of dimensions as input.
+ * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input.
+ * Output will have the same number of dimensions as input.
+ * @param[in] axis Axis along which to reduce. Negative values wrap around. Maximum supported actual reduction axis : 2
+ * @param[in] epsilon Lower bound value for the normalization.
+ */
+ void configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, int axis, float epsilon);
+ /** Set the input and output tensors.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC.
+ * @param[in] sum Sum values tensor. Data types supported: same as @p input.
+ * Sum will have the same number of dimensions as input.
+ * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input.
+ * Output will have the same number of dimensions as input.
+ * @param[in] axis Axis along which to reduce. Negative values wrap around. Maximum supported actual reduction axis : 2
+ * @param[in] epsilon Lower bound value for the normalization.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, int axis, float epsilon);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref CLL2NormalizeLayerKernel.
+ *
+ * @param[in] input Source tensor info. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC.
+ * @param[in] sum Sum values tensor info. Data types supported: same as @p input.
+ * Sum will have the same number of dimensions as input.
+ * @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p input.
+ * Output will have the same number of dimensions as input.
+ * @param[in] axis Axis along which to reduce. Negative values wrap around. Maximum supported actual reduction axis : 2
+ * @param[in] epsilon Lower bound value for the normalization.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, int axis, float epsilon);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input;
+ const ICLTensor *_sum;
+ ICLTensor *_output;
+ unsigned int _actual_axis;
+ float _epsilon;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLL2NORMALIZELAYERKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLLKTrackerKernel.h"
+#include "src/core/CL/kernels/CLLKTrackerKernel.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLArray.h"
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLLKTRACKERKERNEL_H
+#define ARM_COMPUTE_CLLKTRACKERKERNEL_H
+
+#include "arm_compute/core/CL/ICLArray.h"
+#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
+
+#include <cstddef>
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface to run the initialization step of LKTracker */
+class CLLKTrackerInitKernel : public ICLKernel
+{
+public:
+ /** Initialise the kernel input and output
+ *
+ * @param[in] old_points Pointer to the @ref ICLKeyPointArray storing old key points
+ * @param[in] new_points_estimates Pointer to the @ref ICLKeyPointArray storing new estimates key points
+ * @param[out] old_points_internal Pointer to the array of internal @ref CLLKInternalKeypoint old points
+ * @param[out] new_points_internal Pointer to the array of internal @ref CLLKInternalKeypoint new points
+ * @param[in] use_initial_estimate The flag to indicate whether the initial estimated position should be used
+ * @param[in] level The pyramid level
+ * @param[in] num_levels The number of pyramid levels
+ * @param[in] pyramid_scale Scale factor used for generating the pyramid
+ */
+ void configure(const ICLKeyPointArray *old_points, const ICLKeyPointArray *new_points_estimates,
+ ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal,
+ bool use_initial_estimate, size_t level, size_t num_levels, float pyramid_scale);
+ /** Initialise the kernel input and output
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] old_points Pointer to the @ref ICLKeyPointArray storing old key points
+ * @param[in] new_points_estimates Pointer to the @ref ICLKeyPointArray storing new estimates key points
+ * @param[out] old_points_internal Pointer to the array of internal @ref CLLKInternalKeypoint old points
+ * @param[out] new_points_internal Pointer to the array of internal @ref CLLKInternalKeypoint new points
+ * @param[in] use_initial_estimate The flag to indicate whether the initial estimated position should be used
+ * @param[in] level The pyramid level
+ * @param[in] num_levels The number of pyramid levels
+ * @param[in] pyramid_scale Scale factor used for generating the pyramid
+ */
+ void configure(const CLCompileContext &compile_context, const ICLKeyPointArray *old_points, const ICLKeyPointArray *new_points_estimates,
+ ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal,
+ bool use_initial_estimate, size_t level, size_t num_levels, float pyramid_scale);
+
+ // Note: this class declares no data members; the configuration is fully captured in the OpenCL kernel arguments.
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+};
+
+/** Interface to run the finalize step of LKTracker, where it truncates the coordinates stored in new_points array */
+class CLLKTrackerFinalizeKernel : public ICLKernel
+{
+public:
+ /** Initialise the kernel input and output
+ *
+ * @param[in] new_points_internal Pointer to the array of internal @ref CLLKInternalKeypoint new points
+ * @param[out] new_points Pointer to the @ref ICLKeyPointArray storing new key points
+ */
+ void configure(ICLLKInternalKeypointArray *new_points_internal, ICLKeyPointArray *new_points);
+ /** Initialise the kernel input and output
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] new_points_internal Pointer to the array of internal @ref CLLKInternalKeypoint new points
+ * @param[out] new_points Pointer to the @ref ICLKeyPointArray storing new key points
+ */
+ void configure(const CLCompileContext &compile_context, ICLLKInternalKeypointArray *new_points_internal, ICLKeyPointArray *new_points);
+
+ // Note: this class declares no data members; the configuration is fully captured in the OpenCL kernel arguments.
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+};
+
+/** Interface to run the first stage of LKTracker, where A11, A12, A22, min_eig, ival, ixval and iyval are computed */
+class CLLKTrackerStage0Kernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLLKTrackerStage0Kernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLLKTrackerStage0Kernel(const CLLKTrackerStage0Kernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLLKTrackerStage0Kernel &operator=(const CLLKTrackerStage0Kernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLLKTrackerStage0Kernel(CLLKTrackerStage0Kernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLLKTrackerStage0Kernel &operator=(CLLKTrackerStage0Kernel &&) = default;
+ /** Initialise the kernel input and output
+ *
+ * @param[in] old_input Pointer to the input old tensor. Data types supported: U8
+ * @param[in] old_scharr_gx Pointer to the input scharr X tensor. Data types supported: S16
+ * @param[in] old_scharr_gy Pointer to the input scharr Y tensor. Data types supported: S16
+ * @param[in] old_points_internal Pointer to the array of CLLKInternalKeypoint old points
+ * @param[in, out] new_points_internal Pointer to the array of CLLKInternalKeypoint new points
+ * @param[out] coeff_table Pointer to the array holding the Spatial Gradient coefficients
+ * @param[out] old_ival Pointer to the array holding internal values
+ * @param[in] window_dimension The size of the window on which to perform the algorithm
+ * @param[in] level The pyramid level
+ */
+ void configure(const ICLTensor *old_input, const ICLTensor *old_scharr_gx, const ICLTensor *old_scharr_gy,
+ ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal,
+ ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival,
+ size_t window_dimension, size_t level);
+ /** Initialise the kernel input and output
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] old_input Pointer to the input old tensor. Data types supported: U8
+ * @param[in] old_scharr_gx Pointer to the input scharr X tensor. Data types supported: S16
+ * @param[in] old_scharr_gy Pointer to the input scharr Y tensor. Data types supported: S16
+ * @param[in] old_points_internal Pointer to the array of CLLKInternalKeypoint old points
+ * @param[in, out] new_points_internal Pointer to the array of CLLKInternalKeypoint new points
+ * @param[out] coeff_table Pointer to the array holding the Spatial Gradient coefficients
+ * @param[out] old_ival Pointer to the array holding internal values
+ * @param[in] window_dimension The size of the window on which to perform the algorithm
+ * @param[in] level The pyramid level
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *old_input, const ICLTensor *old_scharr_gx, const ICLTensor *old_scharr_gy,
+ ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal,
+ ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival,
+ size_t window_dimension, size_t level);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_old_input; /**< Input old tensor */
+ const ICLTensor *_old_scharr_gx; /**< Input Scharr X gradient tensor */
+ const ICLTensor *_old_scharr_gy; /**< Input Scharr Y gradient tensor */
+};
+
+/** Interface to run the second stage of LKTracker, where the motion vectors of the given points are computed */
+class CLLKTrackerStage1Kernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLLKTrackerStage1Kernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLLKTrackerStage1Kernel(const CLLKTrackerStage1Kernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLLKTrackerStage1Kernel &operator=(const CLLKTrackerStage1Kernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLLKTrackerStage1Kernel(CLLKTrackerStage1Kernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLLKTrackerStage1Kernel &operator=(CLLKTrackerStage1Kernel &&) = default;
+ /** Initialise the kernel input and output
+ *
+ * @param[in] new_input Pointer to the input new tensor. Data types supported: U8
+ * @param[in, out] new_points_internal Pointer to the array of CLLKInternalKeypoint for new points
+ * @param[in] coeff_table Pointer to the array holding the Spatial Gradient coefficients
+ * @param[in] old_ival Pointer to the array holding internal values
+ * @param[in] termination The criteria to terminate the search of each keypoint.
+ * @param[in] epsilon The error for terminating the algorithm
+ * @param[in] num_iterations The maximum number of iterations before terminating the algorithm
+ * @param[in] window_dimension The size of the window on which to perform the algorithm
+ * @param[in] level The pyramid level
+ */
+ void configure(const ICLTensor *new_input, ICLLKInternalKeypointArray *new_points_internal, ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival,
+ Termination termination, float epsilon, size_t num_iterations, size_t window_dimension, size_t level);
+ /** Initialise the kernel input and output
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] new_input Pointer to the input new tensor. Data types supported: U8
+ * @param[in, out] new_points_internal Pointer to the array of CLLKInternalKeypoint for new points
+ * @param[in] coeff_table Pointer to the array holding the Spatial Gradient coefficients
+ * @param[in] old_ival Pointer to the array holding internal values
+ * @param[in] termination The criteria to terminate the search of each keypoint.
+ * @param[in] epsilon The error for terminating the algorithm
+ * @param[in] num_iterations The maximum number of iterations before terminating the algorithm
+ * @param[in] window_dimension The size of the window on which to perform the algorithm
+ * @param[in] level The pyramid level
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *new_input, ICLLKInternalKeypointArray *new_points_internal, ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival,
+ Termination termination, float epsilon, size_t num_iterations, size_t window_dimension, size_t level);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_new_input; /**< Input new tensor */
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLLKTRACKERKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h"
+#include "src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/ICLTensor.h"
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLLOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H
+#define ARM_COMPUTE_CLLOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel to multiply each row of first tensor with low 2 dimensions of second tensor.
+ *
+ * @attention The second input tensor must have at least 2 dimensions (matrix)
+ *
+ */
+class CLLocallyConnectedMatrixMultiplyKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLLocallyConnectedMatrixMultiplyKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLLocallyConnectedMatrixMultiplyKernel(const CLLocallyConnectedMatrixMultiplyKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLLocallyConnectedMatrixMultiplyKernel &operator=(const CLLocallyConnectedMatrixMultiplyKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLLocallyConnectedMatrixMultiplyKernel(CLLocallyConnectedMatrixMultiplyKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLLocallyConnectedMatrixMultiplyKernel &operator=(CLLocallyConnectedMatrixMultiplyKernel &&) = default;
+ /** Initialise the kernel's inputs and output
+ *
+ * @param[in] input0 First input tensor. Data types supported: F32
+ * @param[in] input1 Second input tensor. Data type supported: same as @p input0
+ * @param[out] output Output tensor to store the result. Data type supported: same as @p input0
+ */
+ void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output);
+ /** Initialise the kernel's inputs and output
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input0 First input tensor. Data types supported: F32
+ * @param[in] input1 Second input tensor. Data type supported: same as @p input0
+ * @param[out] output Output tensor to store the result. Data type supported: same as @p input0
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLLocallyConnectedMatrixMultiplyKernel
+ *
+ * @param[in] input0 First input tensor info. Data types supported: F32
+ * @param[in] input1 Second input tensor info. Data type supported: same as @p input0
+ * @param[in] output Output tensor info. Data type supported: same as @p input0
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input0; /**< First input tensor */
+ const ICLTensor *_input1; /**< Second input tensor */
+ ICLTensor *_output; /**< Output tensor */
+};
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h"
+#include "src/core/CL/kernels/CLMagnitudePhaseKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLMAGNITUDEPHASEKERNEL_H
+#define ARM_COMPUTE_CLMAGNITUDEPHASEKERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Template interface for the kernel to compute magnitude and phase.
+ *
+ */
+class CLMagnitudePhaseKernel : public ICLKernel
+{
+public:
+ /** Default constructor. */
+ CLMagnitudePhaseKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLMagnitudePhaseKernel(const CLMagnitudePhaseKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLMagnitudePhaseKernel &operator=(const CLMagnitudePhaseKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLMagnitudePhaseKernel(CLMagnitudePhaseKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLMagnitudePhaseKernel &operator=(CLMagnitudePhaseKernel &&) = default;
+ /** Initialise the kernel's input, output.
+ *
+ * @note At least one of magnitude or phase must be set.
+ *
+ * @param[in] gx The input gradient X tensor. Data types supported: S16/S32.
+ * @param[in] gy The input gradient Y tensor. Data types supported: S16/S32.
+ * @param[out] magnitude (Optional) The output tensor - Magnitude. Data types supported: S16/S32.
+ * @param[out] phase (Optional) The output tensor - Phase. Data types supported: U8.
+ * @param[in] mag_type (Optional) Magnitude calculation type. Default: L2NORM.
+ * @param[in] phase_type (Optional) Phase calculation type. Default: SIGNED.
+ */
+ void configure(const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase,
+ MagnitudeType mag_type = MagnitudeType::L2NORM, PhaseType phase_type = PhaseType::SIGNED);
+ /** Initialise the kernel's input, output.
+ *
+ * @note At least one of magnitude or phase must be set.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] gx The input gradient X tensor. Data types supported: S16/S32.
+ * @param[in] gy The input gradient Y tensor. Data types supported: S16/S32.
+ * @param[out] magnitude (Optional) The output tensor - Magnitude. Data types supported: S16/S32.
+ * @param[out] phase (Optional) The output tensor - Phase. Data types supported: U8.
+ * @param[in] mag_type (Optional) Magnitude calculation type. Default: L2NORM.
+ * @param[in] phase_type (Optional) Phase calculation type. Default: SIGNED.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase,
+ MagnitudeType mag_type = MagnitudeType::L2NORM, PhaseType phase_type = PhaseType::SIGNED);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_gx; /**< Input gradient X. */
+ const ICLTensor *_gy; /**< Input gradient Y. */
+ ICLTensor *_magnitude; /**< Output - Magnitude. */
+ ICLTensor *_phase; /**< Output - Phase. */
+ bool _run_mag; /**< Calculate magnitude ? */
+ bool _run_phase; /**< Calculate phase ? */
+};
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLMaxUnpoolingLayerKernel.h"
+#include "src/core/CL/kernels/CLMaxUnpoolingLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLMAXUNPOOLINGLAYERKERNEL_H
+#define ARM_COMPUTE_CLMAXUNPOOLINGLAYERKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the max unpooling layer kernel */
+class CLMaxUnpoolingLayerKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLMaxUnpoolingLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLMaxUnpoolingLayerKernel(const CLMaxUnpoolingLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLMaxUnpoolingLayerKernel &operator=(const CLMaxUnpoolingLayerKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLMaxUnpoolingLayerKernel(CLMaxUnpoolingLayerKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLMaxUnpoolingLayerKernel &operator=(CLMaxUnpoolingLayerKernel &&) = default;
+ /** Default destructor */
+ ~CLMaxUnpoolingLayerKernel() = default;
+ /** Set the input and output tensors.
+ *
+ * @note Output shape must be equal to the shape of the original input to pool.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] indices Tensor containing the offset to store the input elements in the output tensor.
+ * @ref CLPoolingLayerKernel with indices should precede this function in order to
+ * properly reconstruct the output tensor.
+ * The tensor shape of this tensor has to be equal to the input tensor shape. Data type supported: U32.
+ * @param[out] output Destination tensor. Data types supported: Same as @p input.
+ * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, const PoolingLayerInfo &pool_info);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLMaxUnpoolingLayerKernel
+ *
+ * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] indices TensorInfo associated to the tensor containing the offset to store the input elements in the output tensor.
+ * @ref CLPoolingLayerKernel with indices should precede this function in order to
+ * properly reconstruct the output tensor.
+ * The tensor shape of this tensor has to be equal to the input tensor shape. Data type supported: U32.
+ * @param[in] output Destination tensor info. Data types supported: Same as @p input.
+ * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, const PoolingLayerInfo &pool_info);
+
+ // Inherited methods overridden
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input; /**< Source tensor */
+ ICLTensor *_output; /**< Destination tensor */
+ const ICLTensor *_indices; /**< Indices tensor produced by the preceding pooling kernel */
+};
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLMeanStdDevKernel.h"
+#include "src/core/CL/kernels/CLMeanStdDevKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLMEANSTDDEVKERNEL_H
+#define ARM_COMPUTE_CLMEANSTDDEVKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace cl
+{
+class Buffer;
+}
+
+namespace arm_compute
+{
+class ICLTensor;
+using ICLImage = ICLTensor;
+
+/** Interface for the kernel to calculate mean and standard deviation of input image pixels. */
+class CLMeanStdDevKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLMeanStdDevKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLMeanStdDevKernel(const CLMeanStdDevKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLMeanStdDevKernel &operator=(const CLMeanStdDevKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLMeanStdDevKernel(CLMeanStdDevKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLMeanStdDevKernel &operator=(CLMeanStdDevKernel &&) = default;
+ /** Initialise the kernel's input and outputs.
+ *
+ * @param[in] input Input image. Data types supported: U8.
+ * @param[out] mean Output average pixel value.
+ * @param[out] global_sum Keeps global sum of pixel values (Buffer size: 1 cl_ulong).
+ * @param[out] stddev (Optional) Output standard deviation of pixel values.
+ * @param[out] global_sum_squared (Optional if stddev is not set, required if stddev is set) Keeps global sum of squared pixel values (Buffer size: 1 cl_ulong).
+ */
+ void configure(const ICLImage *input, float *mean, cl::Buffer *global_sum, float *stddev = nullptr, cl::Buffer *global_sum_squared = nullptr);
+ /** Initialise the kernel's input and outputs.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Input image. Data types supported: U8.
+ * @param[out] mean Output average pixel value.
+ * @param[out] global_sum Keeps global sum of pixel values (Buffer size: 1 cl_ulong).
+ * @param[out] stddev (Optional) Output standard deviation of pixel values.
+ * @param[out] global_sum_squared (Optional if stddev is not set, required if stddev is set) Keeps global sum of squared pixel values (Buffer size: 1 cl_ulong).
+ */
+ void configure(const CLCompileContext &compile_context, const ICLImage *input, float *mean, cl::Buffer *global_sum, float *stddev = nullptr, cl::Buffer *global_sum_squared = nullptr);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLMeanStdDevKernel.
+ *
+ * @param[in] input Input image info. Data types supported: U8.
+ * @param[in] mean Pointer where the average pixel value will be written.
+ * @param[in] global_sum Keeps global sum of pixel values.
+ * @param[in] stddev (Optional) Output standard deviation of pixel values.
+ * @param[in] global_sum_squared (Optional if stddev is not set, required if stddev is set) Keeps global sum of squared pixel values.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, float *mean, cl::Buffer *global_sum, float *stddev = nullptr, cl::Buffer *global_sum_squared = nullptr);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+ BorderSize border_size() const override;
+
+private:
+ const ICLImage *_input; /**< Input image */
+ float *_mean; /**< Output mean value */
+ float *_stddev; /**< Output standard deviation (may be nullptr) */
+ cl::Buffer *_global_sum; /**< Buffer holding the global sum of pixel values */
+ cl::Buffer *_global_sum_squared; /**< Buffer holding the global sum of squared pixel values (may be nullptr) */
+ BorderSize _border_size; /**< Border size returned by border_size() */
+};
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLMeanStdDevNormalizationKernel.h"
+#include "src/core/CL/kernels/CLMeanStdDevNormalizationKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2019-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLMEANSTDDEVNORMALIZATIONKERNEL_H
+#define ARM_COMPUTE_CLMEANSTDDEVNORMALIZATIONKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the kernel to normalize the input 2D tensor across the first dimension with respect to mean and standard deviation of the same dimension. */
+class CLMeanStdDevNormalizationKernel : public ICLKernel
+{
+public:
+    /** Default constructor */
+    CLMeanStdDevNormalizationKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLMeanStdDevNormalizationKernel(const CLMeanStdDevNormalizationKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLMeanStdDevNormalizationKernel &operator=(const CLMeanStdDevNormalizationKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    CLMeanStdDevNormalizationKernel(CLMeanStdDevNormalizationKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    CLMeanStdDevNormalizationKernel &operator=(CLMeanStdDevNormalizationKernel &&) = default;
+    /** Default destructor */
+    ~CLMeanStdDevNormalizationKernel() = default;
+    /** Initialise the kernel's input and outputs.
+     *
+     * @note If the output tensor is a nullptr, the normalization will be performed in-place.
+     *
+     * @param[in, out] input   Source tensor with 2 dimensions. In case of @p output tensor = nullptr,
+     *                         this tensor will store the result of the normalization. Data types supported: F16/F32.
+     * @param[out]     output  (Optional) Destination tensor. It can be nullptr in case of in-place computation. Data type supported: same as @p input
+     * @param[in]      epsilon (Optional) Small float to avoid division by zero in case of zero standard deviation. Defaults to 1e-8.
+     */
+    void configure(ICLTensor *input, ICLTensor *output = nullptr, float epsilon = 1e-8f);
+    /** Initialise the kernel's input and outputs.
+     *
+     * @note If the output tensor is a nullptr, the normalization will be performed in-place.
+     *
+     * @param[in]      compile_context The compile context to be used.
+     * @param[in, out] input           Source tensor with 2 dimensions. In case of @p output tensor = nullptr,
+     *                                 this tensor will store the result of the normalization. Data types supported: F16/F32.
+     * @param[out]     output          (Optional) Destination tensor. It can be nullptr in case of in-place computation. Data type supported: same as @p input
+     * @param[in]      epsilon         (Optional) Small float to avoid division by zero in case of zero standard deviation. Defaults to 1e-8.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output = nullptr, float epsilon = 1e-8f);
+    /** Static function to check if given info will lead to a valid configuration of @ref CLMeanStdDevNormalizationKernel
+     *
+     * @param[in] input   Source tensor info with 2 dimensions. In case of @p output tensor info = nullptr,
+     *                    this tensor will store the result of the normalization. Data types supported: F16/F32.
+     * @param[in] output  (Optional) Destination tensor info. It can be nullptr in case of in-place computation. Data type supported: same as @p input
+     * @param[in] epsilon (Optional) Small float to avoid division by zero in case of zero standard deviation. Defaults to 1e-8.
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input, const ITensorInfo *output = nullptr, float epsilon = 1e-8f);
+
+    // Inherited methods overridden:
+    void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+    ICLTensor *_input;        /**< Source tensor; also receives the result when running in-place. */
+    ICLTensor *_output;       /**< Destination tensor, or nullptr for in-place computation. */
+    bool       _run_in_place; /**< True when no output tensor was provided, i.e. the normalization writes back into @p _input. */
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLMEANSTDDEVNORMALIZATIONKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLMedian3x3Kernel.h"
+#include "src/core/CL/kernels/CLMedian3x3Kernel.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLMEDIAN3X3KERNEL_H
+#define ARM_COMPUTE_CLMEDIAN3X3KERNEL_H
+
+#include "src/core/CL/ICLSimple2DKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the median 3x3 filter kernel.
+ *
+ * Replaces each pixel with the median value of its 3x3 neighbourhood.
+ */
+class CLMedian3x3Kernel : public ICLSimple2DKernel
+{
+public:
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in]  input            An input tensor. Data types supported: U8
+     * @param[out] output           The output tensor. Data types supported: U8.
+     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
+     */
+    void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in]  compile_context  The compile context to be used.
+     * @param[in]  input            An input tensor. Data types supported: U8
+     * @param[out] output           The output tensor. Data types supported: U8.
+     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined);
+
+    // Inherited methods overridden:
+    BorderSize border_size() const override;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLMEDIAN3X3KERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLMemsetKernel.h"
+#include "src/core/CL/kernels/CLMemsetKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "src/core/helpers/WindowHelpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLMEMSETKERNEL_H
+#define ARM_COMPUTE_CLMEMSETKERNEL_H
+
+#include "arm_compute/core/PixelValue.h"
+#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for filling the planes of a tensor */
+class CLMemsetKernel : public ICLKernel
+{
+public:
+    /** Default constructor */
+    CLMemsetKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLMemsetKernel(const CLMemsetKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLMemsetKernel &operator=(const CLMemsetKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    CLMemsetKernel(CLMemsetKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    CLMemsetKernel &operator=(CLMemsetKernel &&) = default;
+    /** Default destructor */
+    ~CLMemsetKernel() = default;
+
+    /** Initialise the kernel's tensor and filling value
+     *
+     * @param[in,out] tensor         Input tensor to fill. Supported data types: All.
+     * @param[in]     constant_value The value used to fill the planes of the tensor
+     * @param[in]     window         Window to be used in case setting only part of a tensor. Default is nullptr.
+     */
+    void configure(ICLTensor *tensor, const PixelValue &constant_value, Window *window = nullptr);
+    /** Initialise the kernel's tensor and filling value
+     *
+     * @param[in]     compile_context The compile context to be used.
+     * @param[in,out] tensor          Input tensor to fill. Supported data types: All.
+     * @param[in]     constant_value  The value used to fill the planes of the tensor
+     * @param[in]     window          Window to be used in case setting only part of a tensor. Default is nullptr.
+     */
+    void configure(const CLCompileContext &compile_context, ICLTensor *tensor, const PixelValue &constant_value, Window *window = nullptr);
+    /** Static function to check if given info will lead to a valid configuration of @ref CLMemsetKernel
+     *
+     * @param[in] tensor         Source tensor info. Data types supported: All.
+     * @param[in] constant_value The value used to fill the planes of the tensor
+     * @param[in] window         Window to be used in case setting only part of a tensor. Default is nullptr.
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *tensor, const PixelValue &constant_value, Window *window = nullptr);
+
+    // Inherited methods overridden:
+    void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+    ICLTensor *_tensor;      /**< Tensor to fill with the constant value. */
+    Window     _full_window; /**< Window describing the region of the tensor to fill (whole tensor unless a window was given at configure time). */
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLMEMSETKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLMinMaxLayerKernel.h"
+#include "src/core/CL/kernels/CLMinMaxLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLMINMAXLAYERKERNEL_H
+#define ARM_COMPUTE_CLMINMAXLAYERKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the kernel to perform min max search on a 3D tensor.
+ */
+class CLMinMaxLayerKernel : public ICLKernel
+{
+public:
+    /** Default constructor */
+    CLMinMaxLayerKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLMinMaxLayerKernel(const CLMinMaxLayerKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLMinMaxLayerKernel &operator=(const CLMinMaxLayerKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    CLMinMaxLayerKernel(CLMinMaxLayerKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    CLMinMaxLayerKernel &operator=(CLMinMaxLayerKernel &&) = default;
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in]  input  Input tensor with at least 3 dimensions. The dimensions over the third will be interpreted as batches.Data types supported: F32.
+     * @param[out] output Output tensor with shape [2, batches, ...] which stores the minimum and maximum values for each 3D input tensor.
+     *                    The dimensions over the second must match the batched dimensions of the input tensor. Data types supported: F32.
+     */
+    void configure(const ICLTensor *input, ICLTensor *output);
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Input tensor with at least 3 dimensions. The dimensions over the third will be interpreted as batches.Data types supported: F32.
+     * @param[out] output          Output tensor with shape [2, batches, ...] which stores the minimum and maximum values for each 3D input tensor.
+     *                             The dimensions over the second must match the batched dimensions of the input tensor. Data types supported: F32.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
+    /** Static function to check if given info will lead to a valid configuration of @ref CLMinMaxLayerKernel
+     *
+     * @param[in] input  Input tensor info. Data types supported: F32.
+     * @param[in] output Output tensor info with shape [2, batches, ...] which stores the minimum and maximum values for each 3D input tensor.
+     *                   The dimensions over the second must match the batched dimensions of the input tensor. Data types supported: F32.
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input, const ITensorInfo *output);
+
+    /** Resets global minimum and maximum
+     *
+     * @param[in,out] queue Command queue on which to map and unmap the min_max tensor
+     */
+    void reset(cl::CommandQueue &queue);
+
+    // Inherited methods overridden:
+    void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+    const ICLTensor *_input;  /**< Source tensor (at least 3D). */
+    ICLTensor       *_output; /**< Destination tensor holding the per-batch [min, max] pairs. */
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLMINMAXLAYERKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLMinMaxLocationKernel.h"
+#include "src/core/CL/kernels/CLMinMaxLocationKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLMINMAXLOCATIONKERNEL_H
+#define ARM_COMPUTE_CLMINMAXLOCATIONKERNEL_H
+
+#include "arm_compute/core/CL/ICLArray.h"
+#include "src/core/CL/ICLKernel.h"
+
+#include <array>
+
+namespace arm_compute
+{
+class ICLTensor;
+using ICLImage = ICLTensor;
+
+/** Interface for the kernel to perform min max search on an image.
+ */
+class CLMinMaxKernel : public ICLKernel
+{
+public:
+    /** Default constructor */
+    CLMinMaxKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLMinMaxKernel(const CLMinMaxKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLMinMaxKernel &operator=(const CLMinMaxKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    CLMinMaxKernel(CLMinMaxKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    CLMinMaxKernel &operator=(CLMinMaxKernel &&) = default;
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in]  input   Input Image. Data types supported: U8/S16/F32.
+     * @param[out] min_max Buffer of 2 elements to store the min value at position 0 and the max value at position 1. Data type supported: S32 if input type is U8/S16, F32 if input type is F32.
+     */
+    void configure(const ICLImage *input, cl::Buffer *min_max);
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Input Image. Data types supported: U8/S16/F32.
+     * @param[out] min_max         Buffer of 2 elements to store the min value at position 0 and the max value at position 1. Data type supported: S32 if input type is U8/S16, F32 if input type is F32.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLImage *input, cl::Buffer *min_max);
+
+    // Inherited methods overridden:
+    void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+    const ICLTensor   *_input;             /**< Input image. */
+    cl::Buffer        *_min_max;           /**< Output buffer holding the minimum and maximum value. */
+    std::array<int, 2> _data_type_max_min; /**< Maximum and minimum value of the input data type, in that order. */
+};
+
+/** Interface for the kernel to find min max locations of an image.
+ */
+class CLMinMaxLocationKernel : public ICLKernel
+{
+public:
+    /** Constructor */
+    CLMinMaxLocationKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLMinMaxLocationKernel(const CLMinMaxLocationKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLMinMaxLocationKernel &operator=(const CLMinMaxLocationKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    CLMinMaxLocationKernel(CLMinMaxLocationKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    CLMinMaxLocationKernel &operator=(CLMinMaxLocationKernel &&) = default;
+    /** Initialise the kernel's input and outputs.
+     *
+     * @note When locations of min and max occurrences are requested, the reported number of locations is limited to the given array size.
+     *
+     * @param[in]  input         Input image. Data types supported: U8/S16/F32.
+     * @param[out] min_max       Buffer of 2 elements to store the min value at position 0 and the max value at position 1. Data type supported: S32 if input type is U8/S16, F32 if input type is F32.
+     * @param[out] min_max_count Buffer of 2 elements to store the min value occurrences at position 0 and the max value occurrences at position 1. Data type supported: S32
+     * @param[out] min_loc       (Optional) Array of Coordinates2D used to store minimum value locations.
+     * @param[out] max_loc       (Optional) Array of Coordinates2D used to store maximum value locations.
+     */
+    void configure(const ICLImage *input, cl::Buffer *min_max, cl::Buffer *min_max_count,
+                   ICLCoordinates2DArray *min_loc = nullptr, ICLCoordinates2DArray *max_loc = nullptr);
+    /** Initialise the kernel's input and outputs.
+     *
+     * @note When locations of min and max occurrences are requested, the reported number of locations is limited to the given array size.
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Input image. Data types supported: U8/S16/F32.
+     * @param[out] min_max         Buffer of 2 elements to store the min value at position 0 and the max value at position 1. Data type supported: S32 if input type is U8/S16, F32 if input type is F32.
+     * @param[out] min_max_count   Buffer of 2 elements to store the min value occurrences at position 0 and the max value occurrences at position 1. Data type supported: S32
+     * @param[out] min_loc         (Optional) Array of Coordinates2D used to store minimum value locations.
+     * @param[out] max_loc         (Optional) Array of Coordinates2D used to store maximum value locations.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLImage *input, cl::Buffer *min_max, cl::Buffer *min_max_count,
+                   ICLCoordinates2DArray *min_loc = nullptr, ICLCoordinates2DArray *max_loc = nullptr);
+
+    // Inherited methods overridden:
+    void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+    const ICLImage *_input;         /**< Input image. */
+    cl::Buffer     *_min_max_count; /**< Buffer holding the number of min and max value occurrences. */
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLMINMAXLOCATIONKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLNonLinearFilterKernel.h"
+#include "src/core/CL/kernels/CLNonLinearFilterKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLNONLINEARFILTERKERNEL_H
+#define ARM_COMPUTE_CLNONLINEARFILTERKERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLSimple2DKernel.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the kernel to apply a non-linear filter over a configurable mask pattern */
+class CLNonLinearFilterKernel : public ICLSimple2DKernel
+{
+public:
+    /** Default constructor */
+    CLNonLinearFilterKernel();
+    /** Set the source, destination and border mode of the kernel
+     *
+     * @param[in]  input            Source tensor. Data types supported: U8
+     * @param[out] output           Destination tensor. Data types supported: U8
+     * @param[in]  function         Non linear function to perform
+     * @param[in]  mask_size        Mask size. Supported sizes: 3, 5
+     * @param[in]  pattern          Mask pattern
+     * @param[in]  mask             The given mask. Will be used only if pattern is specified to PATTERN_OTHER
+     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
+     */
+    void configure(const ICLTensor *input, ICLTensor *output, NonLinearFilterFunction function,
+                   unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask,
+                   bool border_undefined);
+    /** Set the source, destination and border mode of the kernel
+     *
+     * @param[in]  compile_context  The compile context to be used.
+     * @param[in]  input            Source tensor. Data types supported: U8
+     * @param[out] output           Destination tensor. Data types supported: U8
+     * @param[in]  function         Non linear function to perform
+     * @param[in]  mask_size        Mask size. Supported sizes: 3, 5
+     * @param[in]  pattern          Mask pattern
+     * @param[in]  mask             The given mask. Will be used only if pattern is specified to PATTERN_OTHER
+     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, NonLinearFilterFunction function,
+                   unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask,
+                   bool border_undefined);
+
+    // Inherited methods overridden:
+    BorderSize border_size() const override;
+
+private:
+    BorderSize _border_size; /**< Border size, set during configuration. */
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLNONLINEARFILTERKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h"
+#include "src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLNONMAXIMASUPPRESSION3x3KERNEL_H
+#define ARM_COMPUTE_CLNONMAXIMASUPPRESSION3x3KERNEL_H
+
+#include "src/core/CL/ICLSimple2DKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface to perform Non-Maxima suppression over a 3x3 window using OpenCL
+ *
+ * Keeps only the values that are local maxima within their 3x3 neighbourhood and suppresses the rest.
+ *
+ * @note Used by @ref CLFastCorners and @ref CLHarrisCorners
+ */
+class CLNonMaximaSuppression3x3Kernel : public ICLSimple2DKernel
+{
+public:
+    /** Initialise the kernel's sources, destinations and border mode.
+     *
+     * @param[in]  input            Source tensor. Data types supported: U8, F32. (Must be the same as the output tensor)
+     * @param[out] output           Destination tensor. Data types supported: U8, F32. (Must be the same as the input tensor)
+     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
+     */
+    void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
+    /** Initialise the kernel's sources, destinations and border mode.
+     *
+     * @param[in]  compile_context  The compile context to be used.
+     * @param[in]  input            Source tensor. Data types supported: U8, F32. (Must be the same as the output tensor)
+     * @param[out] output           Destination tensor. Data types supported: U8, F32. (Must be the same as the input tensor)
+     * @param[in]  border_undefined True if the border mode is undefined. False if it's replicate or constant.
+     */
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined);
+
+    // Inherited methods overridden:
+    BorderSize border_size() const override;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLNONMAXIMASUPPRESSION3x3KERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLNormalizationLayerKernel.h"
+#include "src/core/CL/kernels/CLNormalizationLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLNORMALIZATIONLAYERKERNEL_H
+#define ARM_COMPUTE_CLNORMALIZATIONLAYERKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the normalization layer kernel.
+ */
+class CLNormalizationLayerKernel : public ICLKernel
+{
+public:
+ /** Constructor */
+ CLNormalizationLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLNormalizationLayerKernel(const CLNormalizationLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLNormalizationLayerKernel &operator=(const CLNormalizationLayerKernel &) = delete;
+ /** Default Move Constructor. */
+ CLNormalizationLayerKernel(CLNormalizationLayerKernel &&) = default;
+ /** Default move assignment operator */
+ CLNormalizationLayerKernel &operator=(CLNormalizationLayerKernel &&) = default;
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM],
+ * and an optional 4th dimension for batch of inputs. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC.
+ * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data types supported: same as @p input.
+ * Data layouts supported: same as @p input.
+ * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, NormalizationLayerInfo norm_info);
+ /** Set the input and output tensors.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM],
+ * and an optional 4th dimension for batch of inputs. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC.
+ * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data types supported: same as @p input.
+ * Data layouts supported: same as @p input.
+ * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, NormalizationLayerInfo norm_info);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLNormalizationLayerKernel
+ *
+ * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM],
+ * and an optional 4th dimension for batch of inputs. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC.
+ * @param[in] output Destination tensor. Output will have the same number of dimensions as input. Data types supported: same as @p input.
+ * Data layouts supported: same as @p input.
+ * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, NormalizationLayerInfo norm_info);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+ BorderSize border_size() const override;
+
+private:
+ const ICLTensor *_input;
+ ICLTensor *_output;
+ BorderSize _border_size;
+ bool _is_norm_across_width;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLNORMALIZATIONLAYERKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h"
+#include "src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLNORMALIZEPLANARYUVLAYERKERNEL_H
+#define ARM_COMPUTE_CLNORMALIZEPLANARYUVLAYERKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the NormalizePlanarYUV layer kernel. */
+class CLNormalizePlanarYUVLayerKernel : public ICLKernel
+{
+public:
+ /** Constructor */
+ CLNormalizePlanarYUVLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLNormalizePlanarYUVLayerKernel(const CLNormalizePlanarYUVLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLNormalizePlanarYUVLayerKernel &operator=(const CLNormalizePlanarYUVLayerKernel &) = delete;
+ /** Default Move Constructor. */
+ CLNormalizePlanarYUVLayerKernel(CLNormalizePlanarYUVLayerKernel &&) = default;
+ /** Default move assignment operator */
+ CLNormalizePlanarYUVLayerKernel &operator=(CLNormalizePlanarYUVLayerKernel &&) = default;
+ /** Default destructor */
+ ~CLNormalizePlanarYUVLayerKernel() = default;
+
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, channels].
+ * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[out] output Destination tensor. Data type supported: same as @p input
+ * @param[in] mean Mean values tensor. 1 dimension with size equal to the number of input channels. Data types supported: same as @p input
+ * @param[in] std Standard deviation values tensor. 1 dimension with size equal to the number of input channels.
+ * Data types supported: same as @p input
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *std);
+ /** Set the input and output tensors.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, channels].
+ * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[out] output Destination tensor. Data type supported: same as @p input
+ * @param[in] mean Mean values tensor. 1 dimension with size equal to the number of input channels. Data types supported: same as @p input
+ * @param[in] std Standard deviation values tensor. 1 dimension with size equal to the number of input channels.
+ * Data types supported: same as @p input
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *std);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLNormalizePlanarYUVLayerKernel
+ *
+ * @param[in] input Source tensor info. 3 lower dimensions represent a single input with dimensions [width, height, channels].
+ * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in]  output          Destination tensor info. Data type supported: same as @p input
+ * @param[in] mean Mean values tensor info. 1 dimension with size equal to the number of input channels. Data types supported: same as @p input
+ * @param[in] std Standard deviation values tensor info. 1 dimension with size equal to the number of input channels.
+ * Data types supported: same as @p input
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *mean, const ITensorInfo *std);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input;
+ ICLTensor *_output;
+ const ICLTensor *_mean;
+ const ICLTensor *_std;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLNORMALIZEPLANARYUVLAYERKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLPadLayerKernel.h"
+#include "src/core/CL/kernels/CLPadLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/ICLTensor.h"
--- /dev/null
+/*
+ * Copyright (c) 2019-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLPADLAYERKERNEL_H
+#define ARM_COMPUTE_CLPADLAYERKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the PadLayer function. */
+class CLPadLayerKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLPadLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLPadLayerKernel(const CLPadLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLPadLayerKernel &operator=(const CLPadLayerKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLPadLayerKernel(CLPadLayerKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLPadLayerKernel &operator=(CLPadLayerKernel &&) = default;
+ /** Default destructor */
+ ~CLPadLayerKernel() = default;
+ /** Set the input and output tensor.
+ *
+ * @param[in] input Source tensor. Data types supported: All.
+ * @param[out] output Output tensor. Data type supported: same as @p input
+ * @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i]
+ * specifies the front and the end padding in the i-th dimension.
+ * @param[in] constant_value (Optional) Constant value to be used for the padding.
+ * @param[in] mode (Optional) Controls whether the padding should be filled with @p constant_value using CONSTANT,
+ * or reflect the input, either including the border values (SYMMETRIC) or not (REFLECT).
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const PaddingList &padding, PixelValue constant_value = PixelValue(), PaddingMode mode = PaddingMode::CONSTANT);
+ /** Set the input and output tensor.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: All.
+ * @param[out] output Output tensor. Data type supported: same as @p input
+ * @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i]
+ * specifies the front and the end padding in the i-th dimension.
+ * @param[in] constant_value (Optional) Constant value to be used for the padding.
+ * @param[in] mode (Optional) Controls whether the padding should be filled with @p constant_value using CONSTANT,
+ * or reflect the input, either including the border values (SYMMETRIC) or not (REFLECT).
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PaddingList &padding, PixelValue constant_value = PixelValue(),
+ PaddingMode mode = PaddingMode::CONSTANT);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLPadLayerKernel
+ *
+ * @param[in] input Source tensor info. Data types supported: All.
+ * @param[in] output Output tensor info. Data type supported: same as @p input
+ * @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i]
+ * specifies the front and the end padding in the i-th dimension.
+ * @param[in] constant_value (Optional) Constant value to be used for the padding.
+ * @param[in] mode (Optional) Controls whether the padding should be filled with @p constant_value using CONSTANT,
+ * or reflect the input, either including the border values (SYMMETRIC) or not (REFLECT).
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PaddingList &padding, PixelValue constant_value = PixelValue(), PaddingMode mode = PaddingMode::CONSTANT);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input;
+ ICLTensor *_output;
+ int _input_start_x;
+ int _input_start_y;
+ bool _4d_enabled;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLPADLAYERKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLPermuteKernel.h"
+#include "src/core/CL/kernels/CLPermuteKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "src/core/helpers/AutoConfiguration.h"
--- /dev/null
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLPERMUTEKERNEL_H
+#define ARM_COMPUTE_CLPERMUTEKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel to perform tensor permutation.
+ *
+ * Permutes given a permutation vector
+ */
+class CLPermuteKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLPermuteKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLPermuteKernel(const CLPermuteKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLPermuteKernel &operator=(const CLPermuteKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLPermuteKernel(CLPermuteKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLPermuteKernel &operator=(CLPermuteKernel &&) = default;
+ /** Set the input and output of the kernel.
+ *
+ * @note Arbitrary permutation vectors are supported with rank not greater than 4
+ *
+ * @param[in] input The input tensor to permute. Data types supported: All.
+ * @param[in] output The output tensor. Data types supported: Same as @p input
+ * @param[in] perm Permutation vector
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const PermutationVector &perm);
+ /** Set the input and output of the kernel.
+ *
+ * @note Arbitrary permutation vectors are supported with rank not greater than 4
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input The input tensor to permute. Data types supported: All.
+ * @param[in] output The output tensor. Data types supported: Same as @p input
+ * @param[in] perm Permutation vector
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PermutationVector &perm);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLPermuteKernel
+ *
+ * @note Arbitrary permutation vectors are supported with rank not greater than 4
+ *
+ * @param[in] input First tensor input info. Data types supported: All.
+ * @param[in] output Output tensor info. Data types supported: same as @p input.
+ * @param[in] perm Permutation vector
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PermutationVector &perm);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input;
+ ICLTensor *_output;
+ PermutationVector _perm;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLPERMUTEKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h"
+#include "src/core/CL/kernels/CLPixelWiseMultiplicationKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLPIXELWISEMULTIPLICATIONKERNEL_H
+#define ARM_COMPUTE_CLPIXELWISEMULTIPLICATIONKERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+// Forward declarations
+class ICLTensor;
+
+/** Interface for the pixelwise multiplication kernel. */
+class CLPixelWiseMultiplicationKernel : public ICLKernel
+{
+public:
+ /** Default constructor.*/
+ CLPixelWiseMultiplicationKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLPixelWiseMultiplicationKernel(const CLPixelWiseMultiplicationKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLPixelWiseMultiplicationKernel &operator=(const CLPixelWiseMultiplicationKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLPixelWiseMultiplicationKernel(CLPixelWiseMultiplicationKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLPixelWiseMultiplicationKernel &operator=(CLPixelWiseMultiplicationKernel &&) = default;
+ /** Initialise the kernel's input, output and border mode.
+ *
+ * Valid configurations (Input1,Input2) -> Output :
+ *
+ * - (U8,U8) -> U8
+ * - (U8,U8) -> S16
+ * - (U8,S16) -> S16
+ * - (S16,U8) -> S16
+ * - (S16,S16) -> S16
+ * - (F16,F16) -> F16
+ * - (F32,F32) -> F32
+ * - (QASYMM8,QASYMM8) -> QASYMM8
+ * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
+ * - (QSYMM16,QSYMM16) -> QSYMM16
+ * - (QSYMM16,QSYMM16) -> S32
+ *
+ * @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
+ * @param[in] input2 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
+ * @param[out] output The output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
+ * @param[in] scale Scale to apply after multiplication.
+ * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
+ * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate
+ * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ */
+ void configure(ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, float scale,
+ ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ /** Initialise the kernel's input, output and border mode.
+ *
+ * Valid configurations (Input1,Input2) -> Output :
+ *
+ * - (U8,U8) -> U8
+ * - (U8,U8) -> S16
+ * - (U8,S16) -> S16
+ * - (S16,U8) -> S16
+ * - (S16,S16) -> S16
+ * - (F16,F16) -> F16
+ * - (F32,F32) -> F32
+ * - (QASYMM8,QASYMM8) -> QASYMM8
+ * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
+ * - (QSYMM16,QSYMM16) -> QSYMM16
+ * - (QSYMM16,QSYMM16) -> S32
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
+ * @param[in] input2 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
+ * @param[out] output The output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
+ * @param[in] scale Scale to apply after multiplication.
+ * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
+ * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate
+ * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ */
+ void configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, float scale,
+ ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ /** Static function to check if given info will lead to a valid configuration of @ref CLPixelWiseMultiplicationKernel
+ *
+ * Valid configurations (Input1,Input2) -> Output :
+ *
+ * - (U8,U8) -> U8
+ * - (U8,U8) -> S16
+ * - (U8,S16) -> S16
+ * - (S16,U8) -> S16
+ * - (S16,S16) -> S16
+ * - (F16,F16) -> F16
+ * - (F32,F32) -> F32
+ * - (QASYMM8,QASYMM8) -> QASYMM8
+ * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
+ * - (QSYMM16,QSYMM16) -> QSYMM16
+ * - (QSYMM16,QSYMM16) -> S32
+ *
+ * @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
+ * @param[in] input2 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
+ * @param[in] output The output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
+ * @param[in] scale Scale to apply after multiplication.
+ * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
+ * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate
+ * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float scale,
+ ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+
+ // Inherited methods overridden:
+ void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
+ BorderSize border_size() const override;
+
+private:
+ const ITensorInfo *_input1;
+ const ITensorInfo *_input2;
+ ITensorInfo *_output;
+};
+
+/** Interface for the complex pixelwise multiplication kernel. */
+class CLComplexPixelWiseMultiplicationKernel : public ICLKernel
+{
+public:
+ /** Default constructor.*/
+ CLComplexPixelWiseMultiplicationKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLComplexPixelWiseMultiplicationKernel(const CLComplexPixelWiseMultiplicationKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLComplexPixelWiseMultiplicationKernel &operator=(const CLComplexPixelWiseMultiplicationKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLComplexPixelWiseMultiplicationKernel(CLComplexPixelWiseMultiplicationKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLComplexPixelWiseMultiplicationKernel &operator=(CLComplexPixelWiseMultiplicationKernel &&) = default;
+ /** Initialise the kernel's input, output and border mode.
+ *
+ * @param[in] input1 An input tensor info. Data types supported: F32. Number of channels supported: 2.
+ * @param[in] input2 An input tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
+ * @param[out] output The output tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ */
+ void configure(ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ /** Initialise the kernel's input, output and border mode.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input1 An input tensor info. Data types supported: F32. Number of channels supported: 2.
+ * @param[in] input2 An input tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
+ * @param[out] output The output tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ */
+ void configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ /** Static function to check if given info will lead to a valid configuration of @ref CLComplexPixelWiseMultiplicationKernel
+ *
+ * @param[in] input1 An input tensor info. Data types supported: F32. Number of channels supported: 2.
+ * @param[in] input2 An input tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
+ * @param[in] output The output tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+
+ // Inherited methods overridden:
+ void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
+ BorderSize border_size() const override;
+
+private:
+ const ITensorInfo *_input1;
+ const ITensorInfo *_input2;
+ ITensorInfo *_output;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLPIXELWISEMULTIPLICATIONKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLPoolingLayerKernel.h"
+#include "src/core/CL/kernels/CLPoolingLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "src/core/AccessWindowStatic.h"
#include "src/core/CL/CLValidate.h"
+#include "src/core/CL/ICLKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
case DataLayout::NHWC:
{
// Initialize border size
- border_size = BorderSize();
+ border_size = BorderSize();
num_elems_processed_per_iteration = adjust_vec_size(4, output->dimension(0));
win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration));
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLPOOLINGLAYERKERNEL_H
+#define ARM_COMPUTE_CLPOOLINGLAYERKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+#include "arm_compute/core/Error.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the pooling layer kernel */
+class CLPoolingLayerKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLPoolingLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLPoolingLayerKernel(const CLPoolingLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLPoolingLayerKernel &operator=(const CLPoolingLayerKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLPoolingLayerKernel(CLPoolingLayerKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLPoolingLayerKernel &operator=(CLPoolingLayerKernel &&) = default;
+ /** Default destructor */
+ ~CLPoolingLayerKernel() = default;
+
+ /** Set the input and output tensors.
+ *
+ *
+ * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[out] output Destination tensor. Data types supported: Same as @p input.
+ * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
+ * @param[out] indices (optional) The indices of the maximal values. Data type supported: U32.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info, ICLTensor *indices = nullptr);
+ /** Set the input and output tensors.
+ *
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[out] output Destination tensor. Data types supported: Same as @p input.
+ * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
+ * @param[out] indices (optional) The indices of the maximal values. Data type supported: U32.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info, ICLTensor *indices = nullptr);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLPoolingLayerKernel
+ *
+ * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] output Destination tensor info. Data types supported: Same as @p input.
+ * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
+ * @param[in] indices (optional) The indices of the maximal values. Data type supported: U32.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, const ITensorInfo *indices = nullptr);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+ BorderSize border_size() const override;
+
+public:
+ // NOTE(review): data members are public here, unlike the sibling kernels in
+ // this patch which keep them private — presumably they are read directly by
+ // out-of-view code; confirm before tightening access to private.
+ const ICLTensor *_input;
+ ICLTensor *_output;
+ ICLTensor *_indices;
+ PoolingLayerInfo _pool_info;
+ DataLayout _data_layout;
+ BorderSize _border_size;
+ unsigned int _num_elems_processed_per_iteration;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLPOOLINGLAYERKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLPriorBoxLayerKernel.h"
+#include "src/core/CL/kernels/CLPriorBoxLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLPRIORBOXLAYERKERNEL_H
+#define ARM_COMPUTE_CLPRIORBOXLAYERKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the PriorBox layer kernel. */
+class CLPriorBoxLayerKernel : public ICLKernel
+{
+public:
+ /** Constructor */
+ CLPriorBoxLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLPriorBoxLayerKernel(const CLPriorBoxLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLPriorBoxLayerKernel &operator=(const CLPriorBoxLayerKernel &) = delete;
+ /** Default Move Constructor. */
+ CLPriorBoxLayerKernel(CLPriorBoxLayerKernel &&) = default;
+ /** Default move assignment operator */
+ CLPriorBoxLayerKernel &operator=(CLPriorBoxLayerKernel &&) = default;
+ /** Default destructor */
+ ~CLPriorBoxLayerKernel() = default;
+
+ /** Set the input and output tensors.
+ *
+ * @param[in] input1 First source tensor. Data types supported: F32. Data layouts supported: NCHW/NHWC.
+ * @param[in] input2 Second source tensor. Data types and layouts supported: same as @p input1
+ * @param[out] output Destination tensor. Output dimensions are [W * H * num_priors * 4, 2]. Data types and layouts supported: same as @p input1
+ * @param[in] info Prior box layer info.
+ * @param[in] min Minimum prior box values
+ * @param[in] max Maximum prior box values
+ * @param[in] aspect_ratios Aspect ratio values
+ */
+ void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const PriorBoxLayerInfo &info, cl::Buffer *min, cl::Buffer *max, cl::Buffer *aspect_ratios);
+ /** Set the input and output tensors.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input1 First source tensor. Data types supported: F32. Data layouts supported: NCHW/NHWC.
+ * @param[in] input2 Second source tensor. Data types and layouts supported: same as @p input1
+ * @param[out] output Destination tensor. Output dimensions are [W * H * num_priors * 4, 2]. Data types and layouts supported: same as @p input1
+ * @param[in] info Prior box layer info.
+ * @param[in] min Minimum prior box values
+ * @param[in] max Maximum prior box values
+ * @param[in] aspect_ratios Aspect ratio values
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const PriorBoxLayerInfo &info, cl::Buffer *min, cl::Buffer *max,
+ cl::Buffer *aspect_ratios);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLPriorBoxLayerKernel
+ *
+ * @param[in] input1 First source tensor info. Data types supported: F32. Data layouts supported: NCHW/NHWC.
+ * @param[in] input2 Second source tensor info. Data types and layouts supported: same as @p input1
+ * @param[in] output Destination tensor info. Output dimensions are [W * H * num_priors * 4, 2]. Data type supported: same as @p input1
+ * @param[in] info Prior box layer info.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const PriorBoxLayerInfo &info);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ // NOTE(review): the raw pointers below appear to be non-owning (the defaulted
+ // destructor releases nothing); the caller must keep the tensors and
+ // cl::Buffer objects alive while the kernel is in use — confirm.
+ const ICLTensor *_input1;
+ const ICLTensor *_input2;
+ ICLTensor *_output;
+ PriorBoxLayerInfo _info;
+ int _num_priors;
+ cl::Buffer *_min;
+ cl::Buffer *_max;
+ cl::Buffer *_aspect_ratios;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLPRIORBOXLAYERKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h"
+#include "src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "src/core/helpers/AutoConfiguration.h"
--- /dev/null
+/*
+ * Copyright (c) 2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLQLSTMLAYERNORMALIZATIONKERNEL_H
+#define ARM_COMPUTE_CLQLSTMLAYERNORMALIZATIONKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the kernel to do layer normalization. */
+class CLQLSTMLayerNormalizationKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLQLSTMLayerNormalizationKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLQLSTMLayerNormalizationKernel(const CLQLSTMLayerNormalizationKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLQLSTMLayerNormalizationKernel &operator=(const CLQLSTMLayerNormalizationKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLQLSTMLayerNormalizationKernel(CLQLSTMLayerNormalizationKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLQLSTMLayerNormalizationKernel &operator=(CLQLSTMLayerNormalizationKernel &&) = default;
+ /** Default destructor */
+ ~CLQLSTMLayerNormalizationKernel() = default;
+ /** Initialise the kernel's input and outputs.
+ *
+ * @param[in] input Source tensor with 2 dimensions. Data types supported: QSYMM16.
+ * @param[out] output Destination tensor. Data type supported: same as @p input
+ * @param[in] weight Weight tensor. Data types supported: Same as @p input.
+ * @param[in] bias Bias tensor. Data types supported: S32.
+ *
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *weight, const ICLTensor *bias);
+ /** Initialise the kernel's input and outputs.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor with 2 dimensions. Data types supported: QSYMM16.
+ * @param[out] output Destination tensor. Data type supported: same as @p input
+ * @param[in] weight Weight tensor. Data types supported: Same as @p input.
+ * @param[in] bias Bias tensor. Data types supported: S32.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *weight, const ICLTensor *bias);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLQLSTMLayerNormalizationKernel
+ *
+ * @param[in] input Source tensor info with 2 dimensions. Data types supported: QSYMM16.
+ * @param[in] output Destination info tensor. Data type supported: same as @p input
+ * @param[in] weight Weight info tensor. Data types supported: Same as @p input.
+ * @param[in] bias Bias tensor info. Data types supported: S32.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *weight, const ITensorInfo *bias);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ // Non-owning handles; the defaulted destructor releases nothing.
+ const ICLTensor *_input;
+ const ICLTensor *_weight;
+ const ICLTensor *_bias;
+ ICLTensor *_output;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLQLSTMLAYERNORMALIZATIONKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLQuantizationLayerKernel.h"
+#include "src/core/CL/kernels/CLQuantizationLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLQUANTIZATIONLAYERKERNEL_H
+#define ARM_COMPUTE_CLQUANTIZATIONLAYERKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the quantization layer kernel.
+ *
+ * @note The implementation supports only 3D input tensors.
+ */
+class CLQuantizationLayerKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLQuantizationLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLQuantizationLayerKernel(const CLQuantizationLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLQuantizationLayerKernel &operator=(const CLQuantizationLayerKernel &) = delete;
+ /** Default Move Constructor. */
+ CLQuantizationLayerKernel(CLQuantizationLayerKernel &&) = default;
+ /** Default move assignment operator */
+ CLQuantizationLayerKernel &operator=(CLQuantizationLayerKernel &&) = default;
+ /** Default destructor */
+ ~CLQuantizationLayerKernel() = default;
+ /** Set the input, output.
+ *
+ * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16.
+ * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16.
+ *
+ * @note Output auto initialization is not supported by this kernel
+ */
+ void configure(const ICLTensor *input, ICLTensor *output);
+ /** Set the input, output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16.
+ * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16.
+ *
+ * @note Output auto initialization is not supported by this kernel
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLQuantizationLayerKernel
+ *
+ * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16.
+ * @param[in] output Destination tensor info with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ // Non-owning tensor handles; the defaulted destructor releases nothing.
+ const ICLTensor *_input;
+ ICLTensor *_output;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLQUANTIZATIONLAYERKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLROIAlignLayerKernel.h"
+#include "src/core/CL/kernels/CLROIAlignLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLROIALIGNLAYERKERNEL_H
+#define ARM_COMPUTE_CLROIALIGNLAYERKERNEL_H
+
+#include "arm_compute/core/CL/ICLArray.h"
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the RoIAlign kernel.
+ */
+class CLROIAlignLayerKernel : public ICLKernel
+{
+public:
+ /** Constructor */
+ CLROIAlignLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLROIAlignLayerKernel(const CLROIAlignLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLROIAlignLayerKernel &operator=(const CLROIAlignLayerKernel &) = delete;
+ /** Default Move Constructor. */
+ CLROIAlignLayerKernel(CLROIAlignLayerKernel &&) = default;
+ /** Default move assignment operator. */
+ CLROIAlignLayerKernel &operator=(CLROIAlignLayerKernel &&) = default;
+ /** Default destructor */
+ ~CLROIAlignLayerKernel() = default;
+
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner
+ * as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ].
+ * Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8/QASYMM8_SIGNED, otherwise same as @p input
+ * @param[out] output Destination tensor. Data types supported: Same as @p input.
+ * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo.
+ *
+ * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled
+ * width and pooled height.
+ * @note The z dimensions of @p output tensor and @p input tensor must be the same.
+ * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array.
+ */
+ void configure(const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info);
+ /** Set the input and output tensors.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner
+ * as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ].
+ * Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8/QASYMM8_SIGNED, otherwise same as @p input
+ * @param[out] output Destination tensor. Data types supported: Same as @p input.
+ * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo.
+ *
+ * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled
+ * width and pooled height.
+ * @note The z dimensions of @p output tensor and @p input tensor must be the same.
+ * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLROIAlignLayerKernel
+ *
+ * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] rois ROIs tensor info. Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8/QASYMM8_SIGNED,
+ * otherwise same as @p input
+ * @param[in] output Destination tensor info. Data types supported: Same as @p input.
+ * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo.
+ *
+ * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled
+ * width and pooled height.
+ * @note The z dimensions of @p output tensor and @p input tensor must be the same.
+ * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array.
+ *
+ * @note (review) @p output is taken non-const here, unlike sibling kernels' validate(); kept as-is to match the out-of-view definition — confirm.
+ *
+ * @return a Status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *rois, ITensorInfo *output, const ROIPoolingLayerInfo &pool_info);
+
+ // Inherited methods overridden:
+ // `override` added: ICLKernel::run is virtual (siblings declare it with
+ // `override`), and omitting it forfeits the compiler's signature check.
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input;
+ ICLTensor *_output;
+ const ICLTensor *_rois;
+ ROIPoolingLayerInfo _pool_info;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLROIALIGNLAYERKERNEL_H*/
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLROIPoolingLayerKernel.h"
+#include "src/core/CL/kernels/CLROIPoolingLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLROIPOOLINGLAYERKERNEL_H
+#define ARM_COMPUTE_CLROIPOOLINGLAYERKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+#include "arm_compute/core/CL/ICLArray.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the ROI pooling layer kernel */
+class CLROIPoolingLayerKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLROIPoolingLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLROIPoolingLayerKernel(const CLROIPoolingLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLROIPoolingLayerKernel &operator=(const CLROIPoolingLayerKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLROIPoolingLayerKernel(CLROIPoolingLayerKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLROIPoolingLayerKernel &operator=(CLROIPoolingLayerKernel &&) = default;
+ /** Default destructor */
+ ~CLROIPoolingLayerKernel() = default;
+
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Source tensor. Data types supported: F16/F32.
+ * @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner
+ * as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ]. Data types supported: U16
+ * @param[out] output Destination tensor. Data types supported: Same as @p input.
+ * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo.
+ *
+ * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled
+ * width and pooled height.
+ * @note The z dimensions of @p output tensor and @p input tensor must be the same.
+ * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array.
+ */
+ void configure(const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info);
+ /** Set the input and output tensors.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: F16/F32.
+ * @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner
+ * as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ]. Data types supported: U16
+ * @param[out] output Destination tensor. Data types supported: Same as @p input.
+ * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo.
+ *
+ * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled
+ * width and pooled height.
+ * @note The z dimensions of @p output tensor and @p input tensor must be the same.
+ * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info);
+
+ // NOTE(review): unlike the sibling kernels in this patch there is no static
+ // validate() declared here — confirm whether that is intentional.
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ // Non-owning tensor handles; the defaulted destructor releases nothing.
+ const ICLTensor *_input;
+ const ICLTensor *_rois;
+ ICLTensor *_output;
+ ROIPoolingLayerInfo _pool_info;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLROIPOOLINGLAYERKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLRangeKernel.h"
+#include "src/core/CL/kernels/CLRangeKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/ICLTensor.h"
--- /dev/null
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLRANGEKERNEL_H
+#define ARM_COMPUTE_CLRANGEKERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Kernel class for Range
+ *
+ * range generates a 1-D tensor containing a sequence of numbers that begins at 'start' and extends by increments
+ * of 'step' up to but not including 'end'.
+ */
+class CLRangeKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLRangeKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLRangeKernel(const CLRangeKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLRangeKernel &operator=(const CLRangeKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLRangeKernel(CLRangeKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLRangeKernel &operator=(CLRangeKernel &&) = default;
+ /** Default destructor */
+ ~CLRangeKernel() = default;
+ /** Initialize the kernel's output tensor, start, end and step of the sequence.
+ *
+ * @param[out] output Output tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
+ * @param[in] start The starting value of the sequence.
+ * @param[in] end The ending value of the sequence (exclusive; not part of the output).
+ * @param[in] step The gap between each pair of values in the sequence.
+ */
+ void configure(ICLTensor *output, float start, float end, float step);
+ /** Initialize the kernel's output tensor, start, end and step of the sequence.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[out] output Output tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
+ * @param[in] start The starting value of the sequence.
+ * @param[in] end The ending value of the sequence (exclusive; not part of the output).
+ * @param[in] step The gap between each pair of values in the sequence.
+ */
+ void configure(const CLCompileContext &compile_context, ICLTensor *output, float start, float end, float step);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLRangeKernel
+ *
+ * @param[in] output Output tensor info. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
+ * @param[in] start The starting value of the sequence.
+ * @param[in] end The ending value of the sequence (exclusive; not part of the output).
+ * @param[in] step The gap between each pair of values in the sequence.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *output, float start, float end, float step);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ float _start; /**< Start of sequence */
+ float _end; /**< End of sequence (exclusive) */
+ float _step; /**< Increment/step value */
+ ICLTensor *_output; /**< Destination tensor */
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLRANGEKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLReductionOperationKernel.h"
+#include "src/core/CL/kernels/CLReductionOperationKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLREDUCTIONOPERATIONKERNEL_H
+#define ARM_COMPUTE_CLREDUCTIONOPERATIONKERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the reduction operation kernel
+ */
+class CLReductionOperationKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLReductionOperationKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLReductionOperationKernel(const CLReductionOperationKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLReductionOperationKernel &operator=(const CLReductionOperationKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLReductionOperationKernel(CLReductionOperationKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLReductionOperationKernel &operator=(CLReductionOperationKernel &&) = default;
+ /** Default destructor */
+ ~CLReductionOperationKernel() = default;
+
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/S32/F16/F32.
+ * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input.
+ * Output will have the same number of dimensions as input.
+ * @param[in] axis Axis along which to reduce. Supported reduction axis : 0,1,2,3
+ * @param[in] op Reduction operation to perform. Operations supported: MEAN_SUM, PROD, SUM_SQUARE, SUM, MIN, MAX
+ * @param[in] width (Optional) In case of x-axis we also need to provide the width of the input image.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op, unsigned int width = 0);
+ /** Set the input and output tensors.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/S32/F16/F32.
+ * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input.
+ * Output will have the same number of dimensions as input.
+ * @param[in] axis Axis along which to reduce. Supported reduction axis : 0,1,2,3
+ * @param[in] op Reduction operation to perform. Operations supported: MEAN_SUM, PROD, SUM_SQUARE, SUM, MIN, MAX
+ * @param[in] width (Optional) In case of x-axis we also need to provide the width of the input image.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op, unsigned int width = 0);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref CLReductionOperationKernel.
+ *
+ * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/S32/F16/F32.
+ * @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p input.
+ * Output will have the same number of dimensions as input.
+ * @param[in] axis Axis along which to reduce. Supported reduction axis : 0,1,2,3
+ * @param[in] op Reduction operation to perform. Operations supported: MEAN_SUM, PROD, SUM_SQUARE, SUM, MIN, MAX
+ * @param[in] width (Optional) In case of x-axis we also need to provide the width of the input image.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op, unsigned int width = 0);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+ BorderSize border_size() const override;
+
+private:
+ const ICLTensor *_input;
+ ICLTensor *_output;
+ unsigned int _reduction_axis;
+ ReductionOperation _op;
+ BorderSize _border_size;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLREDUCTIONOPERATIONKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLRemapKernel.h"
+#include "src/core/CL/kernels/CLRemapKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLREMAPKERNEL_H
+#define ARM_COMPUTE_CLREMAPKERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel to perform a remap on a tensor */
+class CLRemapKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLRemapKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLRemapKernel(const CLRemapKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLRemapKernel &operator=(const CLRemapKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLRemapKernel(CLRemapKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLRemapKernel &operator=(CLRemapKernel &&) = default;
+ /** Initialize the kernel's input, output and border mode.
+ *
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[in] map_x Map for X coordinates. Data types supported: F32.
+ * @param[in] map_y Map for Y coordinates. Data types supported: F32.
+ * @param[out] output Destination tensor. Data types supported: U8. All but the lowest two dimensions must be the same size as in the input tensor, i.e. remapping is only performed within the XY-plane.
+ * @param[in] policy The interpolation type.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, InterpolationPolicy policy, bool border_undefined);
+ /** Initialize the kernel's input, output and border mode.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[in] map_x Map for X coordinates. Data types supported: F32.
+ * @param[in] map_y Map for Y coordinates. Data types supported: F32.
+ * @param[out] output Destination tensor. Data types supported: U8. All but the lowest two dimensions must be the same size as in the input tensor, i.e. remapping is only performed within the XY-plane.
+ * @param[in] policy The interpolation type.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, InterpolationPolicy policy, bool border_undefined);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+ BorderSize border_size() const override;
+
+private:
+ const ICLTensor *_input;
+ ICLTensor *_output;
+ const ICLTensor *_map_x;
+ const ICLTensor *_map_y;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLREMAPKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLReorgLayerKernel.h"
+#include "src/core/CL/kernels/CLReorgLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLREORGLAYERKERNEL_H
+#define ARM_COMPUTE_CLREORGLAYERKERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel to perform a reorg layer */
+class CLReorgLayerKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLReorgLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ CLReorgLayerKernel(const CLReorgLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers). */
+ CLReorgLayerKernel &operator=(const CLReorgLayerKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLReorgLayerKernel(CLReorgLayerKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLReorgLayerKernel &operator=(CLReorgLayerKernel &&) = default;
+ /** Initialize the kernel's input, output.
+ *
+ * @param[in] input Source tensor. Data types supported: All.
+ * @param[out] output Destination tensor with tensor shape:
+ * [width_input / stride, height_input / stride, channels_input * stride * stride, batch_size]. This means the output has
+ * the same number of input elements. Data types supported: same as @p input.
+ * @param[in] stride Stride value to use for reorganizing the values in the output tensor.
+ * It defines the spatial distance between 2 consecutive pixels in the x and y direction
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, int32_t stride);
+ /** Initialize the kernel's input, output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: All.
+ * @param[out] output Destination tensor with tensor shape:
+ * [width_input / stride, height_input / stride, channels_input * stride * stride, batch_size]. This means the output has
+ * the same number of input elements. Data types supported: same as @p input.
+ * @param[in] stride Stride value to use for reorganizing the values in the output tensor.
+ * It defines the spatial distance between 2 consecutive pixels in the x and y direction
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t stride);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLReorgLayerKernel
+ *
+ * @param[in] input Source tensor. Data types supported: All.
+ * @param[in] output Destination tensor with tensor shape:
+ * [width_input / stride, height_input / stride, channels_input * stride * stride, batch_size]. This means the output has
+ * the same number of input elements. Data types supported: same as @p input.
+ * @param[in] stride Stride value to use for reorganizing the values in the output tensor
+ * It defines the spatial distance between 2 consecutive pixels in the x and y direction
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t stride);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input;
+ ICLTensor *_output;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLREORGLAYERKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLReshapeLayerKernel.h"
+#include "src/core/CL/kernels/CLReshapeLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLRESHAPELAYERKERNEL_H
+#define ARM_COMPUTE_CLRESHAPELAYERKERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the kernel to perform tensor reshaping */
+class CLReshapeLayerKernel : public ICLKernel
+{
+public:
+ /** Set the input and output of the kernel
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor info. Data type supported: All.
+ * @param[out] output Destination tensor info. Data type supported: Same as @p input
+ */
+ void configure(const CLCompileContext &compile_context, const ITensorInfo *input, ITensorInfo *output);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref CLReshapeLayerKernel
+ *
+ * @param[in] input Source tensor info. Data type supported: All
+ * @param[in] output Destination tensor info. Data type supported: Same as @p input
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLRESHAPELAYERKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLReverseKernel.h"
+#include "src/core/CL/kernels/CLReverseKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLREVERSEKERNEL_H
+#define ARM_COMPUTE_CLREVERSEKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the reverse kernel */
+class CLReverseKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLReverseKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLReverseKernel(const CLReverseKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLReverseKernel &operator=(const CLReverseKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLReverseKernel(CLReverseKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLReverseKernel &operator=(CLReverseKernel &&) = default;
+ /** Default destructor */
+ ~CLReverseKernel() = default;
+ /** Initialise the kernel's inputs and output
+ *
+ * @param[in] input Input tensor. Data types supported: All.
+ * @param[out] output Output tensor. Data type supported: Same as @p input
+ * @param[in] axis Axis tensor. Contains the indices of the dimensions to reverse. Data type supported: U32
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *axis);
+ /** Initialise the kernel's inputs and output
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Input tensor. Data types supported: All.
+ * @param[out] output Output tensor. Data type supported: Same as @p input
+ * @param[in] axis Axis tensor. Contains the indices of the dimensions to reverse. Data type supported: U32
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *axis);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref CLReverseKernel
+ *
+ * @param[in] input Input tensor info. Data types supported: All.
+ * @param[in] output Output tensor info. Data type supported: Same as @p input
+ * @param[in] axis Axis tensor info. Contains the indices of the dimensions to reverse. Data type supported: U32
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *axis);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+public:
+ const ICLTensor *_input;
+ ICLTensor *_output;
+ const ICLTensor *_axis;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLREVERSEKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLScaleKernel.h"
+#include "src/core/CL/kernels/CLScaleKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/CL/OpenCL.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "src/core/AccessWindowStatic.h"
#include "src/core/CL/CLValidate.h"
+#include "src/core/CL/ICLKernel.h"
#include "src/core/helpers/WindowHelpers.h"
#include "support/StringSupport.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLSCALEKERNEL_H
+#define ARM_COMPUTE_CLSCALEKERNEL_H
+
+#include "arm_compute/core/KernelDescriptors.h"
+#include "src/core/CL/ICLSimple2DKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the scale kernel */
+class CLScaleKernel : public ICLSimple2DKernel
+{
+public:
+ /** Initialise the kernel's inputs, output and interpolation policy
+ *
+ * @param[in] input Source tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32
+ * @param[out] output Destination tensor. Data types supported: Same as @p input
+ * All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
+ * @param[in] info @ref ScaleKernelInfo Kernel descriptor to be used to configure.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const ScaleKernelInfo &info);
+ /** Initialise the kernel's inputs, output and interpolation policy
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32
+ * @param[out] output Destination tensor. Data types supported: Same as @p input
+ * All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
+ * @param[in] info @ref ScaleKernelInfo Kernel descriptor to be used to configure.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ScaleKernelInfo &info);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref CLScaleKernel
+ *
+ * @param[in] input Source tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32
+ * @param[in] output Destination tensor info. Data types supported: Same as @p input
+ * All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
+ * @param[in] info @ref ScaleKernelInfo Kernel descriptor to be used to validate
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ScaleKernelInfo &info);
+ /** Input tensor accessor.
+ *
+ * @return Pointer to input tensor.
+ */
+ const ICLTensor *input() const;
+ /** Output tensor accessor.
+ *
+ * @return Pointer to output tensor.
+ */
+ const ICLTensor *output() const;
+
+ // Inherited methods overridden:
+ BorderSize border_size() const override;
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+ /** Interpolation policy accessor (value set at configure time). */
+ InterpolationPolicy get_interpolation_policy() const
+ {
+ return _interpolation_policy;
+ }
+
+private:
+ InterpolationPolicy _interpolation_policy = InterpolationPolicy::BILINEAR;
+ DataLayout _data_layout = DataLayout::UNKNOWN;
+ bool _align_corners = false;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLSCALEKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLScharr3x3Kernel.h"
+#include "src/core/CL/kernels/CLScharr3x3Kernel.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLSCHARR3X3KERNEL_H
+#define ARM_COMPUTE_CLSCHARR3X3KERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the kernel to run a 3x3 Scharr filter on a tensor.
+ *
+ * @f[
+ * \mathbf{G}_x=\begin{vmatrix}
+ * -3 & 0 & +3\\
+ * -10& 0 & +10\\
+ * -3 & 0 & +3
+ * \end{vmatrix}
+ * @f]
+ * @f[
+ * \mathbf{G}_y=\begin{vmatrix}
+ * -3 & -10 & -3\\
+ * 0 & 0 & 0\\
+ * +3 & +10 & +3
+ * \end{vmatrix}
+ * @f]
+ */
+class CLScharr3x3Kernel : public ICLKernel
+{
+public:
+ /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */
+ CLScharr3x3Kernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLScharr3x3Kernel(const CLScharr3x3Kernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLScharr3x3Kernel &operator=(const CLScharr3x3Kernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLScharr3x3Kernel(CLScharr3x3Kernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLScharr3x3Kernel &operator=(CLScharr3x3Kernel &&) = default;
+ /** Initialise the kernel's source, destination and border.
+ *
+ * @note At least one of output_x or output_y must be set.
+ *
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16.
+ * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
+ /** Initialise the kernel's source, destination and border.
+ *
+ * @note At least one of output_x or output_y must be set.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16.
+ * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+ BorderSize border_size() const override;
+
+private:
+ bool _run_scharr_x; /**< Do we need to run Scharr X ? */
+ bool _run_scharr_y; /**< Do we need to run Scharr Y ? */
+ const ICLTensor *_input; /**< Input image */
+ ICLTensor *_output_x; /**< Output image for scharr X */
+ ICLTensor *_output_y; /**< Output image for scharr Y */
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLSCHARR3X3KERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLSelectKernel.h"
+#include "src/core/CL/kernels/CLSelectKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLSELECTKERNEL_H
+#define ARM_COMPUTE_CLSELECTKERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+// Forward declarations
+class ICLTensor;
+
+/** OpenCL interface for executing the select kernel
+ *
+ * Select is computed by:
+ * @f[ output(i) = condition(i) ? x(i) : y(i) @f]
+ **/
+class CLSelectKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLSelectKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLSelectKernel(const CLSelectKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLSelectKernel &operator=(const CLSelectKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLSelectKernel(CLSelectKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLSelectKernel &operator=(CLSelectKernel &&) = default;
+ /** Default destructor */
+ ~CLSelectKernel() = default;
+ /** Initialise the kernel's inputs and output.
+ *
+ * @param[in] c Condition input tensor. Data types supported: U8.
+ * @param[in] x First input tensor. Data types supported: All.
+ * @param[in] y Second input tensor. Data types supported: Same as @p x
+ * @param[out] output Output tensor. Data types supported: Same as @p x.
+ */
+ void configure(const ICLTensor *c, const ICLTensor *x, const ICLTensor *y, ICLTensor *output);
+ /** Initialise the kernel's inputs and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] c Condition input tensor. Data types supported: U8.
+ * @param[in] x First input tensor. Data types supported: All.
+ * @param[in] y Second input tensor. Data types supported: Same as @p x
+ * @param[out] output Output tensor. Data types supported: Same as @p x.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *c, const ICLTensor *x, const ICLTensor *y, ICLTensor *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLSelectKernel
+ *
+ * @param[in] c Condition input tensor. Data types supported: U8.
+ * @param[in] x First input tensor. Data types supported: All.
+ * @param[in] y Second input tensor. Data types supported: Same as @p x
+ * @param[in] output Output tensor. Data types supported: Same as @p x.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *c, const ITensorInfo *x, const ITensorInfo *y, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_c; /**< Condition tensor */
+ const ICLTensor *_x; /**< Source tensor 1 */
+ const ICLTensor *_y; /**< Source tensor 2 */
+ ICLTensor *_output; /**< Destination tensor */
+ bool _has_same_rank; /**< Flag that indicates if condition tensor and other inputs have the same rank */
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLSELECTKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLSobel3x3Kernel.h"
+#include "src/core/CL/kernels/CLSobel3x3Kernel.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLSOBEL3X3KERNEL_H
+#define ARM_COMPUTE_CLSOBEL3X3KERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the kernel to run a 3x3 Sobel filter on a tensor. */
+class CLSobel3x3Kernel : public ICLKernel
+{
+public:
+ /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */
+ CLSobel3x3Kernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLSobel3x3Kernel(const CLSobel3x3Kernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLSobel3x3Kernel &operator=(const CLSobel3x3Kernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLSobel3x3Kernel(CLSobel3x3Kernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLSobel3x3Kernel &operator=(CLSobel3x3Kernel &&) = default;
+ /** Default destructor */
+ ~CLSobel3x3Kernel() = default;
+ /** Initialise the kernel's source, destination and border.
+ *
+ * @note At least one of output_x or output_y must be set.
+ *
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16.
+ * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
+ /** Initialise the kernel's source, destination and border.
+ *
+ * @note At least one of output_x or output_y must be set.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16.
+ * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+ BorderSize border_size() const override;
+
+private:
+ const ICLTensor *_input; /**< Input tensor */
+ ICLTensor *_output_x; /**< Output tensor for Sobel X */
+ ICLTensor *_output_y; /**< Output tensor for Sobel Y */
+ bool _run_sobel_x; /**< Do we need to run Sobel X ? */
+ bool _run_sobel_y; /**< Do we need to run Sobel Y ? */
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLSOBEL3X3KERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLSobel5x5Kernel.h"
+#include "src/core/CL/kernels/CLSobel5x5Kernel.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLSOBEL5X5KERNEL_H
+#define ARM_COMPUTE_CLSOBEL5X5KERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the kernel to run the horizontal pass of 5x5 Sobel filter on a tensor. */
+class CLSobel5x5HorKernel : public ICLKernel
+{
+public:
+ /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */
+ CLSobel5x5HorKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLSobel5x5HorKernel(const CLSobel5x5HorKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLSobel5x5HorKernel &operator=(const CLSobel5x5HorKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLSobel5x5HorKernel(CLSobel5x5HorKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLSobel5x5HorKernel &operator=(CLSobel5x5HorKernel &&) = default;
+ /** Default destructor */
+ ~CLSobel5x5HorKernel() = default;
+
+ /** Initialise the kernel's source, destination and border.
+ *
+ * @note At least one of output_x or output_y must be set.
+ *
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16.
+ * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
+ /** Initialise the kernel's source, destination and border.
+ *
+ * @note At least one of output_x or output_y must be set.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16.
+ * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+ BorderSize border_size() const override;
+
+private:
+ const ICLTensor *_input; /**< Input tensor */
+ ICLTensor *_output_x; /**< X output of horizontal pass */
+ ICLTensor *_output_y; /**< Y output of horizontal pass */
+ bool _run_sobel_x; /**< Do we need to run Sobel X ? */
+ bool _run_sobel_y; /**< Do we need to run Sobel Y ? */
+ BorderSize _border_size; /**< Border size */
+};
+
+/** Interface for the kernel to run the vertical pass of 5x5 Sobel filter on a tensor. */
+class CLSobel5x5VertKernel : public ICLKernel
+{
+public:
+ /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */
+ CLSobel5x5VertKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLSobel5x5VertKernel(const CLSobel5x5VertKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLSobel5x5VertKernel &operator=(const CLSobel5x5VertKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLSobel5x5VertKernel(CLSobel5x5VertKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLSobel5x5VertKernel &operator=(CLSobel5x5VertKernel &&) = default;
+ /** Default destructor */
+ ~CLSobel5x5VertKernel() = default;
+
+ /** Initialise the kernel's source, destination and border.
+ *
+ * @note At least one of output_x or output_y must be set and the corresponding input.
+ *
+ * @param[in] input_x (Optional) Input for X (X output of horizontal pass). Data types supported: S16.
+ * @param[in] input_y (Optional) Input for Y (Y output of horizontal pass). Data types supported: S16.
+ * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16.
+ * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
+ /** Initialise the kernel's source, destination and border.
+ *
+ * @note At least one of output_x or output_y must be set and the corresponding input.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input_x (Optional) Input for X (X output of horizontal pass). Data types supported: S16.
+ * @param[in] input_y (Optional) Input for Y (Y output of horizontal pass). Data types supported: S16.
+ * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16.
+ * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+ BorderSize border_size() const override;
+
+private:
+ const ICLTensor *_input_x; /**< X input (X output of the horizontal pass) */
+ const ICLTensor *_input_y; /**< Y input (Y output of the horizontal pass) */
+ ICLTensor *_output_x; /**< X output of sobel */
+ ICLTensor *_output_y; /**< Y output of sobel */
+ bool _run_sobel_x; /**< Do we need to run sobel X? */
+ bool _run_sobel_y; /**< Do we need to run sobel Y? */
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLSOBEL5X5KERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLSobel7x7Kernel.h"
+#include "src/core/CL/kernels/CLSobel7x7Kernel.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLSOBEL7X7KERNEL_H
+#define ARM_COMPUTE_CLSOBEL7X7KERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the kernel to run the horizontal pass of 7x7 Sobel filter on a tensor. */
+class CLSobel7x7HorKernel : public ICLKernel
+{
+public:
+ /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */
+ CLSobel7x7HorKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLSobel7x7HorKernel(const CLSobel7x7HorKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLSobel7x7HorKernel &operator=(const CLSobel7x7HorKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLSobel7x7HorKernel(CLSobel7x7HorKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLSobel7x7HorKernel &operator=(CLSobel7x7HorKernel &&) = default;
+ /** Default destructor */
+ ~CLSobel7x7HorKernel() = default;
+
+ /** Initialise the kernel's source, destination and border.
+ *
+ * @note At least one of output_x or output_y must be set.
+ *
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S32.
+ * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S32.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
+ /** Initialise the kernel's source, destination and border.
+ *
+ * @note At least one of output_x or output_y must be set.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S32.
+ * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S32.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+ BorderSize border_size() const override;
+
+private:
+ const ICLTensor *_input; /**< Input tensor */
+ ICLTensor *_output_x; /**< X output of horizontal pass */
+ ICLTensor *_output_y; /**< Y output of horizontal pass */
+ bool _run_sobel_x; /**< Do we need to run Sobel X ? */
+ bool _run_sobel_y; /**< Do we need to run Sobel Y ? */
+ BorderSize _border_size; /**< Border size */
+};
+
+/** Interface for the kernel to run the vertical pass of 7x7 Sobel filter on a tensor. */
+class CLSobel7x7VertKernel : public ICLKernel
+{
+public:
+ /** Default constructor: initialize all the pointers to nullptr and parameters to zero. */
+ CLSobel7x7VertKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLSobel7x7VertKernel(const CLSobel7x7VertKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLSobel7x7VertKernel &operator=(const CLSobel7x7VertKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLSobel7x7VertKernel(CLSobel7x7VertKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLSobel7x7VertKernel &operator=(CLSobel7x7VertKernel &&) = default;
+ /** Default destructor */
+ ~CLSobel7x7VertKernel() = default;
+
+ /** Initialise the kernel's source, destination and border.
+ *
+ * @note At least one of output_x or output_y must be set and the corresponding input.
+ *
+ * @param[in] input_x (Optional) Input for X (X output of horizontal pass). Data types supported: S32.
+ * @param[in] input_y (Optional) Input for Y (Y output of horizontal pass). Data types supported: S32.
+ * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S32.
+ * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S32.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
+ /** Initialise the kernel's source, destination and border.
+ *
+ * @note At least one of output_x or output_y must be set and the corresponding input.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input_x (Optional) Input for X (X output of horizontal pass). Data types supported: S32.
+ * @param[in] input_y (Optional) Input for Y (Y output of horizontal pass). Data types supported: S32.
+ * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S32.
+ * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S32.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+ BorderSize border_size() const override;
+
+private:
+ const ICLTensor *_input_x; /**< X input (X output of the horizontal pass) */
+ const ICLTensor *_input_y; /**< Y input (Y output of the horizontal pass) */
+ ICLTensor *_output_x; /**< X output of sobel */
+ ICLTensor *_output_y; /**< Y output of sobel */
+ bool _run_sobel_x; /**< Do we need to run sobel X? */
+ bool _run_sobel_y; /**< Do we need to run sobel Y? */
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLSOBEL7X7KERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h"
+#include "src/core/CL/kernels/CLSoftmaxLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLSOFTMAXLAYERKERNEL_H
+#define ARM_COMPUTE_CLSOFTMAXLAYERKERNEL_H
+
+#include "arm_compute/core/KernelDescriptors.h"
+#include "src/core/CL/ICLSimple3DKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for max, shifting, exponentiating and summing the logits */
+class CLLogits1DMaxShiftExpSumKernel : public ICLKernel
+{
+public:
+ /** Info for whether a parallel reduction will be run and the vector size of the execution. */
+ using ParallelReductionInfo = std::tuple<bool, unsigned int>;
+
+public:
+ /** Default constructor */
+ CLLogits1DMaxShiftExpSumKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLLogits1DMaxShiftExpSumKernel(const CLLogits1DMaxShiftExpSumKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLLogits1DMaxShiftExpSumKernel &operator=(const CLLogits1DMaxShiftExpSumKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLLogits1DMaxShiftExpSumKernel(CLLogits1DMaxShiftExpSumKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLLogits1DMaxShiftExpSumKernel &operator=(CLLogits1DMaxShiftExpSumKernel &&) = default;
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
+ * @param[in,out] max Max values tensor. Data types supported: same as @p input
+ * @param[out] output Destination tensor. Data types supported: same as @p input
+ * @param[out] sum Sum of 1D logits tensor. Data types supported: same as @p input
+ * @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo.
+ */
+ void configure(const ICLTensor *input, ICLTensor *max, ICLTensor *output, ICLTensor *sum, const SoftmaxKernelInfo &info);
+ /** Set the input and output tensors.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
+ * @param[in,out] max Max values tensor. Data types supported: same as @p input
+ * @param[out] output Destination tensor. Data types supported: same as @p input
+ * @param[out] sum Sum of 1D logits tensor. Data types supported: same as @p input
+ * @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *max, ICLTensor *output, ICLTensor *sum, const SoftmaxKernelInfo &info);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLLogits1DMaxShiftExpSumKernel
+ *
+ * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
+ * @param[in] max Max values tensor. Data types supported: same as @p input
+ * @param[in] output Destination tensor. Data types supported: same as @p input
+ * @param[in] sum Sum of 1D logits tensor. Data types supported: same as @p input
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *max, const ITensorInfo *output, const ITensorInfo *sum);
+ /** Checks if the given size is eligible for parallel reduction
+ *
+ * @note Serial reduction is launched for width < (_grid_size * _serial_vector_size).
+ * @note Parallel reduction is launched for width >= (_grid_size * _serial_vector_size) and vector_size is forced to 4.
+ *
+ * @param[in] size Size to check
+ *
+ * @return A two-element tuple where the first element is a boolean specifying if a parallel reduction will be run,
+ * while the second element is the vector size of the execution.
+ */
+ static ParallelReductionInfo is_parallel_reduction(size_t size);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input;
+ ICLTensor *_max;
+ ICLTensor *_output;
+ ICLTensor *_sum;
+
+private:
+ static const unsigned int _grid_size;
+ static const unsigned int _serial_vector_size;
+ static const unsigned int _parallel_vector_size;
+};
+/** Interface for calculating the final step of the Softmax Layer where each logit value is multiplied by the inverse of the sum of the logits. */
+class CLLogits1DNormKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLLogits1DNormKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLLogits1DNormKernel(const CLLogits1DNormKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLLogits1DNormKernel &operator=(const CLLogits1DNormKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLLogits1DNormKernel(CLLogits1DNormKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLLogits1DNormKernel &operator=(CLLogits1DNormKernel &&) = default;
+ /** Set the input and output tensors.
+ *
+ * @param[in] input Source tensor. Data types supported: S32/F16/F32. If this kernel is used for log softmax, only F32/F16 is supported.
+ * @param[in] sum Sum tensor. Dimensions should be dim(input)-1. Data types supported: same as @p input
+ * @param[out] output Destination tensor. Data types supported: QASYMM8/QASYMM8_SIGNED for S32 @p input, or same as @p input
+ * @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo.
+ */
+ void configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, const SoftmaxKernelInfo &info);
+ /** Set the input and output tensors.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: S32/F16/F32. If this kernel is used for log softmax, only F32/F16 is supported.
+ * @param[in] sum Sum tensor. Dimensions should be dim(input)-1. Data types supported: same as @p input
+ * @param[out] output Destination tensor. Data types supported: QASYMM8/QASYMM8_SIGNED for S32 @p input, or same as @p input
+ * @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, const SoftmaxKernelInfo &info);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLLogits1DNormKernel
+ *
+ * @param[in] input Source tensor. Data types supported: S32/F16/F32. If this kernel is used for log softmax, only F32/F16 is supported.
+ * @param[in] sum Sum tensor. Dimensions should be dim(input)-1. Data types supported: same as @p input
+ * @param[in] output Destination tensor. Data types supported: QASYMM8/QASYMM8_SIGNED for S32 @p input, or same as @p input
+ * @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, const SoftmaxKernelInfo &info);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input;
+ const ICLTensor *_sum;
+ ICLTensor *_output;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLSOFTMAXLAYERKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLSpaceToBatchLayerKernel.h"
+#include "src/core/CL/kernels/CLSpaceToBatchLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/ICLTensor.h"
--- /dev/null
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLSPACETOBATCHLAYERKERNEL_H
+#define ARM_COMPUTE_CLSPACETOBATCHLAYERKERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the space to batch kernel */
+class CLSpaceToBatchLayerKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLSpaceToBatchLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLSpaceToBatchLayerKernel(const CLSpaceToBatchLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLSpaceToBatchLayerKernel &operator=(const CLSpaceToBatchLayerKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLSpaceToBatchLayerKernel(CLSpaceToBatchLayerKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLSpaceToBatchLayerKernel &operator=(CLSpaceToBatchLayerKernel &&) = default;
+ /** Default destructor */
+ ~CLSpaceToBatchLayerKernel() = default;
+ /** Initialise the kernel's inputs and output.
+ *
+ * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
+ * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32
+ * @param[in] paddings 2-D tensor with shape [2, M]. Data types supported: S32
+ * @param[out] output Tensor output. Data types supported: same as @p input
+ */
+ void configure(const ICLTensor *input, const ICLTensor *block_shape, const ICLTensor *paddings, ICLTensor *output);
+ /** Initialise the kernel's inputs and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
+ * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32
+ * @param[in] paddings 2-D tensor with shape [2, M]. Data types supported: S32
+ * @param[out] output Tensor output. Data types supported: same as @p input
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *block_shape, const ICLTensor *paddings, ICLTensor *output);
+ /** Initialise the kernel's input and output. (Static block shape and paddings)
+ *
+ * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
+ * @param[in] block_shape_x Block shape x value.
+ * @param[in] block_shape_y Block shape y value.
+ * @param[in] padding_left The left padding of the output tensor.
+ * @param[in] padding_right The right padding of the output tensor.
+ * @param[out] output Tensor output. Data types supported: same as @p input
+ */
+ void configure(const ICLTensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, ICLTensor *output);
+ /** Initialise the kernel's input and output. (Static block shape and paddings)
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
+ * @param[in] block_shape_x Block shape x value.
+ * @param[in] block_shape_y Block shape y value.
+ * @param[in] padding_left The left padding of the output tensor.
+ * @param[in] padding_right The right padding of the output tensor.
+ * @param[out] output Tensor output. Data types supported: same as @p input
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right,
+ ICLTensor *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLSpaceToBatchLayerKernel
+ *
+ * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
+ * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32
+ * @param[in] paddings 2-D tensor with shape [2, M]. Data types supported: S32
+ * @param[in] output Tensor output. Data types supported: same as @p input
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *paddings, const ITensorInfo *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLSpaceToBatchLayerKernel (Static block shape and paddings)
+ *
+ * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
+ * @param[in] block_shape_x Block shape x value.
+ * @param[in] block_shape_y Block shape y value.
+ * @param[in] padding_left The left padding of the output tensor.
+ * @param[in] padding_right The right padding of the output tensor.
+ * @param[in] output Tensor output. Data types supported: same as @p input
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input; /**< Source tensor */
+ const ICLTensor *_block_shape; /**< Block shape tensor */
+ const ICLTensor *_paddings; /**< Paddings tensor */
+ ICLTensor *_output; /**< Destination tensor */
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLSPACETOBATCHLAYERKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLSpaceToDepthLayerKernel.h"
+#include "src/core/CL/kernels/CLSpaceToDepthLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/ICLTensor.h"
--- /dev/null
+/*
+ * Copyright (c) 2019-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLSPACETODEPTHLAYERKERNEL_H
+#define ARM_COMPUTE_CLSPACETODEPTHLAYERKERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the space to depth kernel */
+class CLSpaceToDepthLayerKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLSpaceToDepthLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLSpaceToDepthLayerKernel(const CLSpaceToDepthLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLSpaceToDepthLayerKernel &operator=(const CLSpaceToDepthLayerKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLSpaceToDepthLayerKernel(CLSpaceToDepthLayerKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLSpaceToDepthLayerKernel &operator=(CLSpaceToDepthLayerKernel &&) = default;
+ /** Default destructor */
+ ~CLSpaceToDepthLayerKernel() = default;
+ /** Initialise the kernel's inputs and output.
+ *
+ * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
+ * @param[out] output Tensor output. Data types supported: same as @p input
+ * @param[in] block_shape Block shape value.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, int32_t block_shape);
+ /** Initialise the kernel's inputs and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
+ * @param[out] output Tensor output. Data types supported: same as @p input
+ * @param[in] block_shape Block shape value.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t block_shape);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLSpaceToDepthLayerKernel.
+ *
+ * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All.
+ * @param[in] output Tensor output info. Data types supported: same as @p input
+ * @param[in] block_shape Block shape value.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input; /**< Source tensor */
+ ICLTensor *_output; /**< Destination tensor */
+ int32_t _block_shape; /**< Block shape */
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLSPACETODEPTHLAYERKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLStackLayerKernel.h"
+#include "src/core/CL/kernels/CLStackLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ARM_COMPUTE_CLSTACKLAYERKERNEL_H
+#define ARM_COMPUTE_CLSTACKLAYERKERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel to stack a rank-R tensor into one with rank-(R+1) along the axis dimension. */
+class CLStackLayerKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLStackLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLStackLayerKernel(const CLStackLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLStackLayerKernel &operator=(const CLStackLayerKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLStackLayerKernel(CLStackLayerKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLStackLayerKernel &operator=(CLStackLayerKernel &&) = default;
+ /** Default destructor */
+ ~CLStackLayerKernel() = default;
+ /** Initialise the kernel's inputs and output
+ *
+ * @note Supported input tensor rank: up to 4
+ *
+ * @param[in] input Input tensor. Data types supported: All.
+ * @param[in] axis The dimension to stack the tensors along. It must be smaller than the number of input dimensions.
+ * @param[in] idx_input Index of the input tensor in the list of tensors to stack.
+ * All tensors in the list must have the same shape
+ * @param[in] num_tensors Number of tensors to stack
+ * @param[out] output Output tensor. Data types supported: Same as @p input.
+ *
+ */
+ void configure(const ICLTensor *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, ICLTensor *output);
+ /** Initialise the kernel's inputs and output
+ *
+ * @note Supported input tensor rank: up to 4
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Input tensor. Data types supported: All.
+ * @param[in] axis The dimension to stack the tensors along. It must be smaller than the number of input dimensions.
+ * @param[in] idx_input Index of the input tensor in the list of tensors to stack.
+ * All tensors in the list must have the same shape
+ * @param[in] num_tensors Number of tensors to stack
+ * @param[out] output Output tensor. Data types supported: Same as @p input.
+ *
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, ICLTensor *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLStackLayerKernel
+ *
+ * @note Supported input tensor rank: up to 4
+ *
+ * @param[in] input Input tensor info. Data types supported: All.
+ * @param[in] axis The dimension to stack the tensors along. It must be smaller than the number of input dimensions.
+ * @param[in] idx_input Index of the input tensor in the list of tensors to stack
+ * All tensors in the list must have the same shape
+ * @param[in] num_tensors Number of tensors to stack
+ * @param[in] output Output tensor info. Data types supported: Same as @p input.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input;
+ ICLTensor *_output;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLSTACKLAYERKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLStridedSliceKernel.h"
+#include "src/core/CL/kernels/CLStridedSliceKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/utils/helpers/tensor_transform.h"
--- /dev/null
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CL_STRIDED_SLICE_KERNEL_H
+#define ARM_COMPUTE_CL_STRIDED_SLICE_KERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+/** Interface for the kernel to perform tensor strided slicing */
+class CLStridedSliceKernel : public ICLKernel
+{
+public:
+ /** Configure kernel
+ *
+ * @note Supported tensor rank: up to 4
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor info. Data type supported: All.
+ * @param[out] output Destination tensor info. Data type supported: Same as @p input
+ * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input).
+ * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input).
+ * @param[in] strides The strides of the dimensions of the input tensor to be sliced. The length must be of rank(input).
+ * @param[in] begin_mask If the ith bit of begin_mask is set, starts[i] is ignored and the fullest possible range in that dimension is used instead.
+ * @param[in] end_mask If the ith bit of end_mask is set, ends[i] is ignored and the fullest possible range in that dimension is used instead.
+ * @param[in] shrink_axis_mask If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1.
+ * A slice of size 1 starting from starts[i] in the dimension must be preserved.
+ */
+ void configure(const CLCompileContext &compile_context, const ITensorInfo *input, ITensorInfo *output,
+ const Coordinates &starts, const Coordinates &ends, const BiStrides &strides,
+ int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref CLStridedSliceKernel
+ *
+ * @note Supported tensor rank: up to 4
+ *
+ * @param[in] input Source tensor info. Data type supported: All.
+ * @param[in] output Destination tensor info. Data type supported: Same as @p input
+ * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input).
+ * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input).
+ * @param[in] strides The strides of the dimensions of the input tensor to be sliced. The length must be of rank(input).
+ * @param[in] begin_mask If the ith bit of begin_mask is set, starts[i] is ignored and the fullest possible range in that dimension is used instead.
+ * @param[in] end_mask If the ith bit of end_mask is set, ends[i] is ignored and the fullest possible range in that dimension is used instead.
+ * @param[in] shrink_axis_mask If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1.
+ * A slice of size 1 starting from starts[i] in the dimension must be preserved.
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output,
+ const Coordinates &starts, const Coordinates &ends, const BiStrides &strides,
+ int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask);
+
+ // Inherited methods overridden:
+ void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CL_STRIDED_SLICE_KERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLTableLookupKernel.h"
+#include "src/core/CL/kernels/CLTableLookupKernel.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLLut.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLTABLELOOKUPKERNEL_H
+#define ARM_COMPUTE_CLTABLELOOKUPKERNEL_H
+
+#include "src/core/CL/ICLSimple2DKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+class ICLLut;
+
+/** Interface for the kernel to perform table lookup calculations. */
+class CLTableLookupKernel : public ICLSimple2DKernel
+{
+public:
+ /** Initialise the kernel's input, lut and output.
+ *
+ * @param[in] input An input tensor. Data types supported: U8, S16.
+ * @param[in] lut The input LUT. Data types supported: U8, S16.
+ * @param[out] output The output tensor. Data types supported: U8, S16.
+ */
+ void configure(const ICLTensor *input, const ICLLut *lut, ICLTensor *output);
+ /** Initialise the kernel's input, lut and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input An input tensor. Data types supported: U8, S16.
+ * @param[in] lut The input LUT. Data types supported: U8, S16.
+ * @param[out] output The output tensor. Data types supported: U8, S16.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLLut *lut, ICLTensor *output);
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLTABLELOOKUPKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLThresholdKernel.h"
+#include "src/core/CL/kernels/CLThresholdKernel.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLTHRESHOLDKERNEL_H
+#define ARM_COMPUTE_CLTHRESHOLDKERNEL_H
+
+#include "arm_compute/core/KernelDescriptors.h"
+#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLSimple2DKernel.h"
+
+namespace arm_compute
+{
+// Forward declarations
+class ICLTensor;
+
+/** Interface for the thresholding kernel. */
+class CLThresholdKernel : public ICLSimple2DKernel
+{
+public:
+ /**Initialise the kernel's input, output and threshold parameters.
+ *
+ * @param[in] input An input tensor. Data types supported: U8
+ * @param[out] output The output tensor. Data types supported: U8.
+ * @param[in] info Threshold descriptor
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const ThresholdKernelInfo &info);
+ /**Initialise the kernel's input, output and threshold parameters.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input An input tensor. Data types supported: U8
+ * @param[out] output The output tensor. Data types supported: U8.
+ * @param[in] info Threshold descriptor
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ThresholdKernelInfo &info);
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLTHRESHOLDKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLTileKernel.h"
+#include "src/core/CL/kernels/CLTileKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "src/core/helpers/AutoConfiguration.h"
--- /dev/null
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLTILEKERNEL_H
+#define ARM_COMPUTE_CLTILEKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel to perform a Tile operation */
+class CLTileKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLTileKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLTileKernel(const CLTileKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLTileKernel &operator=(const CLTileKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLTileKernel(CLTileKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLTileKernel &operator=(CLTileKernel &&) = default;
+ /** Default destructor */
+ ~CLTileKernel() = default;
+ /** Set the source, destination of the kernel
+ *
+ * @param[in] input Source tensor. Data type supported: All.
+ * @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension.
+ * Cannot have more than 4 elements (tiling in dimensions greater than 4 is not supported).
+ * @param[out] output Destination tensor. Same as @p input
+ *
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const Multiples &multiples);
+ /** Set the source, destination of the kernel
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data type supported: All.
+ * @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension.
+ * Cannot have more than 4 elements (tiling in dimensions greater than 4 is not supported).
+ * @param[out] output Destination tensor. Same as @p input
+ *
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Multiples &multiples);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLTileKernel
+ *
+ * @param[in] input Source tensor info. Data type supported: All.
+ * @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension.
+ * Cannot have more than 4 elements (tiling in dimensions greater than 4 is not supported).
+ * @param[in] output Destination tensor info. Same as @p input
+ *
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Multiples &multiples);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input;
+ ICLTensor *_output;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLTILEKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLTransposeKernel.h"
+#include "src/core/CL/kernels/CLTransposeKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLTRANSPOSEKERNEL_H
+#define ARM_COMPUTE_CLTRANSPOSEKERNEL_H
+
+#include "src/core/CL/ICLSimple2DKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel which transposes the elements of a matrix.
+ *
+ * [width, height, batch] -> [height, width, batch]
+ *
+ */
+class CLTransposeKernel : public ICLSimple2DKernel
+{
+public:
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] input Input tensor. Data types supported: All.
+ * @param[out] output Output tensor. Data type supported: Same as @p input
+ */
+ void configure(const ICLTensor *input, ICLTensor *output);
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Input tensor. Data types supported: All.
+ * @param[out] output Output tensor. Data type supported: Same as @p input
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLTransposeKernel
+ *
+ * @param[in] input Input tensor. Data types supported: All.
+ * @param[in] output Output tensor. Data type supported: Same as @p input
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output);
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLTRANSPOSEKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLUpsampleLayerKernel.h"
+#include "src/core/CL/kernels/CLUpsampleLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLUPSAMPLELAYERKERNEL_H
+#define ARM_COMPUTE_CLUPSAMPLELAYERKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the UpsampleLayer kernel on OpenCL. */
+class CLUpsampleLayerKernel : public ICLKernel
+{
+public:
+ /** Constructor */
+ CLUpsampleLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLUpsampleLayerKernel(const CLUpsampleLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLUpsampleLayerKernel &operator=(const CLUpsampleLayerKernel &) = delete;
+ /** Default Move Constructor. */
+ CLUpsampleLayerKernel(CLUpsampleLayerKernel &&) = default;
+ /** Default move assignment operator */
+ CLUpsampleLayerKernel &operator=(CLUpsampleLayerKernel &&) = default;
+ /** Default destructor */
+ ~CLUpsampleLayerKernel() = default;
+
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] input Source tensor. Data types supported: All.
+ * @param[out] output Destination tensor. Data types supported: same as @p input.
+ * @param[in] info Contains stride information described in @ref Size2D.
+ * @param[in] upsampling_policy Defines the policy to fill the intermediate pixels.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const Size2D &info, const InterpolationPolicy upsampling_policy);
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: All.
+ * @param[out] output Destination tensor. Data types supported: same as @p input.
+ * @param[in] info Contains stride information described in @ref Size2D.
+ * @param[in] upsampling_policy Defines the policy to fill the intermediate pixels.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Size2D &info, const InterpolationPolicy upsampling_policy);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLUpsampleLayerKernel
+ *
+ * @param[in] input Source tensor info. Data types supported: All.
+ * @param[in] output Destination tensor info. Data types supported: same as @p input.
+ * @param[in] info Contains stride information described in @ref Size2D.
+ * @param[in] upsampling_policy Defines the policy to fill the intermediate pixels.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &info, const InterpolationPolicy upsampling_policy);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input;
+ ICLTensor *_output;
+ Size2D _info;
+ DataLayout _data_layout;
+ unsigned int _num_elems_processed_per_iteration_input_x;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLUPSAMPLELAYERKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLWarpAffineKernel.h"
+#include "src/core/CL/kernels/CLWarpAffineKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLWARPAFFINEKERNEL_H
+#define ARM_COMPUTE_CLWARPAFFINEKERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLSimple2DKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the warp affine kernel.*/
+class CLWarpAffineKernel : public ICLSimple2DKernel
+{
+public:
+ /** Initialize the function's source, destination, interpolation policy and border_mode.
+ *
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[out] output Destination tensor, Data types supported: U8.
+ * @param[in]  matrix The affine transformation matrix. Must be 2x3 of type float.
+ * The matrix argument requires 9 values; the last 3 values are ignored.
+ * @param[in] policy The interpolation type.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy);
+ /** Initialize the function's source, destination, interpolation policy and border_mode.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[out] output Destination tensor, Data types supported: U8.
+ * @param[in]  matrix The affine transformation matrix. Must be 2x3 of type float.
+ * The matrix argument requires 9 values; the last 3 values are ignored.
+ * @param[in] policy The interpolation type.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy);
+
+ // Inherited methods overridden:
+ BorderSize border_size() const override;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLWARPAFFINEKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLWarpPerspectiveKernel.h"
+#include "src/core/CL/kernels/CLWarpPerspectiveKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2016-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLWARPERSPECTIVEKERNEL_H
+#define ARM_COMPUTE_CLWARPERSPECTIVEKERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLSimple2DKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+/** Interface for the warp perspective kernel.*/
+class CLWarpPerspectiveKernel : public ICLSimple2DKernel
+{
+public:
+ /** Initialize the function's source, destination, interpolation policy and border_mode.
+ *
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[out] output Destination tensor, Data types supported: U8.
+ * @param[in] matrix The perspective matrix. Must be 3x3 of type float.
+ * @param[in] policy The interpolation type.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy);
+ /** Initialize the function's source, destination, interpolation policy and border_mode.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[out] output Destination tensor, Data types supported: U8.
+ * @param[in] matrix The perspective matrix. Must be 3x3 of type float.
+ * @param[in] policy The interpolation type.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy);
+
+ // Inherited methods overridden:
+ BorderSize border_size() const override;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLWARPERSPECTIVEKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h"
+#include "src/core/CL/kernels/CLWeightsReshapeKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLWEIGHTSRESHAPEKERNEL_H
+#define ARM_COMPUTE_CLWEIGHTSRESHAPEKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+/** OpenCL kernel to perform reshaping on the weights used by convolution and locally connected layer
+ *
+ * Rearranges each 3-dimensional kernel to a single row leading to a matrix with linearized kernels.
+ * In combination with the @ref CLIm2ColKernel can transform a convolution to a matrix multiplication.
+ *
+ * For example assuming a 3D weight kernel of 3x3 dimensions and depth of 2 we have:
+ * @f[
+ * \left( \begin{array}{ccc}
+ * a000 & a001 & a002 \\
+ * a010 & a011 & a012 \\
+ * a020 & a021 & a022 \\
+ * \end{array} \right)
+ * \left( \begin{array}{ccc}
+ * a100 & a101 & a102 \\
+ * a110 & a111 & a112 \\
+ * a120 & a121 & a122 \\
+ * \end{array} \right)
+ * \rightarrow
+ * \left( \begin{array}{ccccccccc}
+ * a000 & a001 & a002 & a010 & a011 & a012 & a020 & a021 & a022 & a100 & a101 & a102 & a110 & a111 & a112 & a120 & a121 & a122 \\
+ * \end{array} \right)
+ * @f]
+ */
+class CLWeightsReshapeKernel : public ICLKernel
+{
+public:
+ /** Constructor.*/
+ CLWeightsReshapeKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLWeightsReshapeKernel(const CLWeightsReshapeKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLWeightsReshapeKernel &operator=(const CLWeightsReshapeKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLWeightsReshapeKernel(CLWeightsReshapeKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLWeightsReshapeKernel &operator=(CLWeightsReshapeKernel &&) = default;
+ /** Default destructor */
+ ~CLWeightsReshapeKernel() = default;
+ /** Set the input and output of the kernel.
+ *
+ * @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared,
+ * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared. Data types supported: All
+ * @param[in] biases The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with
+ * dimensions [OFM, num_patches] if unshared. Data types supported: F16/F32, for quantized types this must be nullptr.
+ * @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types.
+ * @param[out] output The output tensor. Should be a 2D Tensor if there are no groups and the weights are not shared; a 3D Tensor otherwise.
+ * Data types supported: Same as @p input
+ * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout
+ * Number of groups greater than one are only supported for NCHW data layout, and the number of weights must be a multiple of it.
+ */
+ void configure(const ICLTensor *input, const ICLTensor *biases, ICLTensor *output, unsigned int num_groups = 1);
+ /** Set the input and output of the kernel.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared,
+ * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared. Data types supported: All
+ * @param[in] biases The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with
+ * dimensions [OFM, num_patches] if unshared. Data types supported: F16/F32, for quantized types this must be nullptr.
+ * @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types.
+ * @param[out] output The output tensor. Should be a 2D Tensor if there are no groups and the weights are not shared; a 3D Tensor otherwise.
+ * Data types supported: Same as @p input
+ * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout
+ * Number of groups greater than one are only supported for NCHW data layout, and the number of weights must be a multiple of it.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *biases, ICLTensor *output, unsigned int num_groups = 1);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLWeightsReshapeKernel
+ *
+ * @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared,
+ * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared. Data types supported: All
+ * @param[in] biases The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with
+ * dimensions [OFM, num_patches] if unshared. Data types supported: F16/F32, for quantized types this must be nullptr.
+ * @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types.
+ * @param[in] output The output tensor. Should be a 2D Tensor if there are no groups and the weights are not shared; a 3D Tensor otherwise.
+ * Data types supported: Same as @p input
+ * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout
+ * Number of groups greater than one are only supported for NCHW data layout, and the number of weights must be a multiple of it.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *biases, const ITensorInfo *output, unsigned int num_groups = 1);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input;
+ const ICLTensor *_biases;
+ ICLTensor *_output;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLWEIGHTSRESHAPEKERNEL_H */
\ No newline at end of file
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h"
+#include "src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ARM_COMPUTE_CLWIDTHCONCATENATE_2TENSORS_KERNEL_H
+#define ARM_COMPUTE_CLWIDTHCONCATENATE_2TENSORS_KERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+/** Interface for the width concatenate kernel of 2 tensors.
+ * The input1 and input2 tensors will be concatenated into the output tensor.
+ */
+class CLWidthConcatenate2TensorsKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLWidthConcatenate2TensorsKernel() = default;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLWidthConcatenate2TensorsKernel(const CLWidthConcatenate2TensorsKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLWidthConcatenate2TensorsKernel &operator=(const CLWidthConcatenate2TensorsKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLWidthConcatenate2TensorsKernel(CLWidthConcatenate2TensorsKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLWidthConcatenate2TensorsKernel &operator=(CLWidthConcatenate2TensorsKernel &&) = default;
+ /** Default destructor */
+ ~CLWidthConcatenate2TensorsKernel() = default;
+ /** Initialise the kernel's inputs and output
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input1 First input tensor. Data types supported: All.
+ * @param[in] input2 Second input tensor. Data types supported: same as @p input1
+ * @param[out] output Output tensor. Data types supported: Same as @p input1.
+ */
+ void configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLWidthConcatenate2TensorsKernel
+ *
+ * @param[in] input1 First tensor info. Data types supported: All.
+ * @param[in] input2 Second tensor info. Data types supported: same as @p input1
+ * @param[in] output Output tensor info. Data types supported: Same as @p input1.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLWIDTHCONCATENATE_2TENSORS_KERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h"
+#include "src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ARM_COMPUTE_CLWIDTHCONCATENATE_4TENSORS_KERNEL_H
+#define ARM_COMPUTE_CLWIDTHCONCATENATE_4TENSORS_KERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+/** Interface for the width concatenate kernel of 4 tensors.
+ * All input tensors will be concatenated into the output tensor.
+ */
+class CLWidthConcatenate4TensorsKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLWidthConcatenate4TensorsKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLWidthConcatenate4TensorsKernel(const CLWidthConcatenate4TensorsKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLWidthConcatenate4TensorsKernel &operator=(const CLWidthConcatenate4TensorsKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLWidthConcatenate4TensorsKernel(CLWidthConcatenate4TensorsKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLWidthConcatenate4TensorsKernel &operator=(CLWidthConcatenate4TensorsKernel &&) = default;
+ /** Default destructor */
+ ~CLWidthConcatenate4TensorsKernel() = default;
+ /** Initialise the kernel's input1s and output
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input1 First input tensor. Data types supported: All.
+ * @param[in] input2 Second input tensor. Data types supported: same as @p input1
+ * @param[in] input3 Third input tensor. Data types supported: same as @p input1
+ * @param[in] input4 Fourth input tensor. Data types supported: same as @p input1
+ * @param[out] output Output tensor. Data types supported: Same as @p input1.
+ */
+ void configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *input3, ITensorInfo *input4, ITensorInfo *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLWidthConcatenate4TensorsKernel
+ *
+ * @param[in] input1 First tensor info. Data types supported: All.
+ * @param[in] input2 Second tensor info. Data types supported: same as @p input1
+ * @param[in] input3 Third tensor info. Data types supported: same as @p input1
+ * @param[in] input4 Fourth tensor info. Data types supported: same as @p input1
+ * @param[in] output Output tensor info. Data types supported: Same as @p input1.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *input3, const ITensorInfo *input4, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLWIDTHCONCATENATE_4TENSORS_KERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h"
+#include "src/core/CL/kernels/CLWidthConcatenateLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ARM_COMPUTE_CLWIDTHCONCATENATELAYERKERNEL_H
+#define ARM_COMPUTE_CLWIDTHCONCATENATELAYERKERNEL_H
+
+#include "arm_compute/core/Types.h"
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+/** Interface for the width concatenate kernel.
+ * The input tensor will be concatenated into the output tensor.
+ */
+class CLWidthConcatenateLayerKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLWidthConcatenateLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLWidthConcatenateLayerKernel(const CLWidthConcatenateLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLWidthConcatenateLayerKernel &operator=(const CLWidthConcatenateLayerKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLWidthConcatenateLayerKernel(CLWidthConcatenateLayerKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLWidthConcatenateLayerKernel &operator=(CLWidthConcatenateLayerKernel &&) = default;
+ /** Default destructor */
+ ~CLWidthConcatenateLayerKernel() = default;
+ /** Initialise the kernel's inputs and output
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Input tensor. Data types supported: All.
+ * @param[in] width_offset The offset on the X axis.
+ * @param[in,out] output Output tensor. Data types supported: Same as @p input.
+ *
+ */
+ void configure(const CLCompileContext &compile_context, ITensorInfo *input, unsigned int width_offset, ITensorInfo *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLWidthConcatenateLayerKernel
+ *
+ * @param[in] input Input tensor info. Data types supported: All.
+ * @param[in] width_offset The offset on the X axis.
+ * @param[in] output Output tensor info. Data types supported: Same as @p input.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, unsigned int width_offset, const ITensorInfo *output);
+
+ // Inherited methods overridden:
+ void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override;
+};
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CLWIDTHCONCATENATELAYERKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h"
+#include "src/core/CL/kernels/CLWinogradFilterTransformKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
const unsigned int num_elems_processed_per_iteration_y = input->dimension(1);
const unsigned int num_elems_read_per_iteration_z = input->data_layout() == DataLayout::NCHW ? 1 : input->dimension(2);
- Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y, num_elems_read_per_iteration_z));
+ Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y, num_elems_read_per_iteration_z));
Window win_collapsed = win.collapse(win, Window::DimZ);
return std::make_pair(Status{}, win_collapsed);
}
--- /dev/null
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLWINOGRADFILTERTRANSFORMKERNEL_H
+#define ARM_COMPUTE_CLWINOGRADFILTERTRANSFORMKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the Winograd filter transform kernel. */
+class CLWinogradFilterTransformKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLWinogradFilterTransformKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLWinogradFilterTransformKernel(const CLWinogradFilterTransformKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLWinogradFilterTransformKernel &operator=(const CLWinogradFilterTransformKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLWinogradFilterTransformKernel(CLWinogradFilterTransformKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLWinogradFilterTransformKernel &operator=(CLWinogradFilterTransformKernel &&) = default;
+ /** Default destructor */
+ ~CLWinogradFilterTransformKernel() = default;
+ /** Set the input and output tensor.
+ *
+ * @note Winograd filter transform supports the following configurations for NCWH data layout
+ * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3),
+ * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
+ * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
+ *
+ * @note Winograd filter transform supports the following configurations for NHWC data layout
+ * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
+ * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
+ *
+ * Strides: only unit strides
+ *
+ * @param[in] input Source tensor. The input is a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] (NCHW data layout) or [IFM, kernel_x, kernel_y, OFM] (NHWC data layout). Data types supported: F16/F32.
+ * @param[out] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_filter_transform_shape. Data types supported: Same as @p input
+ * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info);
+ /** Set the input and output tensor.
+ *
+ * @note Winograd filter transform supports the following configurations for NCWH data layout
+ * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3),
+ * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
+ * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
+ *
+ * @note Winograd filter transform supports the following configurations for NHWC data layout
+ * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
+ * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
+ *
+ * Strides: only unit strides
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. The input is a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] (NCHW data layout) or [IFM, kernel_x, kernel_y, OFM] (NHWC data layout). Data types supported: F16/F32.
+ * @param[out] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_filter_transform_shape. Data types supported: Same as @p input
+ * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLWinogradFilterTransformKernel
+ *
+ * @note Winograd filter transform supports the following configurations for NCWH data layout
+ * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3),
+ * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
+ * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
+ *
+ * @note Winograd filter transform supports the following configurations for NHWC data layout
+ * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
+ * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
+ *
+ * Strides: only unit strides
+ *
+ * @param[in] input Source tensor. The input is a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] (NCHW data layout) or [IFM, kernel_x, kernel_y, OFM] (NHWC data layout). Data types supported: F16/F32.
+ * @param[out] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_filter_transform_shape. Data types supported: Same as @p input
+ * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ const ICLTensor *_input;
+ ICLTensor *_output;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLWINOGRADFILTERTRANSFORMKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLWinogradInputTransformKernel.h"
+#include "src/core/CL/kernels/CLWinogradInputTransformKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLWINOGRADINPUTTRANSFORMKERNEL_H
+#define ARM_COMPUTE_CLWINOGRADINPUTTRANSFORMKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** OpenCL kernel to perform Winograd input transform.*/
+class CLWinogradInputTransformKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLWinogradInputTransformKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLWinogradInputTransformKernel(const CLWinogradInputTransformKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLWinogradInputTransformKernel &operator=(const CLWinogradInputTransformKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLWinogradInputTransformKernel(CLWinogradInputTransformKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLWinogradInputTransformKernel &operator=(CLWinogradInputTransformKernel &&) = default;
+ /** Set the input and output of the kernel.
+ *
+ * @note Winograd input transform supports the following configurations for NCWH data layout
+ * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3),
+ * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
+ * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
+ *
+ * @note Winograd input transform supports the following configurations for NHWC data layout
+ * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
+ * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
+ *
+ * Strides: only unit strides
+ *
+ * @param[in] input The input tensor to transform. Data types supported: F16/F32
+ * @param[in] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_input_transform_shape. Data types supported: Same as @p input
+ * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo.
+ */
+ void configure(const ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info);
+ /** Set the input and output of the kernel.
+ *
+ * @note Winograd input transform supports the following configurations for NCWH data layout
+ * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3),
+ * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
+ * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
+ *
+ * @note Winograd input transform supports the following configurations for NHWC data layout
+ * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
+ * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
+ *
+ * Strides: only unit strides
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input The input tensor to transform. Data types supported: F16/F32
+ * @param[in] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_input_transform_shape. Data types supported: Same as @p input
+ * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLWinogradInputTransformKernel
+ *
+ * @note Winograd input transform supports the following configurations for NCWH data layout
+ * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3),
+ * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
+ * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
+ *
+ * @note Winograd input transform supports the following configurations for NHWC data layout
+ * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
+ * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
+ *
+ * Strides: only unit strides
+ *
+ * @param[in] input The input tensor to transform. Data types supported: F16/F32
+ * @param[in] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_input_transform_shape. Data types supported: Same as @p input
+ * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+ BorderSize border_size() const override;
+
+private:
+ using WinogradKey = std::pair<std::pair<int, int>, std::pair<int, int>>;
+
+ BorderSize _border_size;
+ const ICLTensor *_input;
+ ICLTensor *_output;
+ DataLayout _data_layout;
+ int _num_tiles_x;
+ int _num_tiles_y;
+ unsigned int _step_z;
+};
+} // arm_compute
+#endif /*ARM_COMPUTE_CLWINOGRADINPUTTRANSFORMKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h"
+#include "src/core/CL/kernels/CLWinogradOutputTransformKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
--- /dev/null
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLWINOGRADOUTPUTTRANSFORMKERNEL_H
+#define ARM_COMPUTE_CLWINOGRADOUTPUTTRANSFORMKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the Winograd output transform kernel. */
+class CLWinogradOutputTransformKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLWinogradOutputTransformKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLWinogradOutputTransformKernel(const CLWinogradOutputTransformKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLWinogradOutputTransformKernel &operator=(const CLWinogradOutputTransformKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLWinogradOutputTransformKernel(CLWinogradOutputTransformKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLWinogradOutputTransformKernel &operator=(CLWinogradOutputTransformKernel &&) = default;
+ /** Default destructor */
+ ~CLWinogradOutputTransformKernel() = default;
+ /** Set the input and output tensor.
+ *
+ * @note Winograd output transform supports the following configurations for NCWH data layout
+ * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3),
+ * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
+ * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
+ *
+ * @note Winograd output transform supports the following configurations for NHWC data layout
+ * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
+ * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
+ *
+ * Strides: only unit strides
+ *
+ * @param[in] input Source tensor with shape [C, N, K, batches]. Data types supported: F16/F32.
+ * @param[in] bias Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. It can be a nullptr. Data type supported: as @p input
+ * @param[out] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_output_transform_shape. Data types supported: Same as @p input
+ * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ */
+ void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const WinogradInfo &winograd_info, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ /** Set the input and output tensor.
+ *
+ * @note Winograd output transform supports the following configurations for NCWH data layout
+ * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3),
+ * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
+ * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
+ *
+ * @note Winograd output transform supports the following configurations for NHWC data layout
+ * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
+ * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
+ *
+ * Strides: only unit strides
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor with shape [C, N, K, batches]. Data types supported: F16/F32.
+ * @param[in] bias Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. It can be a nullptr. Data type supported: as @p input
+ * @param[out] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_output_transform_shape. Data types supported: Same as @p input
+ * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ */
+ void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const WinogradInfo &winograd_info,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
+
+ /** Static function to check if given info will lead to a valid configuration of @ref CLWinogradOutputTransformKernel
+ *
+ * @note Winograd output transform supports the following configurations for NCWH data layout
+ * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3),
+ * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
+ * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
+ *
+ * @note Winograd output transform supports the following configurations for NHWC data layout
+ * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
+ * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
+ *
+ * Strides: only unit strides
+ *
+ * @param[in] input Source tensor with shape [C, N, K, batches]. Data types supported: F16/F32.
+ * @param[in] bias Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. It can be a nullptr. Data type supported: as @p input
+ * @param[out] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_output_transform_shape. Data types supported: Same as @p input
+ * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation @ref ActivationLayerInfo. Only RELU, BOUNDED_RELU, LU_BOUNDED_RELU, LEAKY_RELU and SOFT_RELU supported.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const WinogradInfo &winograd_info, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ using WinogradKey = std::pair<std::pair<int, int>, std::pair<int, int>>;
+
+ const ICLTensor *_input;
+ const ICLTensor *_bias;
+ ICLTensor *_output;
+ bool _is_nhwc;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLWINOGRADOUTPUTTRANSFORMKERNEL_H */
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLYOLOLayerKernel.h"
+#include "src/core/CL/kernels/CLYOLOLayerKernel.h"
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/CL/CLHelpers.h"
--- /dev/null
+/*
+ * Copyright (c) 2018-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CLYOLOLAYERKERNEL_H
+#define ARM_COMPUTE_CLYOLOLAYERKERNEL_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the YOLO layer kernel that performs partial activation.
+ * For each box, activate only:
+ * - x and y position (channel 0 and 1 of each box)
+ * - objectiveness (channel 4 of each box)
+ * - classes (channel 5 to (classes - 5) of each box)
+ */
+class CLYOLOLayerKernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ CLYOLOLayerKernel();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLYOLOLayerKernel(const CLYOLOLayerKernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ CLYOLOLayerKernel &operator=(const CLYOLOLayerKernel &) = delete;
+ /** Allow instances of this class to be moved */
+ CLYOLOLayerKernel(CLYOLOLayerKernel &&) = default;
+ /** Allow instances of this class to be moved */
+ CLYOLOLayerKernel &operator=(CLYOLOLayerKernel &&) = default;
+ /** Default destructor */
+ ~CLYOLOLayerKernel() = default;
+ /** Set the input and output tensor.
+ *
+ * @note If the output tensor is a nullptr, the activation function will be performed in-place
+ *
+ * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result
+ * of the activation function. Data types supported: F16/F32.
+ * @param[out] output Destination tensor. Data type supported: same as @p input
+ * @param[in] act_info Activation layer information.
+ * @param[in] num_classes Number of classes to activate (must be submultiple of @p input channels)
+ */
+ void configure(ICLTensor *input, ICLTensor *output, const ActivationLayerInfo &act_info, int32_t num_classes);
+ /** Set the input and output tensor.
+ *
+ * @note If the output tensor is a nullptr, the activation function will be performed in-place
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result
+ * of the activation function. Data types supported: F16/F32.
+ * @param[out] output Destination tensor. Data type supported: same as @p input
+ * @param[in] act_info Activation layer information.
+ * @param[in] num_classes Number of classes to activate (must be submultiple of @p input channels)
+ */
+ void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const ActivationLayerInfo &act_info, int32_t num_classes);
+ /** Static function to check if given info will lead to a valid configuration of @ref CLYOLOLayerKernel
+ *
+ * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result
+ * of the activation function. Data types supported: F16/F32.
+ * @param[in] output Destination tensor info. Data type supported: same as @p input
+ * @param[in] act_info Activation layer information.
+ * @param[in] num_classes Number of classes to activate (must be submultiple of @p input channels)
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info, int32_t num_classes);
+
+ // Inherited methods overridden:
+ void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+ ICLTensor *_input;
+ ICLTensor *_output;
+ bool _run_in_place;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_CLYOLOLAYERKERNEL_H */
--- /dev/null
+/*
+ * Copyright (c) 2017-2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_ICLDEPTHWISECONVOLUTIONKERNEL3x3_H
+#define ARM_COMPUTE_ICLDEPTHWISECONVOLUTIONKERNEL3x3_H
+
+#include "src/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the kernel to run a 3x3 depthwise convolution on a tensor.
+ */
+class ICLDepthwiseConvolutionLayer3x3Kernel : public ICLKernel
+{
+public:
+ /** Default constructor */
+ ICLDepthwiseConvolutionLayer3x3Kernel()
+ : _border_size(0), _input(), _output(), _weights(), _biases(), _conv_stride_y(1), _output_multipliers(), _output_shifts(), _is_quantized(false)
+ {
+ }
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ ICLDepthwiseConvolutionLayer3x3Kernel(const ICLDepthwiseConvolutionLayer3x3Kernel &) = delete;
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ ICLDepthwiseConvolutionLayer3x3Kernel &operator=(const ICLDepthwiseConvolutionLayer3x3Kernel &) = delete;
+ /** Default Move Constructor. */
+ ICLDepthwiseConvolutionLayer3x3Kernel(ICLDepthwiseConvolutionLayer3x3Kernel &&) = default;
+ /** Default move assignment operator */
+ ICLDepthwiseConvolutionLayer3x3Kernel &operator=(ICLDepthwiseConvolutionLayer3x3Kernel &&) = default;
+ /** Initialize the function's source, destination, conv and border_size.
+ *
+ * @param[in] input Source tensor. DataType supported: QASYMM8/F16/F32.
+ * @param[in] weights Weights tensor. A 3D tensor with dimensions [3, 3, IFM].
+ * Data type supported: Same as @p input, QASYMM8/QSYMM8_PER_CHANNEL when input is QASYMM8.
+ * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
+ * Data type supported: Same as @p input, S32 when input is QASYMM8.
+ * @param[out] output Destination tensor. Data type supported: Same as @p input.
+ * @param[in] conv_info Padding and stride information to use for the convolution.
+ * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported for QASYMM8.
+ * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+ * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization,
+ * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
+ * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization,
+ * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
+ */
+ virtual void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
+ unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U),
+ const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) = 0;
+ /** Initialize the function's source, destination, conv and border_size.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. DataType supported: QASYMM8/F16/F32.
+ * @param[in] weights Weights tensor. A 3D tensor with dimensions [3, 3, IFM].
+ * Data type supported: Same as @p input, QASYMM8/QSYMM8_PER_CHANNEL when input is QASYMM8.
+ * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
+ * Data type supported: Same as @p input, S32 when input is QASYMM8.
+ * @param[out] output Destination tensor. Data type supported: Same as @p input.
+ * @param[in] conv_info Padding and stride information to use for the convolution.
+ * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported for QASYMM8.
+ * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+ * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization,
+ * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
+ * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization,
+ * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
+ */
+ virtual void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
+ unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U),
+ const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) = 0;
+
+protected:
+ BorderSize _border_size;
+ const ICLTensor *_input;
+ ICLTensor *_output;
+ const ICLTensor *_weights;
+ const ICLTensor *_biases;
+ unsigned int _conv_stride_y;
+ const ICLTensor *_output_multipliers;
+ const ICLTensor *_output_shifts;
+ bool _is_quantized;
+};
+} // namespace arm_compute
+#endif /*ARM_COMPUTE_ICLDEPTHWISECONVOLUTIONKERNEL3x3_H */
#include "arm_compute/graph/backends/FunctionHelpers.h"
#include "arm_compute/runtime/CL/CLFunctions.h"
#include "arm_compute/runtime/CPP/CPPFunctions.h"
+#include "src/core/CL/CLKernels.h"
#include "support/Cast.h"
using namespace arm_compute::utils::cast;
#include "arm_compute/runtime/CL/CLFunctions.h"
#include "arm_compute/runtime/CPP/CPPFunctions.h"
+#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpReductionKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
+#include "src/core/CL/kernels/CLIm2ColKernel.h"
+#include "src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h"
+#include "src/core/CL/kernels/CLWeightsReshapeKernel.h"
#include "support/Cast.h"
using namespace arm_compute::utils::cast;
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/ICLOperator.h"
+#include "src/core/CL/ICLKernel.h"
+
namespace arm_compute
{
namespace experimental
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/runtime/CL/CLHelpers.h"
#include "arm_compute/runtime/CL/CLTuner.h"
#include "arm_compute/runtime/CL/tuners/Tuners.h"
+#include "src/core/CL/ICLKernel.h"
namespace arm_compute
{
#include "arm_compute/runtime/CL/CLTuner.h"
#include "arm_compute/runtime/CL/tuners/CLLWSList.h"
-#include "arm_compute/core/CL/ICLKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/ICLKernel.h"
#include "support/StringSupport.h"
#include <cerrno>
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#include "arm_compute/core/Error.h"
#include "arm_compute/runtime/CL/CLHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/ICLKernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "support/MemorySupport.h"
using namespace arm_compute;
ICLSimpleFunction::ICLSimpleFunction(CLRuntimeContext *ctx) // NOLINT
: _kernel(),
- _border_handler(),
+ _border_handler(support::cpp14::make_unique<CLFillBorderKernel>()),
_ctx(ctx)
{
}
+ICLSimpleFunction::~ICLSimpleFunction() = default;
+
void ICLSimpleFunction::run()
{
ARM_COMPUTE_ERROR_ON_MSG(!_kernel, "The child class didn't set the CL kernel or function isn't configured");
- schedule_kernel_on_ctx(_ctx, &_border_handler, false);
+ schedule_kernel_on_ctx(_ctx, _border_handler.get(), false);
schedule_kernel_on_ctx(_ctx, _kernel.get());
}
*/
#include "arm_compute/runtime/CL/functions/CLAbsoluteDifference.h"
-#include "arm_compute/core/CL/kernels/CLAbsoluteDifferenceKernel.h"
+#include "src/core/CL/kernels/CLAbsoluteDifferenceKernel.h"
#include "support/MemorySupport.h"
#include <utility>
*/
#include "arm_compute/runtime/CL/functions/CLAccumulate.h"
-#include "arm_compute/core/CL/kernels/CLAccumulateKernel.h"
+#include "src/core/CL/kernels/CLAccumulateKernel.h"
#include "support/MemorySupport.h"
#include <utility>
#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLActivationLayerKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLRuntimeContext.h"
+#include "src/core/CL/kernels/CLActivationLayerKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "src/core/CL/CLValidate.h"
+#include "src/core/CL/kernels/CLArgMinMaxLayerKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/runtime/Utils.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
{
}
+CLArgMinMaxLayer::~CLArgMinMaxLayer() = default;
+
Status CLArgMinMaxLayer::validate(const ITensorInfo *input, int axis, const ITensorInfo *output, const ReductionOperation &op)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(output_shape).set_data_type(output_data_type).reset_padding().set_is_resizable(true));
// Configure reduction operation kernels
- _reduction_kernels_vector.resize(_num_of_stages);
+ _reduction_kernels_vector.reserve(_num_of_stages);
+
+ auto add_reduction_kernel = [this, &compile_context, axis, op](const ICLTensor * input, const ICLTensor * prev_output, ICLTensor * output)
+ {
+ _reduction_kernels_vector.emplace_back(support::cpp14::make_unique<CLArgMinMaxLayerKernel>());
+ _reduction_kernels_vector.back()->configure(compile_context, input, prev_output, output, axis, op);
+ };
_memory_group.manage(&_not_reshaped_output);
// Create temporary tensors
if(_num_of_stages == 1)
{
- _reduction_kernels_vector[0].configure(compile_context, input, nullptr, &_not_reshaped_output, axis, op);
+ add_reduction_kernel(input, nullptr, &_not_reshaped_output);
}
else
{
// Apply ReductionOperation only on first kernel
_memory_group.manage(&_results_vector[0]);
- _reduction_kernels_vector[0].configure(compile_context, input, nullptr, &_results_vector[0], axis, op);
+ add_reduction_kernel(input, nullptr, &_results_vector[0]);
// Apply ReductionOperation on intermediate stages
for(unsigned int i = 1; i < _num_of_stages - 1; ++i)
{
_memory_group.manage(&_results_vector[i]);
- _reduction_kernels_vector[i].configure(compile_context, input, &_results_vector[i - 1], &_results_vector[i], axis, op);
+ add_reduction_kernel(input, &_results_vector[i - 1], &_results_vector[i]);
_results_vector[i - 1].allocator()->allocate();
}
// Apply ReductionOperation on the last stage
const unsigned int last_stage = _num_of_stages - 1;
- _reduction_kernels_vector[last_stage].configure(compile_context, input, &_results_vector[last_stage - 1], &_not_reshaped_output, axis, op);
+ add_reduction_kernel(input, &_results_vector[last_stage - 1], &_not_reshaped_output);
_results_vector[last_stage - 1].allocator()->allocate();
}
_reshape.configure(compile_context, &_not_reshaped_output, output);
for(unsigned int i = 0; i < _num_of_stages; ++i)
{
- CLScheduler::get().enqueue(_reduction_kernels_vector[i], false);
+ CLScheduler::get().enqueue(*_reduction_kernels_vector[i], false);
}
_reshape.run();
}
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "support/MemorySupport.h"
-using namespace arm_compute;
+#include "src/core/CL/kernels/CLBatchNormalizationLayerKernel.h"
+namespace arm_compute
+{
CLBatchNormalizationLayer::CLBatchNormalizationLayer()
- : _norm_kernel()
+ : _norm_kernel(support::cpp14::make_unique<CLBatchNormalizationLayerKernel>())
{
}
+CLBatchNormalizationLayer::~CLBatchNormalizationLayer() = default;
+
void CLBatchNormalizationLayer::configure(ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta, const ICLTensor *gamma, float epsilon,
ActivationLayerInfo act_info)
{
const ICLTensor *gamma, float epsilon,
ActivationLayerInfo act_info)
{
- _norm_kernel.configure(compile_context, input, output, mean, var, beta, gamma, epsilon, act_info);
+ _norm_kernel->configure(compile_context, input, output, mean, var, beta, gamma, epsilon, act_info);
}
Status CLBatchNormalizationLayer::validate(const ITensorInfo *input, const ITensorInfo *output,
void CLBatchNormalizationLayer::run()
{
- CLScheduler::get().enqueue(_norm_kernel, true);
+ CLScheduler::get().enqueue(*_norm_kernel, true);
}
+} // namespace arm_compute
\ No newline at end of file
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
-using namespace arm_compute;
+#include "src/core/CL/kernels/CLBatchToSpaceLayerKernel.h"
+#include "support/MemorySupport.h"
+namespace arm_compute
+{
CLBatchToSpaceLayer::CLBatchToSpaceLayer()
- : _batch_to_space_kernel()
+ : _batch_to_space_kernel(support::cpp14::make_unique<CLBatchToSpaceLayerKernel>())
{
}
+CLBatchToSpaceLayer::~CLBatchToSpaceLayer() = default;
+
void CLBatchToSpaceLayer::configure(const ICLTensor *input, const ICLTensor *block_shape, ICLTensor *output)
{
configure(CLKernelLibrary::get().get_compile_context(), input, block_shape, output);
void CLBatchToSpaceLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *block_shape, ICLTensor *output)
{
- _batch_to_space_kernel.configure(compile_context, input, block_shape, output);
+ _batch_to_space_kernel->configure(compile_context, input, block_shape, output);
}
void CLBatchToSpaceLayer::configure(const ICLTensor *input, int32_t block_shape_x, int32_t block_shape_y, ICLTensor *output)
void CLBatchToSpaceLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, int32_t block_shape_x, int32_t block_shape_y, ICLTensor *output)
{
- _batch_to_space_kernel.configure(compile_context, input, block_shape_x, block_shape_y, output);
+ _batch_to_space_kernel->configure(compile_context, input, block_shape_x, block_shape_y, output);
}
Status CLBatchToSpaceLayer::validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *output)
void CLBatchToSpaceLayer::run()
{
- CLScheduler::get().enqueue(_batch_to_space_kernel, true);
+ CLScheduler::get().enqueue(*_batch_to_space_kernel, true);
}
+} // namespace arm_compute
*/
#include "arm_compute/runtime/CL/functions/CLBitwiseAnd.h"
-#include "arm_compute/core/CL/kernels/CLBitwiseAndKernel.h"
+#include "src/core/CL/kernels/CLBitwiseAndKernel.h"
#include "support/MemorySupport.h"
#include <utility>
*/
#include "arm_compute/runtime/CL/functions/CLBitwiseNot.h"
-#include "arm_compute/core/CL/kernels/CLBitwiseNotKernel.h"
+#include "src/core/CL/kernels/CLBitwiseNotKernel.h"
#include "support/MemorySupport.h"
#include <utility>
*/
#include "arm_compute/runtime/CL/functions/CLBitwiseOr.h"
-#include "arm_compute/core/CL/kernels/CLBitwiseOrKernel.h"
+#include "src/core/CL/kernels/CLBitwiseOrKernel.h"
#include "support/MemorySupport.h"
#include <utility>
*/
#include "arm_compute/runtime/CL/functions/CLBitwiseXor.h"
-#include "arm_compute/core/CL/kernels/CLBitwiseXorKernel.h"
+#include "src/core/CL/kernels/CLBitwiseXorKernel.h"
#include "support/MemorySupport.h"
#include <utility>
*/
#include "arm_compute/runtime/CL/functions/CLBoundingBoxTransform.h"
-#include "arm_compute/core/CL/kernels/CLBoundingBoxTransformKernel.h"
+#include "src/core/CL/kernels/CLBoundingBoxTransformKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
*/
#include "arm_compute/runtime/CL/functions/CLBox3x3.h"
-#include "arm_compute/core/CL/kernels/CLBox3x3Kernel.h"
#include "arm_compute/core/PixelValue.h"
+#include "src/core/CL/kernels/CLBox3x3Kernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
#include "support/MemorySupport.h"
#include <utility>
auto k = arm_compute::support::cpp14::make_unique<CLBox3x3Kernel>();
k->configure(compile_context, input, output, border_mode == BorderMode::UNDEFINED);
_kernel = std::move(k);
- _border_handler.configure(compile_context, input, BorderSize(1), border_mode, PixelValue(constant_border_value));
+ _border_handler->configure(compile_context, input, BorderSize(1), border_mode, PixelValue(constant_border_value));
}
#include "arm_compute/runtime/CL/functions/CLSobel3x3.h"
#include "arm_compute/runtime/CL/functions/CLSobel5x5.h"
#include "arm_compute/runtime/CL/functions/CLSobel7x7.h"
+#include "src/core/CL/kernels/CLCannyEdgeKernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLSobel5x5Kernel.h"
+#include "src/core/CL/kernels/CLSobel7x7Kernel.h"
#include "support/MemorySupport.h"
using namespace arm_compute;
CLCannyEdge::CLCannyEdge(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
: _memory_group(std::move(memory_manager)),
_sobel(),
- _gradient(),
- _border_mag_gradient(),
- _non_max_suppr(),
- _edge_trace(),
+ _gradient(support::cpp14::make_unique<CLGradientKernel>()),
+ _border_mag_gradient(support::cpp14::make_unique<CLFillBorderKernel>()),
+ _non_max_suppr(support::cpp14::make_unique<CLEdgeNonMaxSuppressionKernel>()),
+ _edge_trace(support::cpp14::make_unique<CLEdgeTraceKernel>()),
_gx(),
_gy(),
_mag(),
{
}
+CLCannyEdge::~CLCannyEdge() = default;
+
void CLCannyEdge::configure(ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr, int32_t gradient_size, int32_t norm_type, BorderMode border_mode,
uint8_t constant_border_value)
{
_memory_group.manage(&_phase);
// Configure gradient
- _gradient.configure(compile_context, &_gx, &_gy, &_mag, &_phase, norm_type);
+ _gradient->configure(compile_context, &_gx, &_gy, &_mag, &_phase, norm_type);
// Allocate intermediate buffers
_gx.allocator()->allocate();
_memory_group.manage(&_nonmax);
// Configure non-maxima suppression
- _non_max_suppr.configure(compile_context, &_mag, &_phase, &_nonmax, lower_thr, border_mode == BorderMode::UNDEFINED);
+ _non_max_suppr->configure(compile_context, &_mag, &_phase, &_nonmax, lower_thr, border_mode == BorderMode::UNDEFINED);
// Allocate intermediate buffers
_phase.allocator()->allocate();
// Fill border around magnitude image as non-maxima suppression will access
// it. If border mode is undefined filling the border is a nop.
- _border_mag_gradient.configure(compile_context, &_mag, _non_max_suppr.border_size(), border_mode, constant_border_value);
+ _border_mag_gradient->configure(compile_context, &_mag, _non_max_suppr->border_size(), border_mode, constant_border_value);
// Allocate intermediate buffers
_mag.allocator()->allocate();
_memory_group.manage(&_l1_list_counter);
// Configure edge tracing
- _edge_trace.configure(compile_context, &_nonmax, output, upper_thr, lower_thr, &_visited, &_recorded, &_l1_stack, &_l1_list_counter);
+ _edge_trace->configure(compile_context, &_nonmax, output, upper_thr, lower_thr, &_visited, &_recorded, &_l1_stack, &_l1_list_counter);
// Allocate intermediate buffers
_visited.allocator()->allocate();
_sobel->run();
// Run phase and magnitude calculation
- CLScheduler::get().enqueue(_gradient, false);
+ CLScheduler::get().enqueue(*_gradient, false);
// Fill border before non-maxima suppression. Nop for border mode undefined.
- CLScheduler::get().enqueue(_border_mag_gradient, false);
+ CLScheduler::get().enqueue(*_border_mag_gradient, false);
// Run non max suppresion
_nonmax.clear(CLScheduler::get().queue());
- CLScheduler::get().enqueue(_non_max_suppr, false);
+ CLScheduler::get().enqueue(*_non_max_suppr, false);
// Clear temporary structures and run edge trace
_output->clear(CLScheduler::get().queue());
_recorded.clear(CLScheduler::get().queue());
_l1_list_counter.clear(CLScheduler::get().queue());
_l1_stack.clear(CLScheduler::get().queue());
- CLScheduler::get().enqueue(_edge_trace, true);
+ CLScheduler::get().enqueue(*_edge_trace, true);
}
*/
#include "arm_compute/runtime/CL/functions/CLCast.h"
-#include "arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h"
+#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
#include "support/MemorySupport.h"
#include <utility>
*/
#include "arm_compute/runtime/CL/functions/CLChannelCombine.h"
-#include "arm_compute/core/CL/kernels/CLChannelCombineKernel.h"
+#include "src/core/CL/kernels/CLChannelCombineKernel.h"
#include "support/MemorySupport.h"
#include <utility>
*/
#include "arm_compute/runtime/CL/functions/CLChannelExtract.h"
-#include "arm_compute/core/CL/kernels/CLChannelExtractKernel.h"
+#include "src/core/CL/kernels/CLChannelExtractKernel.h"
#include "support/MemorySupport.h"
#include <utility>
*/
#include "arm_compute/runtime/CL/functions/CLChannelShuffleLayer.h"
-#include "arm_compute/core/CL/kernels/CLChannelShuffleLayerKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/CL/kernels/CLChannelShuffleLayerKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
*/
#include "arm_compute/runtime/CL/functions/CLColorConvert.h"
-#include "arm_compute/core/CL/kernels/CLColorConvertKernel.h"
+#include "src/core/CL/kernels/CLColorConvertKernel.h"
#include "support/MemorySupport.h"
#include <utility>
#include "arm_compute/runtime/CL/functions/CLComparison.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLComparisonKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/CL/kernels/CLComparisonKernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
if(broadcasted_info->info()->dimension(0) == 1)
{
- _border_handler.configure(compile_context, broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE);
+ _border_handler->configure(compile_context, broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE);
}
}
}
if(broadcasted_info->info()->dimension(0) == 1)
{
- _border_handler.configure(compile_context, broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE);
+ _border_handler->configure(compile_context, broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE);
}
}
}
* SOFTWARE.
*/
#include "arm_compute/runtime/CL/functions/CLComputeAllAnchors.h"
+#include "src/core/CL/kernels/CLGenerateProposalsLayerKernel.h"
#include "support/MemorySupport.h"
*/
#include "arm_compute/runtime/CL/functions/CLConcatenateLayer.h"
-#include "arm_compute/core/CL/kernels/CLBatchConcatenateLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLHeightConcatenateLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h"
-#include "arm_compute/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h"
-#include "arm_compute/core/CL/kernels/CLWidthConcatenateLayerKernel.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLDepthConcatenateLayerKernel.h"
+#include "src/core/CL/kernels/CLHeightConcatenateLayerKernel.h"
+#include "src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h"
+#include "src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.h"
+#include "src/core/CL/kernels/CLWidthConcatenateLayerKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
+#include "src/core/CL/kernels/CLBatchConcatenateLayerKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "support/MemorySupport.h"
* SOFTWARE.
*/
#include "arm_compute/runtime/CL/functions/CLConvertFullyConnectedWeights.h"
+#include "src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
#include "support/MemorySupport.h"
#include "arm_compute/runtime/CL/functions/CLConvolution.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLConvolutionKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/ITensorAllocator.h"
+#include "src/core/CL/kernels/CLConvolutionKernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
#include "support/MemorySupport.h"
#include <utility>
auto k = arm_compute::support::cpp14::make_unique<CLConvolution3x3Kernel>();
k->configure(compile_context, input, output, conv, scale, border_mode == BorderMode::UNDEFINED);
_kernel = std::move(k);
- _border_handler.configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+ _border_handler->configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
}
template <unsigned int matrix_size>
CLConvolutionSquare<matrix_size>::CLConvolutionSquare(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _tmp(), _is_separable(false), _kernel_hor(), _kernel_vert(), _kernel(), _border_handler()
+ : _memory_group(std::move(memory_manager)), _tmp(), _is_separable(false), _kernel_hor(support::cpp14::make_unique<CLSeparableConvolutionHorKernel<matrix_size>>()),
+ _kernel_vert(support::cpp14::make_unique<CLSeparableConvolutionVertKernel<matrix_size>>()), _kernel(support::cpp14::make_unique<CLConvolutionKernel<matrix_size>>()),
+ _border_handler(support::cpp14::make_unique<CLFillBorderKernel>())
{
}
+template <unsigned int matrix_size>
+CLConvolutionSquare<matrix_size>::~CLConvolutionSquare() = default;
+
template <unsigned int matrix_size>
void CLConvolutionSquare<matrix_size>::configure(ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode,
uint8_t constant_border_value)
scale = calculate_matrix_scale(conv, matrix_size);
}
- _kernel_hor.configure(compile_context, input, &_tmp, conv_row.data(), border_mode == BorderMode::UNDEFINED);
- _kernel_vert.configure(compile_context, &_tmp, output, conv_col.data(), scale, border_mode == BorderMode::UNDEFINED, type_pair.second);
- _border_handler.configure(compile_context, input, _kernel_hor.border_size(), border_mode, PixelValue(constant_border_value));
+ _kernel_hor->configure(compile_context, input, &_tmp, conv_row.data(), border_mode == BorderMode::UNDEFINED);
+ _kernel_vert->configure(compile_context, &_tmp, output, conv_col.data(), scale, border_mode == BorderMode::UNDEFINED, type_pair.second);
+ _border_handler->configure(compile_context, input, _kernel_hor->border_size(), border_mode, PixelValue(constant_border_value));
// Allocate intermediate buffer
_tmp.allocator()->allocate();
}
else
{
- _kernel.configure(compile_context, input, output, conv, scale, border_mode == BorderMode::UNDEFINED);
- _border_handler.configure(compile_context, input, _kernel.border_size(), border_mode, PixelValue(constant_border_value));
+ _kernel->configure(compile_context, input, output, conv, scale, border_mode == BorderMode::UNDEFINED);
+ _border_handler->configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
}
}
template <unsigned int matrix_size>
void CLConvolutionSquare<matrix_size>::run()
{
- CLScheduler::get().enqueue(_border_handler);
+ CLScheduler::get().enqueue(*_border_handler);
if(_is_separable)
{
MemoryGroupResourceScope scope_mg(_memory_group);
- CLScheduler::get().enqueue(_kernel_hor, false);
- CLScheduler::get().enqueue(_kernel_vert);
+ CLScheduler::get().enqueue(*_kernel_hor, false);
+ CLScheduler::get().enqueue(*_kernel_vert);
}
else
{
- CLScheduler::get().enqueue(_kernel);
+ CLScheduler::get().enqueue(*_kernel);
}
}
auto k = arm_compute::support::cpp14::make_unique<CLConvolutionRectangleKernel>();
k->configure(compile_context, input, output, conv, rows, cols, scale, border_mode == BorderMode::UNDEFINED);
_kernel = std::move(k);
- _border_handler.configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+ _border_handler->configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
}
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
-
#include "support/MemorySupport.h"
#include <cmath>
{
}
+CLConvolutionLayer::~CLConvolutionLayer() = default;
+
void CLConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info,
const Size2D &dilation, const ActivationLayerInfo &act_info, bool enable_fast_math, unsigned int num_groups)
{
#include "arm_compute/runtime/CL/functions/CLCopy.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLCopyKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/CL/kernels/CLCopyKernel.h"
#include "support/MemorySupport.h"
#include <utility>
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLCopyKernel.h"
+#include "src/core/CL/kernels/CLCropKernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLMemsetKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
{
}
+CLCropResize::~CLCropResize() = default;
+
Status CLCropResize::validate(const ITensorInfo *input, ITensorInfo *boxes, ITensorInfo *box_ind, const ITensorInfo *output,
Coordinates2D crop_size, InterpolationPolicy method, float extrapolation_value)
{
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
-
#include "support/MemorySupport.h"
#include <cmath>
#include "arm_compute/core/Utils.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
+#include "src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h"
+#include "src/core/CL/kernels/CLMemsetKernel.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
CLDeconvolutionLayerUpsample::CLDeconvolutionLayerUpsample() // NOLINT
- : _upsample(),
- _memset(),
+ : _upsample(support::cpp14::make_unique<CLDeconvolutionLayerUpsampleKernel>()),
+ _memset(support::cpp14::make_unique<CLMemsetKernel>()),
_output(nullptr)
{
}
+CLDeconvolutionLayerUpsample::~CLDeconvolutionLayerUpsample() = default;
+
Status CLDeconvolutionLayerUpsample::validate(const ITensorInfo *input, const ITensorInfo *output, const PadStrideInfo &info)
{
return CLDeconvolutionLayerUpsampleKernel::validate(input, output, info);
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
_output = output;
- _memset.configure(compile_context, _output, PixelValue(0, _output->info()->data_type(), _output->info()->quantization_info()));
- _upsample.configure(compile_context, input, _output, info);
+ _memset->configure(compile_context, _output, PixelValue(0, _output->info()->data_type(), _output->info()->quantization_info()));
+ _upsample->configure(compile_context, input, _output, info);
}
void CLDeconvolutionLayerUpsample::run()
{
- CLScheduler::get().enqueue(_memset, false);
- CLScheduler::get().enqueue(_upsample, true);
+ CLScheduler::get().enqueue(*_memset, false);
+ CLScheduler::get().enqueue(*_upsample, true);
}
} // namespace arm_compute
*/
#include "arm_compute/runtime/CL/functions/CLDepthConvertLayer.h"
-#include "arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h"
+#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
#include "support/MemorySupport.h"
#include <utility>
*/
#include "arm_compute/runtime/CL/functions/CLDepthToSpaceLayer.h"
-#include "arm_compute/core/CL/kernels/CLDepthToSpaceLayerKernel.h"
+#include "src/core/CL/kernels/CLDepthToSpaceLayerKernel.h"
#include "support/MemorySupport.h"
#include <utility>
#include "arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h"
-#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h"
+#include "src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h"
+#include "src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h"
+#include "src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h"
+#include "src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h"
+#include "src/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerGeneric::CLDepthwiseConvolutionLayerGeneric(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(std::move(memory_manager)),
- _dwc_native_kernel(),
+ _dwc_native_kernel(support::cpp14::make_unique<CLDepthwiseConvolutionLayerNativeKernel>()),
_permute_input_to_nhwc(),
_permute_weights_to_nhwc(),
_permute_output_to_nchw(),
{
}
+CLDepthwiseConvolutionLayer::~CLDepthwiseConvolutionLayer() = default;
+
void CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerGeneric::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation)
{
dwc_weights_info.n0 = (depth_multiplier == 1) ? 8 : 1;
DWCKernelInfo dwc_info;
dwc_info.activation_info = act_info;
- _dwc_native_kernel.configure(compile_context, input_to_use, weights_to_use, biases, output_to_use,
- dwc_weights_info, dwc_info, conv_info, depth_multiplier, dilation,
- output_multipliers_to_use, output_shifts_to_use);
+ _dwc_native_kernel->configure(compile_context, input_to_use, weights_to_use, biases, output_to_use,
+ dwc_weights_info, dwc_info, conv_info, depth_multiplier, dilation,
+ output_multipliers_to_use, output_shifts_to_use);
if(_needs_permute)
{
{
_permute_input_to_nhwc.run();
}
- CLScheduler::get().enqueue(_dwc_native_kernel);
+ CLScheduler::get().enqueue(*_dwc_native_kernel);
if(_needs_permute)
{
_permute_output_to_nchw.run();
CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerInternal3x3::CLDepthwiseConvolutionLayerInternal3x3(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(std::move(memory_manager)),
_kernel(nullptr),
- _border_handler(),
+ _border_handler(support::cpp14::make_unique<CLFillBorderKernel>()),
_permute_input_to_nchw(),
_permute_weights_to_nchw(),
_permute_output_to_nhwc(),
- _reshape_weights(),
+ _reshape_weights(support::cpp14::make_unique<CLDepthwiseConvolutionLayerReshapeWeightsKernel>()),
_permuted_input(),
_permuted_weights(),
_permuted_output(),
// Perform validation step
ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
ARM_COMPUTE_ERROR_THROW_ON(CLDepthwiseConvolutionLayerInternal3x3::validate(input->info(),
- weights->info(),
- biases != nullptr ? biases->info() : nullptr,
- output->info(),
- conv_info,
- depth_multiplier,
- act_info,
- gpu_target,
- dilation));
+ weights->info(),
+ biases != nullptr ? biases->info() : nullptr,
+ output->info(),
+ conv_info,
+ depth_multiplier,
+ act_info,
+ gpu_target,
+ dilation));
const bool is_nhwc = input->info()->data_layout() == DataLayout::NHWC;
_is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type());
{
if(_needs_weights_reshape)
{
- _reshape_weights.configure(compile_context, weights, &_permuted_weights, info);
+ _reshape_weights->configure(compile_context, weights, &_permuted_weights, info);
weights_to_use = &_permuted_weights;
}
_kernel = arm_compute::support::cpp14::make_unique<CLDepthwiseConvolutionLayer3x3NHWCKernel>();
{
zero_value = PixelValue(static_cast<uint8_t>(input->info()->quantization_info().uniform().offset));
}
- _border_handler.configure(compile_context, input_to_use, _kernel->border_size(), BorderMode::CONSTANT, zero_value);
+ _border_handler->configure(compile_context, input_to_use, _kernel->border_size(), BorderMode::CONSTANT, zero_value);
}
Status CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayerInternal3x3::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output,
{
_permute_input_to_nchw.run();
}
- CLScheduler::get().enqueue(_border_handler);
+ CLScheduler::get().enqueue(*_border_handler);
CLScheduler::get().enqueue(*_kernel);
if(_needs_permute)
ARM_COMPUTE_ERROR_ON(_needs_permute);
ARM_COMPUTE_ERROR_ON(!_original_weights->is_used());
_permuted_weights.allocator()->allocate();
- CLScheduler::get().enqueue(_reshape_weights);
+ CLScheduler::get().enqueue(*_reshape_weights);
_original_weights->mark_as_unused();
}
_is_prepared = true;
void CLDepthwiseConvolutionLayer::configure(const CLCompileContext &compile_context, ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output,
const PadStrideInfo &conv_info,
- unsigned int depth_multiplier,
+ unsigned int depth_multiplier,
ActivationLayerInfo act_info, const Size2D &dilation)
{
const GPUTarget gpu_target = CLScheduler::get().target();
*/
#include "arm_compute/runtime/CL/functions/CLDequantizationLayer.h"
-#include "arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h"
+#include "src/core/CL/kernels/CLDequantizationLayerKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
*/
#include "arm_compute/runtime/CL/functions/CLDerivative.h"
-#include "arm_compute/core/CL/kernels/CLDerivativeKernel.h"
#include "arm_compute/core/PixelValue.h"
+#include "src/core/CL/kernels/CLDerivativeKernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
#include "support/MemorySupport.h"
#include <utility>
auto k = arm_compute::support::cpp14::make_unique<CLDerivativeKernel>();
k->configure(compile_context, input, output_x, output_y, border_mode == BorderMode::UNDEFINED);
_kernel = std::move(k);
- _border_handler.configure(compile_context, input, BorderSize(1), border_mode, PixelValue(constant_border_value));
+ _border_handler->configure(compile_context, input, BorderSize(1), border_mode, PixelValue(constant_border_value));
}
*/
#include "arm_compute/runtime/CL/functions/CLDilate.h"
-#include "arm_compute/core/CL/kernels/CLDilateKernel.h"
#include "arm_compute/core/PixelValue.h"
+#include "src/core/CL/kernels/CLDilateKernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
#include "support/MemorySupport.h"
#include <utility>
auto k = arm_compute::support::cpp14::make_unique<CLDilateKernel>();
k->configure(compile_context, input, output, border_mode == BorderMode::UNDEFINED);
_kernel = std::move(k);
- _border_handler.configure(compile_context, input, BorderSize(1), border_mode, PixelValue(constant_border_value));
+ _border_handler->configure(compile_context, input, BorderSize(1), border_mode, PixelValue(constant_border_value));
}
#include "arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h"
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLDirectConvolutionLayerKernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "support/MemorySupport.h"
using namespace arm_compute;
CLDirectConvolutionLayer::CLDirectConvolutionLayer()
- : _direct_conv_kernel(), _input_border_handler(), _activationlayer_function(), _is_activationlayer_enabled(false)
+ : _direct_conv_kernel(support::cpp14::make_unique<CLDirectConvolutionLayerKernel>()), _input_border_handler(support::cpp14::make_unique<CLFillBorderKernel>()), _activationlayer_function(),
+ _is_activationlayer_enabled(false)
{
}
+CLDirectConvolutionLayer::~CLDirectConvolutionLayer() = default;
+
void CLDirectConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info)
{
configure(CLKernelLibrary::get().get_compile_context(), input, weights, biases, output, conv_info, act_info);
const ActivationLayerInfo &act_info)
{
// Set GPU target
- _direct_conv_kernel.set_target(CLScheduler::get().target());
+ _direct_conv_kernel->set_target(CLScheduler::get().target());
// Configure direct convolution
- _direct_conv_kernel.configure(compile_context, input, weights, biases, output, conv_info);
+ _direct_conv_kernel->configure(compile_context, input, weights, biases, output, conv_info);
// Configure border handler
PixelValue &&zero_value(0.f);
{
zero_value = PixelValue(0, input->info()->data_type(), input->info()->quantization_info());
}
- _input_border_handler.configure(compile_context, input, _direct_conv_kernel.border_size(), BorderMode::CONSTANT, zero_value);
+ _input_border_handler->configure(compile_context, input, _direct_conv_kernel->border_size(), BorderMode::CONSTANT, zero_value);
// Tune kernels
- CLScheduler::get().tune_kernel_static(_direct_conv_kernel);
+ CLScheduler::get().tune_kernel_static(*_direct_conv_kernel);
_is_activationlayer_enabled = act_info.enabled();
void CLDirectConvolutionLayer::run()
{
// Run border handler
- CLScheduler::get().enqueue(_input_border_handler, false);
+ CLScheduler::get().enqueue(*_input_border_handler, false);
// Run direct convolution
- CLScheduler::get().enqueue(_direct_conv_kernel);
+ CLScheduler::get().enqueue(*_direct_conv_kernel);
//Run Activation Layer
if(_is_activationlayer_enabled)
*/
#include "arm_compute/runtime/CL/functions/CLDirectDeconvolutionLayer.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
+#include "src/core/CL/kernels/CLMemsetKernel.h"
+#include "src/core/CL/kernels/CLWeightsReshapeKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
#include <memory>
*/
#include "arm_compute/runtime/CL/functions/CLElementWiseUnaryLayer.h"
-#include "arm_compute/core/CL/kernels/CLElementWiseUnaryLayerKernel.h"
+#include "src/core/CL/kernels/CLElementWiseUnaryLayerKernel.h"
#include "support/MemorySupport.h"
#include <utility>
#include "arm_compute/runtime/CL/functions/CLElementwiseOperations.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLElementwiseOperationKernel.h"
#include "support/MemorySupport.h"
#include <utility>
#include "arm_compute/core/CL/OpenCL.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLHistogramKernel.h"
+#include "src/core/CL/kernels/CLTableLookupKernel.h"
+#include "support/MemorySupport.h"
#include <algorithm>
#include <cmath>
} // namespace
CLEqualizeHistogram::CLEqualizeHistogram()
- : _histogram_kernel(), _border_histogram_kernel(), _map_histogram_kernel(), _hist(nr_bins, 0, max_range), _cum_dist(nr_bins, 0, max_range), _cd_lut(nr_bins, DataType::U8)
+ : _histogram_kernel(support::cpp14::make_unique<CLHistogramKernel>()),
+ _border_histogram_kernel(support::cpp14::make_unique<CLHistogramBorderKernel>()),
+ _map_histogram_kernel(support::cpp14::make_unique<CLTableLookupKernel>()),
+ _hist(nr_bins, 0, max_range),
+ _cum_dist(nr_bins, 0, max_range),
+ _cd_lut(nr_bins, DataType::U8)
{
}
+CLEqualizeHistogram::~CLEqualizeHistogram() = default;
+
void CLEqualizeHistogram::configure(const ICLImage *input, ICLImage *output)
{
configure(CLKernelLibrary::get().get_compile_context(), input, output);
void CLEqualizeHistogram::configure(const CLCompileContext &compile_context, const ICLImage *input, ICLImage *output)
{
- _histogram_kernel.configure(compile_context, input, &_hist);
- _border_histogram_kernel.configure(compile_context, input, &_hist);
- _map_histogram_kernel.configure(compile_context, input, &_cd_lut, output);
+ _histogram_kernel->configure(compile_context, input, &_hist);
+ _border_histogram_kernel->configure(compile_context, input, &_hist);
+ _map_histogram_kernel->configure(compile_context, input, &_cd_lut, output);
}
void CLEqualizeHistogram::run()
{
// Calculate histogram of input.
- CLScheduler::get().enqueue(_histogram_kernel, false);
+ CLScheduler::get().enqueue(*_histogram_kernel, false);
// Calculate remaining pixels when image is not multiple of the elements of histogram kernel
- CLScheduler::get().enqueue(_border_histogram_kernel, false);
+ CLScheduler::get().enqueue(*_border_histogram_kernel, false);
// Calculate cumulative distribution of histogram and create LUT.
calculate_cum_dist_and_lut(_hist, _cum_dist, _cd_lut);
// Map input to output using created LUT.
- CLScheduler::get().enqueue(_map_histogram_kernel);
+ CLScheduler::get().enqueue(*_map_histogram_kernel);
}
*/
#include "arm_compute/runtime/CL/functions/CLErode.h"
-#include "arm_compute/core/CL/kernels/CLErodeKernel.h"
#include "arm_compute/core/PixelValue.h"
+#include "src/core/CL/kernels/CLErodeKernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
#include "support/MemorySupport.h"
#include <utility>
auto k = arm_compute::support::cpp14::make_unique<CLErodeKernel>();
k->configure(compile_context, input, output, border_mode == BorderMode::UNDEFINED);
_kernel = std::move(k);
- _border_handler.configure(compile_context, input, BorderSize(1), border_mode, PixelValue(constant_border_value));
+ _border_handler->configure(compile_context, input, BorderSize(1), border_mode, PixelValue(constant_border_value));
}
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLFFTDigitReverseKernel.h"
+#include "src/core/CL/kernels/CLFFTRadixStageKernel.h"
+#include "src/core/CL/kernels/CLFFTScaleKernel.h"
#include "src/core/utils/helpers/fft.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
CLFFT1D::CLFFT1D(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _digit_reverse_kernel(), _fft_kernels(), _scale_kernel(), _digit_reversed_input(), _digit_reverse_indices(), _num_ffts(0), _run_scale(false)
+ : _memory_group(std::move(memory_manager)),
+ _digit_reverse_kernel(support::cpp14::make_unique<CLFFTDigitReverseKernel>()),
+ _fft_kernels(),
+ _scale_kernel(support::cpp14::make_unique<CLFFTScaleKernel>()),
+ _digit_reversed_input(),
+ _digit_reverse_indices(),
+ _num_ffts(0),
+ _run_scale(false)
{
}
+CLFFT1D::~CLFFT1D() = default;
+
void CLFFT1D::configure(const ICLTensor *input, ICLTensor *output, const FFT1DInfo &config)
{
configure(CLKernelLibrary::get().get_compile_context(), input, output, config);
TensorInfo digit_reverse_indices_info(TensorShape(input->info()->tensor_shape()[config.axis]), 1, DataType::U32);
_digit_reverse_indices.allocator()->init(digit_reverse_indices_info);
_memory_group.manage(&_digit_reversed_input);
- _digit_reverse_kernel.configure(compile_context, input, &_digit_reversed_input, &_digit_reverse_indices, digit_reverse_config);
+ _digit_reverse_kernel->configure(compile_context, input, &_digit_reversed_input, &_digit_reverse_indices, digit_reverse_config);
// Create and configure FFT kernels
unsigned int Nx = 1;
_num_ffts = decomposed_vector.size();
- _fft_kernels.resize(_num_ffts);
+ _fft_kernels.reserve(_num_ffts);
for(unsigned int i = 0; i < _num_ffts; ++i)
{
const unsigned int radix_for_stage = decomposed_vector.at(i);
fft_kernel_info.radix = radix_for_stage;
fft_kernel_info.Nx = Nx;
fft_kernel_info.is_first_stage = (i == 0);
- _fft_kernels[i].configure(compile_context, &_digit_reversed_input, ((i == (_num_ffts - 1)) && !is_c2r) ? output : nullptr, fft_kernel_info);
+ _fft_kernels.emplace_back(support::cpp14::make_unique<CLFFTRadixStageKernel>());
+ _fft_kernels.back()->configure(compile_context, &_digit_reversed_input, ((i == (_num_ffts - 1)) && !is_c2r) ? output : nullptr, fft_kernel_info);
Nx *= radix_for_stage;
}
FFTScaleKernelInfo scale_config;
scale_config.scale = static_cast<float>(N);
scale_config.conjugate = config.direction == FFTDirection::Inverse;
- is_c2r ? _scale_kernel.configure(compile_context, &_digit_reversed_input, output, scale_config) : _scale_kernel.configure(output, nullptr, scale_config);
+ is_c2r ? _scale_kernel->configure(compile_context, &_digit_reversed_input, output, scale_config) : _scale_kernel->configure(output, nullptr, scale_config);
}
// Allocate tensors
MemoryGroupResourceScope scope_mg(_memory_group);
// Run digit reverse
- CLScheduler::get().enqueue(_digit_reverse_kernel, false);
+ CLScheduler::get().enqueue(*_digit_reverse_kernel, false);
// Run radix kernels
for(unsigned int i = 0; i < _num_ffts; ++i)
{
- CLScheduler::get().enqueue(_fft_kernels[i], i == (_num_ffts - 1) && !_run_scale);
+ CLScheduler::get().enqueue(*_fft_kernels[i], i == (_num_ffts - 1) && !_run_scale);
}
// Run output scaling
if(_run_scale)
{
- CLScheduler::get().enqueue(_scale_kernel, true);
+ CLScheduler::get().enqueue(*_scale_kernel, true);
}
}
} // namespace arm_compute
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLFFTDigitReverseKernel.h"
+#include "src/core/CL/kernels/CLFFTRadixStageKernel.h"
+#include "src/core/CL/kernels/CLFFTScaleKernel.h"
namespace arm_compute
{
{
}
+CLFFT2D::~CLFFT2D() = default;
+
void CLFFT2D::configure(const ICLTensor *input, ICLTensor *output, const FFT2DInfo &config)
{
configure(CLKernelLibrary::get().get_compile_context(), input, output, config);
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CPP/CPPScheduler.h"
+#include "src/core/CL/kernels/CLCopyKernel.h"
+#include "src/core/CL/kernels/CLFFTDigitReverseKernel.h"
+#include "src/core/CL/kernels/CLFFTRadixStageKernel.h"
+#include "src/core/CL/kernels/CLFFTScaleKernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLPadLayerKernel.h"
+#include "src/core/CL/kernels/CLReductionOperationKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/utils/helpers/fft.h"
#include "arm_compute/runtime/CL/functions/CLFastCorners.h"
#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/CL/kernels/CLFastCornersKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/ITensorAllocator.h"
+#include "src/core/CL/kernels/CLFastCornersKernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "support/MemorySupport.h"
#include <algorithm>
#include <cstring>
CLFastCorners::CLFastCorners(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(std::move(memory_manager)),
- _fast_corners_kernel(),
+ _fast_corners_kernel(support::cpp14::make_unique<CLFastCornersKernel>()),
_suppr_func(),
- _copy_array_kernel(),
+ _copy_array_kernel(support::cpp14::make_unique<CLCopyToArrayKernel>()),
_output(),
_suppr(),
_win(),
{
}
+CLFastCorners::~CLFastCorners() = default;
+
void CLFastCorners::configure(const ICLImage *input, float threshold, bool nonmax_suppression, ICLKeyPointArray *corners,
unsigned int *num_corners, BorderMode border_mode, uint8_t constant_border_value)
{
const bool update_number = (nullptr != _num_corners);
_memory_group.manage(&_output);
- _fast_corners_kernel.configure(compile_context, input, &_output, threshold, nonmax_suppression, border_mode);
+ _fast_corners_kernel->configure(compile_context, input, &_output, threshold, nonmax_suppression, border_mode);
if(!_non_max)
{
- _copy_array_kernel.configure(compile_context, &_output, update_number, _corners, &_num_buffer);
+ _copy_array_kernel->configure(compile_context, &_output, update_number, _corners, &_num_buffer);
}
else
{
_memory_group.manage(&_suppr);
_suppr_func.configure(compile_context, &_output, &_suppr, border_mode);
- _copy_array_kernel.configure(compile_context, &_suppr, update_number, _corners, &_num_buffer);
+ _copy_array_kernel->configure(compile_context, &_suppr, update_number, _corners, &_num_buffer);
_suppr.allocator()->allocate();
}
q.enqueueUnmapMemObject(_output.cl_buffer(), out_buffer);
}
- CLScheduler::get().enqueue(_fast_corners_kernel, false);
+ CLScheduler::get().enqueue(*_fast_corners_kernel, false);
if(_non_max)
{
_suppr_func.run();
}
- CLScheduler::get().enqueue(_copy_array_kernel, false);
+ CLScheduler::get().enqueue(*_copy_array_kernel, false);
unsigned int get_num_corners = 0;
q.enqueueReadBuffer(_num_buffer, CL_TRUE, 0, sizeof(unsigned int), &get_num_corners);
*/
#include "arm_compute/runtime/CL/functions/CLFill.h"
-#include "arm_compute/core/CL/kernels/CLMemsetKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/CL/kernels/CLMemsetKernel.h"
#include "support/MemorySupport.h"
*/
#include "arm_compute/runtime/CL/functions/CLFillBorder.h"
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
#include "support/MemorySupport.h"
#include <utility>
*/
#include "arm_compute/runtime/CL/functions/CLFlattenLayer.h"
-#include "arm_compute/core/CL/kernels/CLFlattenLayerKernel.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLFlattenLayerKernel.h"
#include "support/MemorySupport.h"
using namespace arm_compute;
*/
#include "arm_compute/runtime/CL/functions/CLFloor.h"
-#include "arm_compute/core/CL/kernels/CLFloorKernel.h"
+#include "src/core/CL/kernels/CLFloorKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpReductionKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
+#include "src/core/CL/kernels/CLTransposeKernel.h"
#include "support/Cast.h"
#include "support/MemorySupport.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLFuseBatchNormalizationKernel.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
CLFuseBatchNormalization::CLFuseBatchNormalization()
- : _fuse_bn_kernel()
+ : _fuse_bn_kernel(support::cpp14::make_unique<CLFuseBatchNormalizationKernel>())
{
}
+CLFuseBatchNormalization::~CLFuseBatchNormalization() = default;
+
void CLFuseBatchNormalization::configure(const ICLTensor *input_weights, const ICLTensor *bn_mean, const ICLTensor *bn_var,
ICLTensor *fused_weights, ICLTensor *fused_bias,
const ICLTensor *input_bias, const ICLTensor *bn_beta, const ICLTensor *bn_gamma,
const ICLTensor *input_bias, const ICLTensor *bn_beta, const ICLTensor *bn_gamma,
float epsilon, FuseBatchNormalizationType fbn_type)
{
- _fuse_bn_kernel.configure(compile_context, input_weights, bn_mean, bn_var, fused_weights, fused_bias, input_bias, bn_beta, bn_gamma, epsilon, fbn_type);
+ _fuse_bn_kernel->configure(compile_context, input_weights, bn_mean, bn_var, fused_weights, fused_bias, input_bias, bn_beta, bn_gamma, epsilon, fbn_type);
}
Status CLFuseBatchNormalization::validate(const ITensorInfo *input_weights, const ITensorInfo *bn_mean, const ITensorInfo *bn_var,
void CLFuseBatchNormalization::run()
{
- CLScheduler::get().enqueue(_fuse_bn_kernel, true);
+ CLScheduler::get().enqueue(*_fuse_bn_kernel, true);
}
} // namespace arm_compute
*/
#include "arm_compute/runtime/CL/functions/CLGEMM.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/GPUTarget.h"
#include "src/core/CL/ICLGEMMKernelConfiguration.h"
#include "src/core/CL/gemm/reshaped/CLGEMMReshapedKernelConfiguration.h"
#include "src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfiguration.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/utils/helpers/float_ops.h"
#include "src/runtime/CL/gemm/CLGEMMKernelSelection.h"
using namespace arm_compute::cl_gemm;
using namespace arm_compute::utils::cast;
+namespace weights_transformations
+{
+CLGEMMReshapeRHSMatrixKernelManaged::CLGEMMReshapeRHSMatrixKernelManaged()
+ : _kernel(support::cpp14::make_unique<CLGEMMReshapeRHSMatrixKernel>())
+{
+}
+
+CLGEMMReshapeRHSMatrixKernelManaged::~CLGEMMReshapeRHSMatrixKernelManaged() = default;
+
+void CLGEMMReshapeRHSMatrixKernelManaged::run()
+{
+ _output.allocator()->allocate();
+ CLScheduler::get().enqueue(*_kernel, false);
+ _reshape_run = true;
+}
+
+void CLGEMMReshapeRHSMatrixKernelManaged::release()
+{
+ _output.allocator()->free();
+}
+
+ICLTensor *CLGEMMReshapeRHSMatrixKernelManaged::get_weights()
+{
+ return &_output;
+}
+
+uint32_t CLGEMMReshapeRHSMatrixKernelManaged::uid()
+{
+ return _uid;
+}
+
+void CLGEMMReshapeRHSMatrixKernelManaged::configure(const ICLTensor *input, GEMMRHSMatrixInfo info)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), input, info);
+}
+
+void CLGEMMReshapeRHSMatrixKernelManaged::configure(const CLCompileContext &compile_context, const ICLTensor *input, GEMMRHSMatrixInfo info)
+{
+ _kernel->configure(compile_context, input, &_output, info);
+}
+} // namespace weights_transformations
+
CLGEMM::CLGEMM(std::shared_ptr<IMemoryManager> memory_manager, IWeightsManager *weights_manager)
: _memory_group(std::move(memory_manager)),
_weights_manager(weights_manager),
- _mm_kernel(),
- _reshape_lhs_kernel(),
- _reshape_rhs_kernel(),
- _reshape_rhs_kernel_managed(),
- _mm_reshaped_kernel(),
- _mm_reshaped_only_rhs_kernel(),
- _mm_reshaped_only_rhs_fallback_kernel(),
+ _mm_kernel(support::cpp14::make_unique<CLGEMMMatrixMultiplyKernel>()),
+ _reshape_lhs_kernel(support::cpp14::make_unique<CLGEMMReshapeLHSMatrixKernel>()),
+ _reshape_rhs_kernel(support::cpp14::make_unique<CLGEMMReshapeRHSMatrixKernel>()),
+ _reshape_rhs_kernel_managed(support::cpp14::make_unique<weights_transformations::CLGEMMReshapeRHSMatrixKernelManaged>()),
+ _mm_reshaped_kernel(support::cpp14::make_unique<CLGEMMMatrixMultiplyReshapedKernel>()),
+ _mm_reshaped_only_rhs_kernel(support::cpp14::make_unique<CLGEMMMatrixMultiplyReshapedOnlyRHSKernel>()),
+ _mm_reshaped_only_rhs_fallback_kernel(support::cpp14::make_unique<CLGEMMMatrixMultiplyReshapedOnlyRHSKernel>()),
_tmp_a(),
_tmp_b(),
_original_b(nullptr),
{
}
+CLGEMM::~CLGEMM() = default;
+
CLGEMMKernelType CLGEMM::select_gemm_kernel(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type, bool reshape_b_only_on_first_run)
{
std::unique_ptr<ICLGEMMKernelSelection> gemm_kernel = CLGEMMKernelSelectionFactory::create(CLScheduler::get().target());
const GPUTarget gpu_target = CLScheduler::get().target();
// Set the target for the kernels
- _mm_kernel.set_target(gpu_target);
+ _mm_kernel->set_target(gpu_target);
GEMMReshapeInfo reshape_info(m, n, k, 1, 1, gemm_info.depth_output_gemm3d(), gemm_info.reinterpret_input_as_3d(), gemm_info.broadcast_bias());
// Configure and tune matrix multiply kernel
- _mm_kernel.configure(compile_context, a, b, c, output, alpha, beta, false, reshape_info, gemm_info.fp_mixed_precision(), gemm_info.activation_info());
+ _mm_kernel->configure(compile_context, a, b, c, output, alpha, beta, false, reshape_info, gemm_info.fp_mixed_precision(), gemm_info.activation_info());
// Tune kernel statically
- CLScheduler::get().tune_kernel_static(_mm_kernel);
+ CLScheduler::get().tune_kernel_static(*_mm_kernel);
}
void CLGEMM::configure_reshaped_v1(const CLCompileContext &compile_context, const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta,
int mult_interleave4x4_height = 1;
// Set the target for the kernels
- _reshape_lhs_kernel.set_target(gpu_target);
- _mm_kernel.set_target(gpu_target);
+ _reshape_lhs_kernel->set_target(gpu_target);
+ _mm_kernel->set_target(gpu_target);
if(get_arch_from_target(gpu_target) == GPUTarget::BIFROST)
{
}
// Configure interleave kernel
- _reshape_lhs_kernel.configure(compile_context, a, &_tmp_a, lhs_info, reinterpret_input_as_3d);
+ _reshape_lhs_kernel->configure(compile_context, a, &_tmp_a, lhs_info, reinterpret_input_as_3d);
// Configure transpose kernel
ICLTensor *reshaped_rhs = &_tmp_b;
if(_weights_manager && _weights_manager->are_weights_managed(b))
{
- _reshape_rhs_kernel_managed.configure(compile_context, b, rhs_info);
- reshaped_rhs = utils::cast::polymorphic_downcast<ICLTensor *>(_weights_manager->acquire(b, &_reshape_rhs_kernel_managed));
+ _reshape_rhs_kernel_managed->configure(compile_context, b, rhs_info);
+ reshaped_rhs = utils::cast::polymorphic_downcast<ICLTensor *>(_weights_manager->acquire(b, _reshape_rhs_kernel_managed.get()));
}
else
{
- _reshape_rhs_kernel.configure(compile_context, b, &_tmp_b, rhs_info);
+ _reshape_rhs_kernel->configure(compile_context, b, &_tmp_b, rhs_info);
}
// Configure and tune matrix multiply kernel
- _mm_kernel.configure(compile_context, &_tmp_a, reshaped_rhs, c, output, alpha, beta, true, reshape_info, gemm_info.fp_mixed_precision(), gemm_info.activation_info());
+ _mm_kernel->configure(compile_context, &_tmp_a, reshaped_rhs, c, output, alpha, beta, true, reshape_info, gemm_info.fp_mixed_precision(), gemm_info.activation_info());
- CLScheduler::get().tune_kernel_static(_mm_kernel);
+ CLScheduler::get().tune_kernel_static(*_mm_kernel);
// Allocate intermediate tensors
_tmp_a.allocator()->allocate();
kernel_info.activation_info = gemm_info.activation_info();
// Set the target for the kernels
- _reshape_lhs_kernel.set_target(gpu_target);
- _mm_kernel.set_target(gpu_target);
+ _reshape_lhs_kernel->set_target(gpu_target);
+ _mm_kernel->set_target(gpu_target);
const bool use_mm_b = (!_weights_manager || !_weights_manager->are_weights_managed(b));
// Configure lhs_info and rhs_info
std::tie(lhs_info, rhs_info) = gemm_config->configure(m, n, k, batch_size, data_type);
- _reshape_lhs_kernel.configure(compile_context, a, &_tmp_a, lhs_info, gemm_info.reinterpret_input_as_3d());
+ _reshape_lhs_kernel->configure(compile_context, a, &_tmp_a, lhs_info, gemm_info.reinterpret_input_as_3d());
ICLTensor *reshaped_rhs = &_tmp_b;
if(_weights_manager && _weights_manager->are_weights_managed(b))
{
- _reshape_rhs_kernel_managed.configure(compile_context, b, rhs_info);
- reshaped_rhs = utils::cast::polymorphic_downcast<ICLTensor *>(_weights_manager->acquire(b, &_reshape_rhs_kernel_managed));
+ _reshape_rhs_kernel_managed->configure(compile_context, b, rhs_info);
+ reshaped_rhs = utils::cast::polymorphic_downcast<ICLTensor *>(_weights_manager->acquire(b, _reshape_rhs_kernel_managed.get()));
}
else
{
- _reshape_rhs_kernel.configure(compile_context, b, &_tmp_b, rhs_info);
+ _reshape_rhs_kernel->configure(compile_context, b, &_tmp_b, rhs_info);
}
// Configure and tune matrix multiply kernel
- _mm_reshaped_kernel.configure(compile_context, &_tmp_a, reshaped_rhs, c, output, alpha, beta, lhs_info, rhs_info, kernel_info);
+ _mm_reshaped_kernel->configure(compile_context, &_tmp_a, reshaped_rhs, c, output, alpha, beta, lhs_info, rhs_info, kernel_info);
// Allocate intermediate tensors
_tmp_a.allocator()->allocate();
kernel_info.activation_info = gemm_info.activation_info();
// Set the target for the kernels
- _mm_kernel.set_target(gpu_target);
+ _mm_kernel->set_target(gpu_target);
const bool use_mm_b = (!_weights_manager || !_weights_manager->are_weights_managed(b));
ICLTensor *reshaped_rhs = &_tmp_b;
if(_weights_manager && _weights_manager->are_weights_managed(b))
{
- _reshape_rhs_kernel_managed.configure(compile_context, b, rhs_info);
- reshaped_rhs = utils::cast::polymorphic_downcast<ICLTensor *>(_weights_manager->acquire(b, &_reshape_rhs_kernel_managed));
+ _reshape_rhs_kernel_managed->configure(compile_context, b, rhs_info);
+ reshaped_rhs = utils::cast::polymorphic_downcast<ICLTensor *>(_weights_manager->acquire(b, _reshape_rhs_kernel_managed.get()));
}
else
{
- _reshape_rhs_kernel.configure(compile_context, b, &_tmp_b, rhs_info);
+ _reshape_rhs_kernel->configure(compile_context, b, &_tmp_b, rhs_info);
}
// Configure two variants of CLGEMMMatrixMultiplyReshapedOnlyRHSKernel (has_pad_y = false/true)
// Configure matrix multiply kernel with no y padding support
kernel_info.has_pad_y = false;
- _mm_reshaped_only_rhs_kernel.configure(compile_context, a, reshaped_rhs, c, output, alpha, beta, lhs_info, rhs_info, kernel_info);
+ _mm_reshaped_only_rhs_kernel->configure(compile_context, a, reshaped_rhs, c, output, alpha, beta, lhs_info, rhs_info, kernel_info);
// Configure matrix multiply kernel with y padding support
kernel_info.has_pad_y = true;
- _mm_reshaped_only_rhs_fallback_kernel.configure(compile_context, a, reshaped_rhs, c, output, alpha, beta, lhs_info, rhs_info, kernel_info);
+ _mm_reshaped_only_rhs_fallback_kernel->configure(compile_context, a, reshaped_rhs, c, output, alpha, beta, lhs_info, rhs_info, kernel_info);
if(!_reshape_b_only_on_first_run && use_mm_b)
{
{
case CLGEMMKernelType::NATIVE_V1:
{
- CLScheduler::get().enqueue(_mm_kernel, true);
+ CLScheduler::get().enqueue(*_mm_kernel, true);
break;
}
case CLGEMMKernelType::RESHAPED_V1:
{
// Run interleave kernel
- CLScheduler::get().enqueue(_reshape_lhs_kernel, false);
+ CLScheduler::get().enqueue(*_reshape_lhs_kernel, false);
if(!_reshape_b_only_on_first_run)
{
// Run transpose kernel
if(_weights_manager && _weights_manager->are_weights_managed(_original_b))
{
- _weights_manager->run(_original_b, &_reshape_rhs_kernel_managed);
+ _weights_manager->run(_original_b, _reshape_rhs_kernel_managed.get());
}
else
{
- CLScheduler::get().enqueue(_reshape_rhs_kernel, false);
+ CLScheduler::get().enqueue(*_reshape_rhs_kernel, false);
}
}
- CLScheduler::get().enqueue(_mm_kernel, true);
+ CLScheduler::get().enqueue(*_mm_kernel, true);
break;
}
case CLGEMMKernelType::RESHAPED:
{
// Run interleave kernel
- CLScheduler::get().enqueue(_reshape_lhs_kernel, false);
+ CLScheduler::get().enqueue(*_reshape_lhs_kernel, false);
if(!_reshape_b_only_on_first_run)
{
// Run transpose kernel
if(_weights_manager && _weights_manager->are_weights_managed(_original_b))
{
- _weights_manager->run(_original_b, &_reshape_rhs_kernel_managed);
+ _weights_manager->run(_original_b, _reshape_rhs_kernel_managed.get());
}
else
{
- CLScheduler::get().enqueue(_reshape_rhs_kernel, false);
+ CLScheduler::get().enqueue(*_reshape_rhs_kernel, false);
}
}
- CLScheduler::get().enqueue(_mm_reshaped_kernel, true);
+ CLScheduler::get().enqueue(*_mm_reshaped_kernel, true);
break;
}
case CLGEMMKernelType::RESHAPED_ONLY_RHS:
// Run transpose kernel
if(_weights_manager && _weights_manager->are_weights_managed(_original_b))
{
- _weights_manager->run(_original_b, &_reshape_rhs_kernel_managed);
+ _weights_manager->run(_original_b, _reshape_rhs_kernel_managed.get());
}
else
{
- CLScheduler::get().enqueue(_reshape_rhs_kernel, false);
+ CLScheduler::get().enqueue(*_reshape_rhs_kernel, false);
}
}
if(_has_pad_y)
{
- CLScheduler::get().enqueue(_mm_reshaped_only_rhs_fallback_kernel, true);
+ CLScheduler::get().enqueue(*_mm_reshaped_only_rhs_fallback_kernel, true);
}
else
{
- CLScheduler::get().enqueue(_mm_reshaped_only_rhs_kernel, true);
+ CLScheduler::get().enqueue(*_mm_reshaped_only_rhs_kernel, true);
}
break;
}
{
if(_weights_manager && _weights_manager->are_weights_managed(_original_b))
{
- _weights_manager->run(_original_b, &_reshape_rhs_kernel_managed);
+ _weights_manager->run(_original_b, _reshape_rhs_kernel_managed.get());
}
else
{
// Run transpose kernel and mark original weights tensor as unused
_tmp_b.allocator()->allocate();
- CLScheduler::get().enqueue(_reshape_rhs_kernel, false);
+ CLScheduler::get().enqueue(*_reshape_rhs_kernel, false);
_original_b->mark_as_unused();
}
}
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLCol2ImKernel.h"
+#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpReductionKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
+#include "src/core/CL/kernels/CLIm2ColKernel.h"
+#include "src/core/CL/kernels/CLWeightsReshapeKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "support/Cast.h"
+#include "support/MemorySupport.h"
#include <cmath>
#include <memory>
using namespace arm_compute::utils::cast;
CLConvolutionLayerReshapeWeights::CLConvolutionLayerReshapeWeights()
- : _weights_reshape_kernel()
+ : _weights_reshape_kernel(support::cpp14::make_unique<CLWeightsReshapeKernel>())
{
}
+CLConvolutionLayerReshapeWeights::~CLConvolutionLayerReshapeWeights() = default;
+
void CLConvolutionLayerReshapeWeights::configure(const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, unsigned int num_groups)
{
configure(CLKernelLibrary::get().get_compile_context(), weights, biases, output, num_groups);
const bool append_biases = (biases != nullptr) && !is_data_type_quantized_asymmetric(weights->info()->data_type());
const ICLTensor *biases_to_use = (append_biases) ? biases : nullptr;
- _weights_reshape_kernel.configure(compile_context, weights, biases_to_use, output, num_groups);
+ _weights_reshape_kernel->configure(compile_context, weights, biases_to_use, output, num_groups);
output->info()->set_quantization_info(weights->info()->quantization_info());
}
void CLConvolutionLayerReshapeWeights::run()
{
- CLScheduler::get().enqueue(_weights_reshape_kernel);
+ CLScheduler::get().enqueue(*_weights_reshape_kernel);
}
CLGEMMConvolutionLayer::CLGEMMConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager, IWeightsManager *weights_manager)
- : _memory_group(memory_manager), _weights_manager(weights_manager), _reshape_weights(), _reshape_weights_managed(), _im2col_kernel(), _mm_gemm(memory_manager, weights_manager),
- _mm_gemmlowp(memory_manager), _col2im_kernel(), _activationlayer_function(), _original_weights(nullptr), _im2col_output(), _weights_reshaped(), _gemm_output(), _skip_im2col(false),
- _skip_col2im(false), _is_quantized(false), _fuse_activation(true), _is_prepared(false)
+ : _memory_group(memory_manager), _weights_manager(weights_manager), _reshape_weights(), _reshape_weights_managed(), _im2col_kernel(support::cpp14::make_unique<CLIm2ColKernel>()),
+ _mm_gemm(memory_manager, weights_manager), _mm_gemmlowp(memory_manager), _col2im_kernel(support::cpp14::make_unique<CLCol2ImKernel>()), _activationlayer_function(), _original_weights(nullptr),
+ _im2col_output(), _weights_reshaped(), _gemm_output(), _skip_im2col(false), _skip_col2im(false), _is_quantized(false), _fuse_activation(true), _is_prepared(false)
{
}
+CLGEMMConvolutionLayer::~CLGEMMConvolutionLayer() = default;
+
void CLGEMMConvolutionLayer::configure_mm(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output,
const GEMMLowpOutputStageInfo &gemmlowp_output_stage,
int gemm_3d_depth, const ActivationLayerInfo &act_info)
_fuse_activation = true;
// Set the GPU target for im2col and col2im
- _im2col_kernel.set_target(CLScheduler::get().target());
- _col2im_kernel.set_target(CLScheduler::get().target());
+ _im2col_kernel->set_target(CLScheduler::get().target());
+ _col2im_kernel->set_target(CLScheduler::get().target());
const ICLTensor *gemm_input_to_use = input;
ICLTensor *gemm_output_to_use = output;
_memory_group.manage(&_im2col_output);
// Configure and tune im2col. im2col output shape is auto-initialized
- _im2col_kernel.configure(compile_context, input, &_im2col_output, Size2D(kernel_width, kernel_height), conv_info, append_bias, dilation, num_groups);
+ _im2col_kernel->configure(compile_context, input, &_im2col_output, Size2D(kernel_width, kernel_height), conv_info, append_bias, dilation, num_groups);
// Set quantization info
_im2col_output.info()->set_quantization_info(input->info()->quantization_info());
- CLScheduler::get().tune_kernel_static(_im2col_kernel);
+ CLScheduler::get().tune_kernel_static(*_im2col_kernel);
// Update GEMM input
gemm_input_to_use = &_im2col_output;
if(!_skip_col2im)
{
// Configure and tune Col2Im
- _col2im_kernel.configure(compile_context, gemm_output_to_use, output, Size2D(conv_w, conv_h), num_groups);
- CLScheduler::get().tune_kernel_static(_col2im_kernel);
+ _col2im_kernel->configure(compile_context, gemm_output_to_use, output, Size2D(conv_w, conv_h), num_groups);
+ CLScheduler::get().tune_kernel_static(*_col2im_kernel.get());
}
if(!_skip_col2im)
// Run im2col
if(!_skip_im2col)
{
- CLScheduler::get().enqueue(_im2col_kernel);
+ CLScheduler::get().enqueue(*_im2col_kernel);
}
// Runs CLGEMM or CLGEMMLowpMatrixMultiplyCore functions
// Reshape output matrix
if(!_skip_col2im)
{
- CLScheduler::get().enqueue(_col2im_kernel, false);
+ CLScheduler::get().enqueue(*_col2im_kernel.get(), false);
}
//Run Activation Layer if we cannot fuse in GEMM
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.h"
+#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpReductionKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
+#include "src/core/CL/kernels/CLIm2ColKernel.h"
+#include "src/core/CL/kernels/CLWeightsReshapeKernel.h"
+#include "support/MemorySupport.h"
-#include <memory>
#include <tuple>
namespace arm_compute
_permute_weights_to_nhwc(),
_reshape_weights(),
_transpose_weights(),
- _deconv_reshape(),
+ _deconv_reshape(support::cpp14::make_unique<CLDeconvolutionReshapeOutputKernel>()),
_slice_gemm(),
_gemmlowp_final(),
_reshaped_weights(),
{
}
+CLGEMMDeconvolutionLayer::~CLGEMMDeconvolutionLayer() = default;
+
Status CLGEMMDeconvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *output, const PadStrideInfo &deconv_info)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
}
// Configure a Col2Im call to reshape the output of GEMM
- _deconv_reshape.configure(compile_context, &_gemm_output, bias, deconv_reshape_output, input->info(), weights->info(), deconv_info);
+ _deconv_reshape->configure(compile_context, &_gemm_output, bias, deconv_reshape_output, input->info(), weights->info(), deconv_info);
_gemm_output.allocator()->allocate();
if(_is_quantized)
_mm_gemm.run();
}
- CLScheduler::get().enqueue(_deconv_reshape, false);
+ CLScheduler::get().enqueue(*_deconv_reshape, false);
if(_is_quantized)
{
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "src/core/CL/gemm/native/CLGEMMNativeKernelConfiguration.h"
#include "src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfiguration.h"
+#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpReductionKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/runtime/CL/gemm/CLGEMMKernelSelection.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
CLGEMMLowpMatrixMultiplyCore::CLGEMMLowpMatrixMultiplyCore(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(std::move(memory_manager)),
- _weights_to_qasymm8(),
- _mm_native_kernel(),
- _mm_reshaped_only_rhs_kernel(),
- _mtx_b_reshape_kernel(),
- _mtx_a_reduction_kernel(),
- _mtx_b_reduction_kernel(),
- _offset_contribution_kernel(),
- _offset_contribution_output_stage_kernel(),
+ _weights_to_qasymm8(support::cpp14::make_unique<CLDepthConvertLayerKernel>()),
+ _mm_native_kernel(support::cpp14::make_unique<CLGEMMLowpMatrixMultiplyNativeKernel>()),
+ _mm_reshaped_only_rhs_kernel(support::cpp14::make_unique<CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel>()),
+ _mtx_b_reshape_kernel(support::cpp14::make_unique<CLGEMMReshapeRHSMatrixKernel>()),
+ _mtx_a_reduction_kernel(support::cpp14::make_unique<CLGEMMLowpMatrixAReductionKernel>()),
+ _mtx_b_reduction_kernel(support::cpp14::make_unique<CLGEMMLowpMatrixBReductionKernel>()),
+ _offset_contribution_kernel(support::cpp14::make_unique<CLGEMMLowpOffsetContributionKernel>()),
+ _offset_contribution_output_stage_kernel(support::cpp14::make_unique<CLGEMMLowpOffsetContributionOutputStageKernel>()),
_qasymm8_weights(),
_vector_sum_col(),
_vector_sum_row(),
{
}
+CLGEMMLowpMatrixMultiplyCore::~CLGEMMLowpMatrixMultiplyCore() = default;
+
void CLGEMMLowpMatrixMultiplyCore::configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, const GEMMInfo &gemm_info)
{
configure(CLKernelLibrary::get().get_compile_context(), a, b, c, output, gemm_info);
const GPUTarget gpu_target = CLScheduler::get().target();
// Set the target for the kernels
- _mm_native_kernel.set_target(gpu_target);
- _mm_reshaped_only_rhs_kernel.set_target(gpu_target);
+ _mm_native_kernel->set_target(gpu_target);
+ _mm_reshaped_only_rhs_kernel->set_target(gpu_target);
GEMMRHSMatrixInfo rhs_info;
GEMMLHSMatrixInfo lhs_info;
TensorInfo weights_info(*b->info());
weights_info.set_data_type(DataType::QASYMM8);
_qasymm8_weights.allocator()->init(weights_info);
- _weights_to_qasymm8.configure(compile_context, b, &_qasymm8_weights, ConvertPolicy::WRAP, 0);
+ _weights_to_qasymm8->configure(compile_context, b, &_qasymm8_weights, ConvertPolicy::WRAP, 0);
}
const ICLTensor *matrix_b = _convert_to_qasymm8 ? &_qasymm8_weights : b;
std::tie(lhs_info, rhs_info) = CLGEMMReshapedOnlyRHSKernelConfigurationFactory::create(gpu_target)->configure(m, n, k, batch_size, DataType::QASYMM8);
// Configure reshape RHS kernel
- _mtx_b_reshape_kernel.configure(compile_context, _convert_to_qasymm8 ? &_qasymm8_weights : b, &_tmp_b, rhs_info);
+ _mtx_b_reshape_kernel->configure(compile_context, _convert_to_qasymm8 ? &_qasymm8_weights : b, &_tmp_b, rhs_info);
}
// Using default reduction info
}
// Configure Matrix B reduction kernel
- _mtx_b_reduction_kernel.configure(compile_context, _convert_to_qasymm8 ? &_qasymm8_weights : b, &_vector_sum_col, reduction_info);
+ _mtx_b_reduction_kernel->configure(compile_context, _convert_to_qasymm8 ? &_qasymm8_weights : b, &_vector_sum_col, reduction_info);
}
// Initialize Matrix A reduction kernel only if _b_offset is not equal to 0
_memory_group.manage(&_vector_sum_row);
// Configure matrix A reduction kernel
- _mtx_a_reduction_kernel.configure(compile_context, a, &_vector_sum_row, reduction_info);
+ _mtx_a_reduction_kernel->configure(compile_context, a, &_vector_sum_row, reduction_info);
}
GEMMKernelInfo gemm_kernel_info;
if(_is_gemm_reshaped && gemmlowp_output_stage.type == GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT)
{
// Configure and tune matrix multiply kernel with fused output stage
- _mm_reshaped_only_rhs_kernel.configure(compile_context, _matrix_a, matrix_b, output, gemm_kernel_info, _a_offset == 0 ? nullptr : &_vector_sum_col,
- _b_offset == 0 ? nullptr : &_vector_sum_row, c, &_gemm_output_stage_multipliers, &_gemm_output_stage_shifts);
+ _mm_reshaped_only_rhs_kernel->configure(compile_context, _matrix_a, matrix_b, output, gemm_kernel_info, _a_offset == 0 ? nullptr : &_vector_sum_col,
+ _b_offset == 0 ? nullptr : &_vector_sum_row, c, &_gemm_output_stage_multipliers, &_gemm_output_stage_shifts);
}
else
{
if(_is_gemm_reshaped)
{
- _mm_reshaped_only_rhs_kernel.configure(compile_context, _matrix_a, matrix_b, &_mm_result_s32, gemm_kernel_info);
+ _mm_reshaped_only_rhs_kernel->configure(compile_context, _matrix_a, matrix_b, &_mm_result_s32, gemm_kernel_info);
}
else
{
std::tie(lhs_info, rhs_info) = CLGEMMNativeKernelConfigurationFactory::create(gpu_target)->configure(m, n, k, batch_size, DataType::QASYMM8);
// Configure matrix multiply kernel
- _mm_native_kernel.configure(compile_context, _matrix_a, matrix_b, &_mm_result_s32, lhs_info, rhs_info, GEMMReshapeInfo(m, n, k, 1, 1, depth_output_gemm3d, reinterpret_input_as_3d));
+ _mm_native_kernel->configure(compile_context, _matrix_a, matrix_b, &_mm_result_s32, lhs_info, rhs_info, GEMMReshapeInfo(m, n, k, 1, 1, depth_output_gemm3d, reinterpret_input_as_3d));
- _offset_contribution_output_stage_kernel.configure(compile_context, &_mm_result_s32, _a_offset == 0 ? nullptr : &_vector_sum_col, _b_offset == 0 ? nullptr : &_vector_sum_row, c, output,
- a->info()->dimension(0),
- _a_offset, _b_offset, gemmlowp_output_stage, &_gemm_output_stage_multipliers, &_gemm_output_stage_shifts);
+ _offset_contribution_output_stage_kernel->configure(compile_context, &_mm_result_s32, _a_offset == 0 ? nullptr : &_vector_sum_col, _b_offset == 0 ? nullptr : &_vector_sum_row, c, output,
+ a->info()->dimension(0),
+ _a_offset, _b_offset, gemmlowp_output_stage, &_gemm_output_stage_multipliers, &_gemm_output_stage_shifts);
_mm_result_s32.allocator()->allocate();
}
}
if(_is_gemm_reshaped)
{
// Configure and tune matrix multiply kernel
- _mm_reshaped_only_rhs_kernel.configure(compile_context, _matrix_a, matrix_b, output, gemm_kernel_info);
+ _mm_reshaped_only_rhs_kernel->configure(compile_context, _matrix_a, matrix_b, output, gemm_kernel_info);
}
else
{
std::tie(lhs_info, rhs_info) = CLGEMMNativeKernelConfigurationFactory::create(gpu_target)->configure(m, n, k, batch_size, DataType::QASYMM8);
// Configure matrix multiply kernel
- _mm_native_kernel.configure(compile_context, _matrix_a, matrix_b, output, lhs_info, rhs_info, GEMMReshapeInfo(m, n, k, 1, 1, depth_output_gemm3d, reinterpret_input_as_3d));
+ _mm_native_kernel->configure(compile_context, _matrix_a, matrix_b, output, lhs_info, rhs_info, GEMMReshapeInfo(m, n, k, 1, 1, depth_output_gemm3d, reinterpret_input_as_3d));
}
// Configure offset contribution kernel
- _offset_contribution_kernel.configure(compile_context, output, _a_offset == 0 ? nullptr : &_vector_sum_col, _b_offset == 0 ? nullptr : &_vector_sum_row, c, a->info()->dimension(0), _a_offset,
- _b_offset);
+ _offset_contribution_kernel->configure(compile_context, output, _a_offset == 0 ? nullptr : &_vector_sum_col, _b_offset == 0 ? nullptr : &_vector_sum_row, c, a->info()->dimension(0), _a_offset,
+ _b_offset);
}
// Allocate tensors
if(!_reshape_b_only_on_first_run)
{
// Run reshape matrix B
- CLScheduler::get().enqueue(_mtx_b_reshape_kernel, false);
+ CLScheduler::get().enqueue(*_mtx_b_reshape_kernel, false);
}
}
// Run matrix B reduction kernel only if _a_offset is not equal to 0
if(_a_offset != 0 && !_reshape_b_only_on_first_run)
{
- CLScheduler::get().enqueue(_mtx_b_reduction_kernel, false);
+ CLScheduler::get().enqueue(*_mtx_b_reduction_kernel, false);
}
// Run matrix A reduction kernel only if _b_offset is not equal to 0
if(_b_offset != 0)
{
- CLScheduler::get().enqueue(_mtx_a_reduction_kernel, false);
+ CLScheduler::get().enqueue(*_mtx_a_reduction_kernel, false);
}
// Run matrix multiply
if(_is_gemm_reshaped)
{
- CLScheduler::get().enqueue(_mm_reshaped_only_rhs_kernel, false);
+ CLScheduler::get().enqueue(*_mm_reshaped_only_rhs_kernel, false);
}
else
{
- CLScheduler::get().enqueue(_mm_native_kernel, false);
+ CLScheduler::get().enqueue(*_mm_native_kernel, false);
}
if(_run_output_stage)
{
// Run offset contribution/output stage kernel
- CLScheduler::get().enqueue(_offset_contribution_output_stage_kernel, true);
+ CLScheduler::get().enqueue(*_offset_contribution_output_stage_kernel, true);
}
if(_run_offset_contribution)
{
// Run offset contribution kernel
- CLScheduler::get().enqueue(_offset_contribution_kernel, true);
+ CLScheduler::get().enqueue(*_offset_contribution_kernel, true);
}
}
if(_convert_to_qasymm8)
{
_qasymm8_weights.allocator()->allocate();
- CLScheduler::get().enqueue(_weights_to_qasymm8, false);
+ CLScheduler::get().enqueue(*_weights_to_qasymm8, false);
}
if(_is_gemm_reshaped && _reshape_b_only_on_first_run)
// Run reshape kernel and mark original weights tensor as unused
_tmp_b.allocator()->allocate();
- CLScheduler::get().enqueue(_mtx_b_reshape_kernel, false);
+ CLScheduler::get().enqueue(*_mtx_b_reshape_kernel, false);
_original_b->mark_as_unused();
}
if(_a_offset != 0 && _reshape_b_only_on_first_run)
{
_vector_sum_col.allocator()->allocate();
- CLScheduler::get().enqueue(_mtx_b_reduction_kernel, false);
+ CLScheduler::get().enqueue(*_mtx_b_reduction_kernel, false);
}
CLScheduler::get().queue().finish();
#include "arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h"
+#include "arm_compute/core/Types.h"
+#include "src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpQuantizeDownInt32ScaleKernel.h"
#include "support/MemorySupport.h"
+#include <algorithm>
+
namespace arm_compute
{
void CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint::configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output,
#include "arm_compute/runtime/CL/functions/CLGather.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLGatherKernel.h"
+#include "src/core/CL/kernels/CLGatherKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
*/
#include "arm_compute/runtime/CL/functions/CLGaussian3x3.h"
-#include "arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h"
#include "arm_compute/core/PixelValue.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLGaussian3x3Kernel.h"
#include "support/MemorySupport.h"
#include <utility>
auto k = arm_compute::support::cpp14::make_unique<CLGaussian3x3Kernel>();
k->configure(compile_context, input, output, border_mode == BorderMode::UNDEFINED);
_kernel = std::move(k);
- _border_handler.configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+ _border_handler->configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
}
#include "arm_compute/runtime/CL/functions/CLGaussian5x5.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h"
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/ITensorAllocator.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLGaussian5x5Kernel.h"
+#include "support/MemorySupport.h"
#include <utility>
using namespace arm_compute;
CLGaussian5x5::CLGaussian5x5(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _kernel_hor(), _kernel_vert(), _border_handler(), _tmp()
+ : _memory_group(std::move(memory_manager)),
+ _kernel_hor(support::cpp14::make_unique<CLGaussian5x5HorKernel>()),
+ _kernel_vert(support::cpp14::make_unique<CLGaussian5x5VertKernel>()),
+ _border_handler(support::cpp14::make_unique<CLFillBorderKernel>()),
+ _tmp()
{
}
+CLGaussian5x5::~CLGaussian5x5() = default;
+
void CLGaussian5x5::configure(ICLTensor *input, ICLTensor *output, BorderMode border_mode, uint8_t constant_border_value)
{
configure(CLKernelLibrary::get().get_compile_context(), input, output, border_mode, constant_border_value);
_memory_group.manage(&_tmp);
// Configure kernels
- _kernel_hor.configure(compile_context, input, &_tmp, border_mode == BorderMode::UNDEFINED);
- _kernel_vert.configure(compile_context, &_tmp, output, border_mode == BorderMode::UNDEFINED);
- _border_handler.configure(compile_context, input, _kernel_hor.border_size(), border_mode, PixelValue(constant_border_value));
+ _kernel_hor->configure(compile_context, input, &_tmp, border_mode == BorderMode::UNDEFINED);
+ _kernel_vert->configure(compile_context, &_tmp, output, border_mode == BorderMode::UNDEFINED);
+ _border_handler->configure(compile_context, input, _kernel_hor->border_size(), border_mode, PixelValue(constant_border_value));
// Allocate intermediate buffers
_tmp.allocator()->allocate();
void CLGaussian5x5::run()
{
- CLScheduler::get().enqueue(_border_handler, false);
+ CLScheduler::get().enqueue(*_border_handler, false);
MemoryGroupResourceScope scope_mg(_memory_group);
- CLScheduler::get().enqueue(_kernel_hor, false);
- CLScheduler::get().enqueue(_kernel_vert);
+ CLScheduler::get().enqueue(*_kernel_hor, false);
+ CLScheduler::get().enqueue(*_kernel_vert);
}
#include "arm_compute/runtime/CL/functions/CLGaussianPyramid.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLGaussianPyramidKernel.h"
-#include "arm_compute/core/CL/kernels/CLScaleKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
-
#include "arm_compute/runtime/CL/CLPyramid.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"
#include "arm_compute/runtime/CL/functions/CLGaussian5x5.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLGaussian5x5Kernel.h"
+#include "src/core/CL/kernels/CLGaussianPyramidKernel.h"
+#include "src/core/CL/kernels/CLScaleKernel.h"
+#include "support/MemorySupport.h"
#include <cstddef>
{
}
+CLGaussianPyramid::~CLGaussianPyramid() = default;
+
CLGaussianPyramidHalf::CLGaussianPyramidHalf() // NOLINT
: _horizontal_border_handler(),
_vertical_border_handler(),
{
}
+CLGaussianPyramidHalf::~CLGaussianPyramidHalf() = default;
+
void CLGaussianPyramidHalf::configure(ICLTensor *input, CLPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value)
{
configure(CLKernelLibrary::get().get_compile_context(), input, pyramid, border_mode, constant_border_value);
if(num_levels > 1)
{
- _horizontal_border_handler.resize(num_levels - 1);
- _vertical_border_handler.resize(num_levels - 1);
- _horizontal_reduction.resize(num_levels - 1);
- _vertical_reduction.resize(num_levels - 1);
+ _horizontal_border_handler.reserve(num_levels - 1);
+ _vertical_border_handler.reserve(num_levels - 1);
+ _horizontal_reduction.reserve(num_levels - 1);
+ _vertical_reduction.reserve(num_levels - 1);
// Apply half scale to the X dimension of the tensor shape
TensorShape tensor_shape = pyramid->info()->tensor_shape();
for(size_t i = 0; i < num_levels - 1; ++i)
{
/* Configure horizontal kernel */
- _horizontal_reduction[i].configure(compile_context, _pyramid->get_pyramid_level(i), _tmp.get_pyramid_level(i));
+ _horizontal_reduction.emplace_back(support::cpp14::make_unique<CLGaussianPyramidHorKernel>());
+ _horizontal_reduction.back()->configure(compile_context, _pyramid->get_pyramid_level(i), _tmp.get_pyramid_level(i));
/* Configure vertical kernel */
- _vertical_reduction[i].configure(compile_context, _tmp.get_pyramid_level(i), _pyramid->get_pyramid_level(i + 1));
+ _vertical_reduction.emplace_back(support::cpp14::make_unique<CLGaussianPyramidVertKernel>());
+ _vertical_reduction.back()->configure(compile_context, _tmp.get_pyramid_level(i), _pyramid->get_pyramid_level(i + 1));
/* Configure border */
- _horizontal_border_handler[i].configure(compile_context, _pyramid->get_pyramid_level(i), _horizontal_reduction[i].border_size(), border_mode, PixelValue(constant_border_value));
+ _horizontal_border_handler.emplace_back(support::cpp14::make_unique<CLFillBorderKernel>());
+ _horizontal_border_handler.back()->configure(compile_context, _pyramid->get_pyramid_level(i), _horizontal_reduction.back()->border_size(), border_mode, PixelValue(constant_border_value));
/* Configure border */
- _vertical_border_handler[i].configure(compile_context, _tmp.get_pyramid_level(i), _vertical_reduction[i].border_size(), border_mode, PixelValue(pixel_value_u16));
+ _vertical_border_handler.emplace_back(support::cpp14::make_unique<CLFillBorderKernel>());
+ _vertical_border_handler.back()->configure(compile_context, _tmp.get_pyramid_level(i), _vertical_reduction.back()->border_size(), border_mode, PixelValue(pixel_value_u16));
}
_tmp.allocate();
}
for(unsigned int i = 0; i < num_levels - 1; ++i)
{
- CLScheduler::get().enqueue(_horizontal_border_handler[i], false);
- CLScheduler::get().enqueue(_horizontal_reduction[i], false);
- CLScheduler::get().enqueue(_vertical_border_handler[i], false);
- CLScheduler::get().enqueue(_vertical_reduction[i], false);
+ CLScheduler::get().enqueue(*_horizontal_border_handler[i], false);
+ CLScheduler::get().enqueue(*_horizontal_reduction[i], false);
+ CLScheduler::get().enqueue(*_vertical_border_handler[i], false);
+ CLScheduler::get().enqueue(*_vertical_reduction[i], false);
}
}
if(num_levels > 1)
{
_gauss5x5.resize(num_levels - 1);
- _scale_nearest.resize(num_levels - 1);
+ _scale_nearest.reserve(num_levels - 1);
PyramidInfo pyramid_info(num_levels - 1, SCALE_PYRAMID_ORB, pyramid->info()->tensor_shape(), Format::U8);
_gauss5x5[i].configure(compile_context, _pyramid->get_pyramid_level(i), _tmp.get_pyramid_level(i), border_mode, constant_border_value);
/* Configure scale image kernel */
- _scale_nearest[i].configure(compile_context, _tmp.get_pyramid_level(i), _pyramid->get_pyramid_level(i + 1), ScaleKernelInfo{ InterpolationPolicy::NEAREST_NEIGHBOR, border_mode, PixelValue(), SamplingPolicy::CENTER });
+ _scale_nearest.emplace_back(support::cpp14::make_unique<CLScaleKernel>());
+ _scale_nearest.back()->configure(compile_context, _tmp.get_pyramid_level(i), _pyramid->get_pyramid_level(i + 1), ScaleKernelInfo{ InterpolationPolicy::NEAREST_NEIGHBOR, border_mode, PixelValue(), SamplingPolicy::CENTER });
}
_tmp.allocate();
for(unsigned int i = 0; i < num_levels - 1; ++i)
{
_gauss5x5[i].run();
- CLScheduler::get().enqueue(_scale_nearest[i]);
+ CLScheduler::get().enqueue(*_scale_nearest[i]);
}
}
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/core/Types.h"
+#include "src/core/CL/kernels/CLBoundingBoxTransformKernel.h"
+#include "src/core/CL/kernels/CLDequantizationLayerKernel.h"
+#include "src/core/CL/kernels/CLGenerateProposalsLayerKernel.h"
+#include "src/core/CL/kernels/CLPadLayerKernel.h"
+#include "src/core/CL/kernels/CLPermuteKernel.h"
+#include "src/core/CL/kernels/CLQuantizationLayerKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
CLGenerateProposalsLayer::CLGenerateProposalsLayer(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(memory_manager),
- _permute_deltas_kernel(),
+ _permute_deltas_kernel(support::cpp14::make_unique<CLPermuteKernel>()),
_flatten_deltas(),
- _permute_scores_kernel(),
+ _permute_scores_kernel(support::cpp14::make_unique<CLPermuteKernel>()),
_flatten_scores(),
- _compute_anchors_kernel(),
- _bounding_box_kernel(),
- _pad_kernel(),
- _dequantize_anchors(),
- _dequantize_deltas(),
- _quantize_all_proposals(),
+ _compute_anchors_kernel(support::cpp14::make_unique<CLComputeAllAnchorsKernel>()),
+ _bounding_box_kernel(support::cpp14::make_unique<CLBoundingBoxTransformKernel>()),
+ _pad_kernel(support::cpp14::make_unique<CLPadLayerKernel>()),
+ _dequantize_anchors(support::cpp14::make_unique<CLDequantizationLayerKernel>()),
+ _dequantize_deltas(support::cpp14::make_unique<CLDequantizationLayerKernel>()),
+ _quantize_all_proposals(support::cpp14::make_unique<CLQuantizationLayerKernel>()),
_cpp_nms(memory_manager),
_is_nhwc(false),
_is_qasymm8(false),
{
}
+CLGenerateProposalsLayer::~CLGenerateProposalsLayer() = default;
+
void CLGenerateProposalsLayer::configure(const ICLTensor *scores, const ICLTensor *deltas, const ICLTensor *anchors, ICLTensor *proposals, ICLTensor *scores_out, ICLTensor *num_valid_proposals,
const GenerateProposalsInfo &info)
{
// Compute all the anchors
_memory_group.manage(&_all_anchors);
- _compute_anchors_kernel.configure(compile_context, anchors, &_all_anchors, ComputeAnchorsInfo(feat_width, feat_height, info.spatial_scale()));
+ _compute_anchors_kernel->configure(compile_context, anchors, &_all_anchors, ComputeAnchorsInfo(feat_width, feat_height, info.spatial_scale()));
const TensorShape flatten_shape_deltas(values_per_roi, total_num_anchors);
_deltas_flattened.allocator()->init(TensorInfo(flatten_shape_deltas, 1, scores_data_type, deltas->info()->quantization_info()));
if(!_is_nhwc)
{
_memory_group.manage(&_deltas_permuted);
- _permute_deltas_kernel.configure(compile_context, deltas, &_deltas_permuted, PermutationVector{ 2, 0, 1 });
+ _permute_deltas_kernel->configure(compile_context, deltas, &_deltas_permuted, PermutationVector{ 2, 0, 1 });
_flatten_deltas.configure(compile_context, &_deltas_permuted, &_deltas_flattened);
_deltas_permuted.allocator()->allocate();
}
if(!_is_nhwc)
{
_memory_group.manage(&_scores_permuted);
- _permute_scores_kernel.configure(compile_context, scores, &_scores_permuted, PermutationVector{ 2, 0, 1 });
+ _permute_scores_kernel->configure(compile_context, scores, &_scores_permuted, PermutationVector{ 2, 0, 1 });
_flatten_scores.configure(compile_context, &_scores_permuted, &_scores_flattened);
_scores_permuted.allocator()->allocate();
}
_memory_group.manage(&_all_anchors_f32);
_memory_group.manage(&_deltas_flattened_f32);
// Dequantize anchors to float
- _dequantize_anchors.configure(compile_context, &_all_anchors, &_all_anchors_f32);
+ _dequantize_anchors->configure(compile_context, &_all_anchors, &_all_anchors_f32);
_all_anchors.allocator()->allocate();
anchors_to_use = &_all_anchors_f32;
// Dequantize deltas to float
- _dequantize_deltas.configure(compile_context, &_deltas_flattened, &_deltas_flattened_f32);
+ _dequantize_deltas->configure(compile_context, &_deltas_flattened, &_deltas_flattened_f32);
_deltas_flattened.allocator()->allocate();
deltas_to_use = &_deltas_flattened_f32;
}
// Bounding box transform
_memory_group.manage(&_all_proposals);
BoundingBoxTransformInfo bbox_info(info.im_width(), info.im_height(), 1.f);
- _bounding_box_kernel.configure(compile_context, anchors_to_use, &_all_proposals, deltas_to_use, bbox_info);
+ _bounding_box_kernel->configure(compile_context, anchors_to_use, &_all_proposals, deltas_to_use, bbox_info);
deltas_to_use->allocator()->allocate();
anchors_to_use->allocator()->allocate();
_memory_group.manage(&_all_proposals_quantized);
// Requantize all_proposals to QASYMM16 with 0.125 scale and 0 offset
_all_proposals_quantized.allocator()->init(TensorInfo(_all_proposals.info()->tensor_shape(), 1, DataType::QASYMM16, QuantizationInfo(0.125f, 0)));
- _quantize_all_proposals.configure(compile_context, &_all_proposals, &_all_proposals_quantized);
+ _quantize_all_proposals->configure(compile_context, &_all_proposals, &_all_proposals_quantized);
_all_proposals.allocator()->allocate();
_all_proposals_to_use = &_all_proposals_quantized;
}
_scores_flattened.allocator()->allocate();
// Add the first column that represents the batch id. This will be all zeros, as we don't support multiple images
- _pad_kernel.configure(compile_context, &_proposals_4_roi_values, proposals, PaddingList{ { 1, 0 } });
+ _pad_kernel->configure(compile_context, &_proposals_4_roi_values, proposals, PaddingList{ { 1, 0 } });
_proposals_4_roi_values.allocator()->allocate();
}
MemoryGroupResourceScope scope_mg(_memory_group);
// Compute all the anchors
- CLScheduler::get().enqueue(_compute_anchors_kernel, false);
+ CLScheduler::get().enqueue(*_compute_anchors_kernel, false);
// Transpose and reshape the inputs
if(!_is_nhwc)
{
- CLScheduler::get().enqueue(_permute_deltas_kernel, false);
- CLScheduler::get().enqueue(_permute_scores_kernel, false);
+ CLScheduler::get().enqueue(*_permute_deltas_kernel, false);
+ CLScheduler::get().enqueue(*_permute_scores_kernel, false);
}
_flatten_deltas.run();
_flatten_scores.run();
if(_is_qasymm8)
{
- CLScheduler::get().enqueue(_dequantize_anchors, false);
- CLScheduler::get().enqueue(_dequantize_deltas, false);
+ CLScheduler::get().enqueue(*_dequantize_anchors, false);
+ CLScheduler::get().enqueue(*_dequantize_deltas, false);
}
// Build the boxes
- CLScheduler::get().enqueue(_bounding_box_kernel, false);
+ CLScheduler::get().enqueue(*_bounding_box_kernel, false);
if(_is_qasymm8)
{
- CLScheduler::get().enqueue(_quantize_all_proposals, false);
+ CLScheduler::get().enqueue(*_quantize_all_proposals, false);
}
// Non maxima suppression
run_cpp_nms_kernel();
// Add dummy batch indexes
- CLScheduler::get().enqueue(_pad_kernel, true);
+ CLScheduler::get().enqueue(*_pad_kernel, true);
}
} // namespace arm_compute
#include "arm_compute/core/Size2D.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLHOGDescriptorKernel.h"
+#include "src/core/CL/kernels/CLMagnitudePhaseKernel.h"
+#include "support/MemorySupport.h"
using namespace arm_compute;
CLHOGDescriptor::CLHOGDescriptor(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _gradient(), _orient_bin(), _block_norm(), _mag(), _phase(), _hog_space()
+ : _memory_group(std::move(memory_manager)),
+ _gradient(),
+ _orient_bin(support::cpp14::make_unique<CLHOGOrientationBinningKernel>()),
+ _block_norm(support::cpp14::make_unique<CLHOGBlockNormalizationKernel>()),
+ _mag(),
+ _phase(),
+ _hog_space()
{
}
+CLHOGDescriptor::~CLHOGDescriptor() = default;
+
void CLHOGDescriptor::configure(ICLTensor *input, ICLTensor *output, const IHOG *hog, BorderMode border_mode, uint8_t constant_border_value)
{
configure(CLKernelLibrary::get().get_compile_context(), input, output, hog, border_mode, constant_border_value);
_memory_group.manage(&_hog_space);
// Initialise orientation binning kernel
- _orient_bin.configure(compile_context, &_mag, &_phase, &_hog_space, hog->info());
+ _orient_bin->configure(compile_context, &_mag, &_phase, &_hog_space, hog->info());
// Initialize HOG norm kernel
- _block_norm.configure(compile_context, &_hog_space, output, hog->info());
+ _block_norm->configure(compile_context, &_hog_space, output, hog->info());
// Allocate intermediate tensors
_mag.allocator()->allocate();
_gradient.run();
// Run orientation binning
- CLScheduler::get().enqueue(_orient_bin, false);
+ CLScheduler::get().enqueue(*_orient_bin, false);
// Run block normalization
- CLScheduler::get().enqueue(_block_norm);
+ CLScheduler::get().enqueue(*_block_norm);
}
\ No newline at end of file
*/
#include "arm_compute/runtime/CL/functions/CLHOGDetector.h"
-#include "arm_compute/core/CL/kernels/CLHOGDetectorKernel.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLHOGDetectorKernel.h"
+#include "support/MemorySupport.h"
#include <algorithm>
using namespace arm_compute;
CLHOGDetector::CLHOGDetector()
- : _hog_detector_kernel(), _detection_windows(nullptr), _num_detection_windows()
+ : _hog_detector_kernel(support::cpp14::make_unique<CLHOGDetectorKernel>()), _detection_windows(nullptr), _num_detection_windows()
{
}
+CLHOGDetector::~CLHOGDetector() = default;
+
void CLHOGDetector::configure(const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, const Size2D &detection_window_stride, float threshold, size_t idx_class)
{
configure(CLKernelLibrary::get().get_compile_context(), input, hog, detection_windows, detection_window_stride, threshold, idx_class);
_num_detection_windows = cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(unsigned int));
// Configure HOGDetectorKernel
- _hog_detector_kernel.configure(compile_context, input, hog, detection_windows, &_num_detection_windows, detection_window_stride, threshold, idx_class);
+ _hog_detector_kernel->configure(compile_context, input, hog, detection_windows, &_num_detection_windows, detection_window_stride, threshold, idx_class);
}
void CLHOGDetector::run()
q.enqueueWriteBuffer(_num_detection_windows, CL_FALSE, 0, sizeof(unsigned int), &init_num_detection_windows);
// Run CLHOGDetectorKernel
- CLScheduler::get().enqueue(_hog_detector_kernel);
+ CLScheduler::get().enqueue(*_hog_detector_kernel);
// Read number of detections
unsigned int num_detection_windows = 0;
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLMagnitudePhaseKernel.h"
+#include "support/MemorySupport.h"
using namespace arm_compute;
CLHOGGradient::CLHOGGradient(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _derivative(), _mag_phase(), _gx(), _gy()
+ : _memory_group(std::move(memory_manager)),
+ _derivative(),
+ _mag_phase(support::cpp14::make_unique<CLMagnitudePhaseKernel>()),
+ _gx(),
+ _gy()
{
}
// Initialise magnitude/phase kernel
if(PhaseType::UNSIGNED == phase_type)
{
- _mag_phase.configure(compile_context, &_gx, &_gy, output_magnitude, output_phase, MagnitudeType::L2NORM, PhaseType::UNSIGNED);
+ _mag_phase->configure(compile_context, &_gx, &_gy, output_magnitude, output_phase, MagnitudeType::L2NORM, PhaseType::UNSIGNED);
}
else
{
- _mag_phase.configure(compile_context, &_gx, &_gy, output_magnitude, output_phase, MagnitudeType::L2NORM, PhaseType::SIGNED);
+ _mag_phase->configure(compile_context, &_gx, &_gy, output_magnitude, output_phase, MagnitudeType::L2NORM, PhaseType::SIGNED);
}
// Allocate intermediate tensors
_derivative.run();
// Run magnitude/phase kernel
- CLScheduler::get().enqueue(_mag_phase);
+ CLScheduler::get().enqueue(*_mag_phase);
}
\ No newline at end of file
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/Scheduler.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLHOGDescriptorKernel.h"
+#include "src/core/CL/kernels/CLHOGDetectorKernel.h"
+#include "src/core/CL/kernels/CLMagnitudePhaseKernel.h"
+#include "support/MemorySupport.h"
using namespace arm_compute;
{
}
+CLHOGMultiDetection::~CLHOGMultiDetection() = default;
+
void CLHOGMultiDetection::configure(ICLTensor *input, const ICLMultiHOG *multi_hog, ICLDetectionWindowArray *detection_windows, ICLSize2DArray *detection_window_strides, BorderMode border_mode,
uint8_t constant_border_value, float threshold, bool non_maxima_suppression, float min_distance)
{
_num_block_norm_kernel = input_block_norm.size(); // Number of CLHOGBlockNormalizationKernel kernels to compute
_num_hog_detect_kernel = input_hog_detect.size(); // Number of CLHOGDetector functions to compute
- _orient_bin_kernel.resize(_num_orient_bin_kernel);
- _block_norm_kernel.resize(_num_block_norm_kernel);
+ _orient_bin_kernel.reserve(_num_orient_bin_kernel);
+ _block_norm_kernel.reserve(_num_block_norm_kernel);
_hog_detect_kernel.resize(_num_hog_detect_kernel);
_hog_space.resize(_num_orient_bin_kernel);
_hog_norm_space.resize(_num_block_norm_kernel);
_memory_group.manage(&_hog_space[i]);
// Initialise orientation binning kernel
- _orient_bin_kernel[i].configure(compile_context, &_mag, &_phase, &_hog_space[i], multi_hog->model(idx_multi_hog)->info());
+ _orient_bin_kernel.emplace_back(support::cpp14::make_unique<CLHOGOrientationBinningKernel>());
+ _orient_bin_kernel.back()->configure(compile_context, &_mag, &_phase, &_hog_space[i], multi_hog->model(idx_multi_hog)->info());
}
// Allocate intermediate tensors
_memory_group.manage(&_hog_norm_space[i]);
// Initialize block normalization kernel
- _block_norm_kernel[i].configure(compile_context, &_hog_space[idx_orient_bin], &_hog_norm_space[i], multi_hog->model(idx_multi_hog)->info());
+ _block_norm_kernel.emplace_back(support::cpp14::make_unique<CLHOGBlockNormalizationKernel>());
+ _block_norm_kernel.back()->configure(compile_context, &_hog_space[idx_orient_bin], &_hog_norm_space[i], multi_hog->model(idx_multi_hog)->info());
}
// Allocate intermediate tensors
// Run orientation binning kernel
for(size_t i = 0; i < _num_orient_bin_kernel; ++i)
{
- CLScheduler::get().enqueue(_orient_bin_kernel[i], false);
+ CLScheduler::get().enqueue(*_orient_bin_kernel[i], false);
}
// Run block normalization kernel
for(size_t i = 0; i < _num_block_norm_kernel; ++i)
{
- CLScheduler::get().enqueue(_block_norm_kernel[i], false);
+ CLScheduler::get().enqueue(*_block_norm_kernel[i], false);
}
// Run HOG detector kernel
#include "arm_compute/runtime/CL/functions/CLHarrisCorners.h"
#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
-#include "arm_compute/core/CL/kernels/CLHarrisCornersKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/CL/functions/CLSobel7x7.h"
#include "arm_compute/runtime/ITensorAllocator.h"
#include "arm_compute/runtime/Scheduler.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLHarrisCornersKernel.h"
+#include "src/core/CL/kernels/CLSobel5x5Kernel.h"
+#include "src/core/CL/kernels/CLSobel7x7Kernel.h"
#include "support/MemorySupport.h"
#include <cmath>
CLHarrisCorners::CLHarrisCorners(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
: _memory_group(std::move(memory_manager)),
_sobel(nullptr),
- _harris_score(),
+ _harris_score(support::cpp14::make_unique<CLHarrisScoreKernel>()),
_non_max_suppr(),
_candidates(),
_sort_euclidean(),
- _border_gx(),
- _border_gy(),
+ _border_gx(support::cpp14::make_unique<CLFillBorderKernel>()),
+ _border_gy(support::cpp14::make_unique<CLFillBorderKernel>()),
_gx(),
_gy(),
_score(),
{
}
+CLHarrisCorners::~CLHarrisCorners() = default;
+
void CLHarrisCorners::configure(ICLImage *input, float threshold, float min_dist,
float sensitivity, int32_t gradient_size, int32_t block_size, ICLKeyPointArray *corners,
BorderMode border_mode, uint8_t constant_border_value, bool use_fp16)
_memory_group.manage(&_score);
// Set/init Harris Score kernel accordingly with block_size
- _harris_score.configure(compile_context, &_gx, &_gy, &_score, block_size, pow4_normalization_factor, threshold, sensitivity, border_mode == BorderMode::UNDEFINED);
+ _harris_score->configure(compile_context, &_gx, &_gy, &_score, block_size, pow4_normalization_factor, threshold, sensitivity, border_mode == BorderMode::UNDEFINED);
// Configure border filling using harris score kernel's block size
- _border_gx.configure(compile_context, &_gx, _harris_score.border_size(), border_mode, PixelValue(constant_border_value));
- _border_gy.configure(compile_context, &_gy, _harris_score.border_size(), border_mode, PixelValue(constant_border_value));
+ _border_gx->configure(compile_context, &_gx, _harris_score->border_size(), border_mode, PixelValue(constant_border_value));
+ _border_gy->configure(compile_context, &_gy, _harris_score->border_size(), border_mode, PixelValue(constant_border_value));
// Allocate intermediate buffers
_gx.allocator()->allocate();
_sobel->run();
// Fill border before harris score kernel
- CLScheduler::get().enqueue(_border_gx, false);
- CLScheduler::get().enqueue(_border_gy, false);
+ CLScheduler::get().enqueue(*_border_gx, false);
+ CLScheduler::get().enqueue(*_border_gy, false);
// Run harris score kernel
- CLScheduler::get().enqueue(_harris_score, false);
+ CLScheduler::get().enqueue(*_harris_score, false);
// Run non-maxima suppression
_non_max_suppr.run();
*/
#include "arm_compute/runtime/CL/functions/CLInstanceNormalizationLayer.h"
-#include "arm_compute/core/CL/kernels/CLInstanceNormalizationLayerKernel.h"
#include "arm_compute/core/Types.h"
-
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLInstanceNormalizationLayerKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
*/
#include "arm_compute/runtime/CL/functions/CLIntegralImage.h"
-#include "arm_compute/core/CL/kernels/CLIntegralImageKernel.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLIntegralImageKernel.h"
+#include "support/MemorySupport.h"
using namespace arm_compute;
CLIntegralImage::CLIntegralImage()
- : _integral_hor(), _integral_vert()
+ : _integral_hor(support::cpp14::make_unique<CLIntegralImageHorKernel>()),
+ _integral_vert(support::cpp14::make_unique<CLIntegralImageVertKernel>())
{
}
+CLIntegralImage::~CLIntegralImage() = default;
+
void CLIntegralImage::configure(const ICLTensor *input, ICLTensor *output)
{
configure(CLKernelLibrary::get().get_compile_context(), input, output);
void CLIntegralImage::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output)
{
- _integral_hor.configure(compile_context, input, output);
- _integral_vert.configure(compile_context, output);
+ _integral_hor->configure(compile_context, input, output);
+ _integral_vert->configure(compile_context, output);
}
void CLIntegralImage::run()
{
- CLScheduler::get().enqueue(_integral_hor, false);
- CLScheduler::get().enqueue(_integral_vert);
+ CLScheduler::get().enqueue(*_integral_hor, false);
+ CLScheduler::get().enqueue(*_integral_vert);
}
#include "arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLL2NormalizeLayerKernel.h"
+#include "src/core/CL/kernels/CLReductionOperationKernel.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
} // namespace
CLL2NormalizeLayer::CLL2NormalizeLayer(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _reduce_func(), _normalize_kernel(), _sumsq()
+ : _memory_group(std::move(memory_manager)),
+ _reduce_func(),
+ _normalize_kernel(support::cpp14::make_unique<CLL2NormalizeLayerKernel>()),
+ _sumsq()
{
}
+CLL2NormalizeLayer::~CLL2NormalizeLayer() = default;
+
void CLL2NormalizeLayer::configure(ICLTensor *input, ICLTensor *output, int axis, float epsilon)
{
configure(CLKernelLibrary::get().get_compile_context(), input, output, axis, epsilon);
// Configure kernels
const uint32_t actual_axis = wrap_around(axis, max_input_tensor_dim);
_reduce_func.configure(compile_context, input, &_sumsq, actual_axis, ReductionOperation::SUM_SQUARE);
- _normalize_kernel.configure(compile_context, input, &_sumsq, output, axis, epsilon);
+ _normalize_kernel->configure(compile_context, input, &_sumsq, output, axis, epsilon);
// Allocate intermediate tensor
_sumsq.allocator()->allocate();
MemoryGroupResourceScope scope_mg(_memory_group);
_reduce_func.run();
- CLScheduler::get().enqueue(_normalize_kernel, true);
+ CLScheduler::get().enqueue(*_normalize_kernel, true);
}
} // namespace arm_compute
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLCopyKernel.h"
+#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpReductionKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
+#include "src/core/CL/kernels/CLMemsetKernel.h"
+#include "src/core/CL/kernels/CLTransposeKernel.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
CLLSTMLayer::CLLSTMLayer(std::shared_ptr<IMemoryManager> memory_manager)
: _memory_group(std::move(memory_manager)), _fully_connected_input_gate(), _accum_input_gate1(), _subtract_input_gate(), _pixelwise_mul_input_gate(), _activation_input_gate(),
- _fully_connected_forget_gate(), _accum_forget_gate1(), _pixelwise_mul_forget_gate(), _activation_forget_gate(), _fully_connected_cell_state(), _gemm_cell_state1(), _transpose_cell_state(),
- _accum_cell_state1(), _accum_cell_state2(), _pixelwise_mul_cell_state1(), _activation_cell_state(), _cell_clip(), _pixelwise_mul_cell_state2(), _fully_connected_output(),
- _pixelwise_mul_output_state1(), _accum_output1(), _activation_output(), _activation_output_state(), _pixelwise_mul_output_state2(), _fully_connected_output_state(), _projection_clip(),
- _copy_cell_state(), _copy_output(), _concat_scratch_buffer(), _concat_inputs_forget_gate(), _concat_weights_forget_gate(), _concat_weights_input_gate(), _concat_weights_output(),
- _ones_memset_kernel(), _mean_std_norm_input_gate(), _pixelwise_mul_input_gate_coeff(), _accum_input_gate_bias(), _mean_std_norm_forget_gate(), _pixelwise_mul_forget_gate_coeff(),
- _accum_forget_gate_bias(), _mean_std_norm_cell_gate(), _pixelwise_mul_cell_gate_coeff(), _accum_cell_gate_bias(), _mean_std_norm_output_gate(), _pixelwise_mul_output_gate_coeff(),
- _accum_output_gate_bias(), _input_gate_out1(), _input_gate_out2(), _input_gate_out3(), _input_gate_out4(), _forget_gate_out1(), _forget_gate_out2(), _forget_gate_out3(), _forget_gate_out4(),
- _forget_gate_out5(), _forget_gate_out6(), _cell_state_out1(), _cell_state_out2(), _cell_state_out3(), _cell_state_out4(), _cell_state_out5(), _output1(), _output2(), _output3(), _output4(),
- _cell_state_activation(), _output_state1(), _ones(), _input_layer_norm_out1(), _input_layer_norm_out2(), _forget_layer_norm_out1(), _forget_layer_norm_out2(), _cell_layer_norm_out1(),
- _cell_layer_norm_out2(), _output_layer_norm_out1(), _output_layer_norm_out2(), _run_peephole_opt(false), _run_cifg_opt(false), _perform_cell_clipping(false), _has_projection_weights(false),
- _perform_projection_clipping(false), _is_prepared(false), _is_layer_norm_lstm(false)
+ _fully_connected_forget_gate(), _accum_forget_gate1(), _pixelwise_mul_forget_gate(), _activation_forget_gate(), _fully_connected_cell_state(), _gemm_cell_state1(),
+ _transpose_cell_state(support::cpp14::make_unique<CLTransposeKernel>()), _accum_cell_state1(), _accum_cell_state2(), _pixelwise_mul_cell_state1(), _activation_cell_state(), _cell_clip(),
+ _pixelwise_mul_cell_state2(), _fully_connected_output(), _pixelwise_mul_output_state1(), _accum_output1(), _activation_output(), _activation_output_state(), _pixelwise_mul_output_state2(),
+ _fully_connected_output_state(), _projection_clip(), _copy_cell_state(support::cpp14::make_unique<CLCopyKernel>()), _copy_output(support::cpp14::make_unique<CLCopyKernel>()), _concat_scratch_buffer(),
+ _concat_inputs_forget_gate(), _concat_weights_forget_gate(), _concat_weights_input_gate(), _concat_weights_output(), _ones_memset_kernel(support::cpp14::make_unique<CLMemsetKernel>()),
+ _mean_std_norm_input_gate(), _pixelwise_mul_input_gate_coeff(), _accum_input_gate_bias(), _mean_std_norm_forget_gate(), _pixelwise_mul_forget_gate_coeff(), _accum_forget_gate_bias(),
+ _mean_std_norm_cell_gate(), _pixelwise_mul_cell_gate_coeff(), _accum_cell_gate_bias(), _mean_std_norm_output_gate(), _pixelwise_mul_output_gate_coeff(), _accum_output_gate_bias(), _input_gate_out1(),
+ _input_gate_out2(), _input_gate_out3(), _input_gate_out4(), _forget_gate_out1(), _forget_gate_out2(), _forget_gate_out3(), _forget_gate_out4(), _forget_gate_out5(), _forget_gate_out6(),
+ _cell_state_out1(), _cell_state_out2(), _cell_state_out3(), _cell_state_out4(), _cell_state_out5(), _output1(), _output2(), _output3(), _output4(), _cell_state_activation(), _output_state1(), _ones(),
+ _input_layer_norm_out1(), _input_layer_norm_out2(), _forget_layer_norm_out1(), _forget_layer_norm_out2(), _cell_layer_norm_out1(), _cell_layer_norm_out2(), _output_layer_norm_out1(),
+ _output_layer_norm_out2(), _run_peephole_opt(false), _run_cifg_opt(false), _perform_cell_clipping(false), _has_projection_weights(false), _perform_projection_clipping(false), _is_prepared(false),
+ _is_layer_norm_lstm(false)
{
}
+CLLSTMLayer::~CLLSTMLayer() = default;
+
void CLLSTMLayer::configure(const ICLTensor *input,
const ICLTensor *input_to_forget_weights, const ICLTensor *input_to_cell_weights, const ICLTensor *input_to_output_weights,
const ICLTensor *recurrent_to_forget_weights, const ICLTensor *recurrent_to_cell_weights, const ICLTensor *recurrent_to_output_weights,
{
_memory_group.manage(&_input_gate_out1);
_ones.allocator()->init(TensorInfo(cell_state_shape, 1, input->info()->data_type()));
- _ones_memset_kernel.configure(compile_context, &_ones, PixelValue(1, _ones.info()->data_type()));
+ _ones_memset_kernel->configure(compile_context, &_ones, PixelValue(1, _ones.info()->data_type()));
_subtract_input_gate.configure(compile_context, &_ones, forget_gate_out, &_input_gate_out1, ConvertPolicy::SATURATE);
_ones.allocator()->allocate();
_run_cifg_opt = true;
_memory_group.manage(&_cell_state_out1);
_fully_connected_cell_state.configure(compile_context, input, input_to_cell_weights, (_is_layer_norm_lstm) ? nullptr : cell_bias, &_cell_state_out1);
_memory_group.manage(&_cell_state_out2);
- _transpose_cell_state.configure(compile_context, recurrent_to_cell_weights, &_cell_state_out2);
+ _transpose_cell_state->configure(compile_context, recurrent_to_cell_weights, &_cell_state_out2);
_memory_group.manage(&_cell_state_out3);
_gemm_cell_state1.configure(compile_context, output_state_in, &_cell_state_out2, nullptr, &_cell_state_out3, 1.f, 0.f);
_cell_state_out2.allocator()->allocate();
}
// Copy cell state and output
- _copy_cell_state.configure(compile_context, &_cell_state_out1, cell_state_out);
- _copy_output.configure(compile_context, output_state_out, output);
+ _copy_cell_state->configure(compile_context, &_cell_state_out1, cell_state_out);
+ _copy_output->configure(compile_context, output_state_out, output);
// Vector for holding the tensors to store in scratch buffer
std::vector<const ICLTensor *> scratch_inputs;
if(_run_cifg_opt)
{
- CLScheduler::get().enqueue(_ones_memset_kernel);
+ CLScheduler::get().enqueue(*_ones_memset_kernel);
_subtract_input_gate.run();
}
else
}
_fully_connected_cell_state.run();
- CLScheduler::get().enqueue(_transpose_cell_state);
+ CLScheduler::get().enqueue(*_transpose_cell_state);
_gemm_cell_state1.run();
_accum_cell_state1.run();
if(_is_layer_norm_lstm)
}
}
- CLScheduler::get().enqueue(_copy_cell_state);
- CLScheduler::get().enqueue(_copy_output);
+ CLScheduler::get().enqueue(*_copy_cell_state);
+ CLScheduler::get().enqueue(*_copy_output);
_concat_scratch_buffer.run();
}
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
+#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpReductionKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
#include <memory>
#include "arm_compute/runtime/CL/functions/CLElementwiseOperations.h"
#include "arm_compute/runtime/CL/functions/CLGaussian5x5.h"
#include "arm_compute/runtime/CL/functions/CLGaussianPyramid.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLGaussian5x5Kernel.h"
+#include "src/core/CL/kernels/CLGaussianPyramidKernel.h"
using namespace arm_compute;
*/
#include "arm_compute/runtime/CL/functions/CLLaplacianReconstruct.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/IPyramid.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
#include <cstddef>
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLCol2ImKernel.h"
+#include "src/core/CL/kernels/CLIm2ColKernel.h"
+#include "src/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h"
+#include "src/core/CL/kernels/CLWeightsReshapeKernel.h"
+#include "support/MemorySupport.h"
#include <cmath>
#include <tuple>
} // namespace
CLLocallyConnectedLayer::CLLocallyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _input_im2col_kernel(), _weights_reshape_kernel(), _mm_kernel(), _output_col2im_kernel(), _input_im2col_reshaped(), _weights_reshaped(), _gemm_output(),
- _is_prepared(false), _original_weights(nullptr)
+ : _memory_group(std::move(memory_manager)),
+ _input_im2col_kernel(support::cpp14::make_unique<CLIm2ColKernel>()),
+ _weights_reshape_kernel(support::cpp14::make_unique<CLWeightsReshapeKernel>()),
+ _mm_kernel(support::cpp14::make_unique<CLLocallyConnectedMatrixMultiplyKernel>()),
+ _output_col2im_kernel(support::cpp14::make_unique<CLCol2ImKernel>()),
+ _input_im2col_reshaped(),
+ _weights_reshaped(),
+ _gemm_output(),
+ _is_prepared(false),
+ _original_weights(nullptr)
{
}
_memory_group.manage(&_gemm_output);
// Configure kernels
- _input_im2col_kernel.configure(compile_context, input, &_input_im2col_reshaped, Size2D(kernel_width, kernel_height), conv_info, _has_bias);
- _weights_reshape_kernel.configure(compile_context, weights, biases, &_weights_reshaped);
- _mm_kernel.configure(compile_context, &_input_im2col_reshaped, &_weights_reshaped, &_gemm_output);
- _output_col2im_kernel.configure(compile_context, &_gemm_output, output, Size2D(conv_w, conv_h));
+ _input_im2col_kernel->configure(compile_context, input, &_input_im2col_reshaped, Size2D(kernel_width, kernel_height), conv_info, _has_bias);
+ _weights_reshape_kernel->configure(compile_context, weights, biases, &_weights_reshaped);
+ _mm_kernel->configure(compile_context, &_input_im2col_reshaped, &_weights_reshaped, &_gemm_output);
+ _output_col2im_kernel->configure(compile_context, &_gemm_output, output, Size2D(conv_w, conv_h));
// Allocate intermediate tensors
_input_im2col_reshaped.allocator()->allocate();
_gemm_output.allocator()->allocate();
- CLScheduler::get().tune_kernel_static(_input_im2col_kernel);
+ CLScheduler::get().tune_kernel_static(*_input_im2col_kernel);
}
void CLLocallyConnectedLayer::run()
MemoryGroupResourceScope scope_mg(_memory_group);
// Run input reshaping
- CLScheduler::get().enqueue(_input_im2col_kernel);
+ CLScheduler::get().enqueue(*_input_im2col_kernel);
// Runs vector matrix multiply on reshaped matrices
- CLScheduler::get().enqueue(_mm_kernel);
+ CLScheduler::get().enqueue(*_mm_kernel);
// Reshape output matrix
- CLScheduler::get().enqueue(_output_col2im_kernel, false);
+ CLScheduler::get().enqueue(*_output_col2im_kernel.get(), false);
}
void CLLocallyConnectedLayer::prepare()
// Run weights reshaping and mark original weights tensor as unused
_weights_reshaped.allocator()->allocate();
- CLScheduler::get().enqueue(_weights_reshape_kernel);
+ CLScheduler::get().enqueue(*_weights_reshape_kernel);
_original_weights->mark_as_unused();
CLScheduler::get().queue().finish();
*/
#include "arm_compute/runtime/CL/functions/CLMagnitude.h"
-#include "arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h"
+#include "src/core/CL/kernels/CLMagnitudePhaseKernel.h"
#include "support/MemorySupport.h"
#include <utility>
#include "arm_compute/runtime/CL/functions/CLMaxUnpoolingLayer.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLMaxUnpoolingLayerKernel.h"
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLMaxUnpoolingLayerKernel.h"
+#include "src/core/CL/kernels/CLMemsetKernel.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
CLMaxUnpoolingLayer::CLMaxUnpoolingLayer()
- : _memset_kernel(), _unpooling_layer_kernel()
+ : _memset_kernel(support::cpp14::make_unique<CLMemsetKernel>()),
+ _unpooling_layer_kernel(support::cpp14::make_unique<CLMaxUnpoolingLayerKernel>())
{
}
+CLMaxUnpoolingLayer::~CLMaxUnpoolingLayer() = default;
+
void CLMaxUnpoolingLayer::configure(ICLTensor *input, ICLTensor *indices, ICLTensor *output, const PoolingLayerInfo &pool_info)
{
configure(CLKernelLibrary::get().get_compile_context(), input, indices, output, pool_info);
void CLMaxUnpoolingLayer::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *indices, ICLTensor *output, const PoolingLayerInfo &pool_info)
{
const PixelValue zero_value(0.f);
- _memset_kernel.configure(output, zero_value);
+ _memset_kernel->configure(output, zero_value);
- _unpooling_layer_kernel.configure(compile_context, input, indices, output, pool_info);
+ _unpooling_layer_kernel->configure(compile_context, input, indices, output, pool_info);
}
Status CLMaxUnpoolingLayer::validate(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, const PoolingLayerInfo &pool_info)
void CLMaxUnpoolingLayer::run()
{
// Run memset
- CLScheduler::get().enqueue(_memset_kernel, false);
+ CLScheduler::get().enqueue(*_memset_kernel, false);
// Run max unpooling layer
- CLScheduler::get().enqueue(_unpooling_layer_kernel);
+ CLScheduler::get().enqueue(*_unpooling_layer_kernel);
}
} /* namespace arm_compute */
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/functions/CLMeanStdDev.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLMeanStdDevKernel.h"
+#include "src/core/CL/kernels/CLReductionOperationKernel.h"
+#include "support/MemorySupport.h"
using namespace arm_compute;
_reduction_output_stddev(),
_mean(nullptr),
_stddev(nullptr),
- _mean_stddev_kernel(),
- _fill_border_kernel(),
+ _mean_stddev_kernel(support::cpp14::make_unique<CLMeanStdDevKernel>()),
+ _fill_border_kernel(support::cpp14::make_unique<CLFillBorderKernel>()),
_global_sum(),
_global_sum_squared()
{
}
+CLMeanStdDev::~CLMeanStdDev() = default;
+
Status CLMeanStdDev::validate(ITensorInfo *input, float *mean, float *stddev)
{
ARM_COMPUTE_RETURN_ERROR_ON_TENSOR_NOT_2D(input);
_global_sum_squared = cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, sizeof(cl_ulong));
}
- _mean_stddev_kernel.configure(compile_context, input, mean, &_global_sum, stddev, &_global_sum_squared);
- _fill_border_kernel.configure(compile_context, input, _mean_stddev_kernel.border_size(), BorderMode::CONSTANT, PixelValue(static_cast<uint8_t>(0)));
+ _mean_stddev_kernel->configure(compile_context, input, mean, &_global_sum, stddev, &_global_sum_squared);
+ _fill_border_kernel->configure(compile_context, input, _mean_stddev_kernel->border_size(), BorderMode::CONSTANT, PixelValue(static_cast<uint8_t>(0)));
}
}
void CLMeanStdDev::run_int()
{
- CLScheduler::get().enqueue(_fill_border_kernel);
- CLScheduler::get().enqueue(_mean_stddev_kernel);
+ CLScheduler::get().enqueue(*_fill_border_kernel);
+ CLScheduler::get().enqueue(*_mean_stddev_kernel);
}
void CLMeanStdDev::run()
*/
#include "arm_compute/runtime/CL/functions/CLMeanStdDevNormalizationLayer.h"
-#include "arm_compute/core/CL/kernels/CLMeanStdDevNormalizationKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/CL/kernels/CLMeanStdDevNormalizationKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
*/
#include "arm_compute/runtime/CL/functions/CLMedian3x3.h"
-#include "arm_compute/core/CL/kernels/CLMedian3x3Kernel.h"
#include "arm_compute/core/PixelValue.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLMedian3x3Kernel.h"
#include "support/MemorySupport.h"
#include <utility>
auto k = arm_compute::support::cpp14::make_unique<CLMedian3x3Kernel>();
k->configure(compile_context, input, output, border_mode == BorderMode::UNDEFINED);
_kernel = std::move(k);
- _border_handler.configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+ _border_handler->configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
}
* SOFTWARE.
*/
#include "arm_compute/runtime/CL/functions/CLMinMaxLocation.h"
-
#include "arm_compute/core/CL/CLHelpers.h"
+#include "src/core/CL/kernels/CLMinMaxLocationKernel.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
CLMinMaxLocation::CLMinMaxLocation()
- : _min_max_kernel(),
- _min_max_loc_kernel(),
+ : _min_max_kernel(support::cpp14::make_unique<CLMinMaxKernel>()),
+ _min_max_loc_kernel(support::cpp14::make_unique<CLMinMaxLocationKernel>()),
_min_max_vals(),
_min_max_count_vals(),
_min(nullptr),
{
}
+CLMinMaxLocation::~CLMinMaxLocation() = default;
+
void CLMinMaxLocation::configure(const ICLImage *input, void *min, void *max, CLCoordinates2DArray *min_loc, CLCoordinates2DArray *max_loc, uint32_t *min_count, uint32_t *max_count)
{
configure(CLKernelLibrary::get().get_compile_context(), input, min, max, min_loc, max_loc, min_count, max_count);
_min_loc = min_loc;
_max_loc = max_loc;
- _min_max_kernel.configure(compile_context, input, &_min_max_vals);
- _min_max_loc_kernel.configure(compile_context, input, &_min_max_vals, &_min_max_count_vals, _min_loc, _max_loc);
+ _min_max_kernel->configure(compile_context, input, &_min_max_vals);
+ _min_max_loc_kernel->configure(compile_context, input, &_min_max_vals, &_min_max_count_vals, _min_loc, _max_loc);
}
void CLMinMaxLocation::run()
{
cl::CommandQueue q = CLScheduler::get().queue();
- CLScheduler::get().enqueue(_min_max_kernel, false);
- CLScheduler::get().enqueue(_min_max_loc_kernel, false);
+ CLScheduler::get().enqueue(*_min_max_kernel, false);
+ CLScheduler::get().enqueue(*_min_max_loc_kernel, false);
// Update min and max
q.enqueueReadBuffer(_min_max_vals, CL_FALSE, 0 * sizeof(int32_t), sizeof(int32_t), static_cast<int32_t *>(_min));
*/
#include "arm_compute/runtime/CL/functions/CLNonLinearFilter.h"
-#include "arm_compute/core/CL/kernels/CLNonLinearFilterKernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLNonLinearFilterKernel.h"
#include "support/MemorySupport.h"
#include <utility>
auto k = arm_compute::support::cpp14::make_unique<CLNonLinearFilterKernel>();
k->configure(compile_context, input, output, function, mask_size, pattern, mask, border_mode == BorderMode::UNDEFINED);
_kernel = std::move(k);
- _border_handler.configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+ _border_handler->configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
}
*/
#include "arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h"
-#include "arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h"
#include "support/MemorySupport.h"
#include <utility>
if(border_mode != BorderMode::UNDEFINED)
{
- _border_handler.configure(compile_context, input, _kernel->border_size(), BorderMode::CONSTANT);
+ _border_handler->configure(compile_context, input, _kernel->border_size(), BorderMode::CONSTANT);
}
else
{
- _border_handler.configure(compile_context, input, _kernel->border_size(), BorderMode::UNDEFINED);
+ _border_handler->configure(compile_context, input, _kernel->border_size(), BorderMode::UNDEFINED);
}
}
#include "arm_compute/runtime/CL/functions/CLNormalizationLayer.h"
#include "arm_compute/core/Error.h"
+#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLNormalizationLayerKernel.h"
+#include "support/MemorySupport.h"
using namespace arm_compute;
CLNormalizationLayer::CLNormalizationLayer()
- : _norm_kernel(), _border_handler()
+ : _norm_kernel(support::cpp14::make_unique<CLNormalizationLayerKernel>()),
+ _border_handler(support::cpp14::make_unique<CLFillBorderKernel>())
{
}
+CLNormalizationLayer::~CLNormalizationLayer() = default;
+
void CLNormalizationLayer::configure(ICLTensor *input, ICLTensor *output, const NormalizationLayerInfo &norm_info)
{
configure(CLKernelLibrary::get().get_compile_context(), input, output, norm_info);
ARM_COMPUTE_ERROR_ON(input == nullptr);
// Configure normalization kernel
- _norm_kernel.configure(compile_context, input, output, norm_info);
+ _norm_kernel->configure(compile_context, input, output, norm_info);
// Fill the border by 3 elements since we need vload4 in the IN_MAP normalization kernel
- _border_handler.configure(compile_context, input, _norm_kernel.border_size(), BorderMode::CONSTANT, PixelValue());
+ _border_handler->configure(compile_context, input, _norm_kernel->border_size(), BorderMode::CONSTANT, PixelValue());
}
Status CLNormalizationLayer::validate(const ITensorInfo *input, const ITensorInfo *output, const NormalizationLayerInfo &norm_info)
void CLNormalizationLayer::run()
{
// Run border handler
- CLScheduler::get().enqueue(_border_handler, false);
+ CLScheduler::get().enqueue(*_border_handler, false);
// Run normalization kernel
- CLScheduler::get().enqueue(_norm_kernel);
+ CLScheduler::get().enqueue(*_norm_kernel);
}
#include "arm_compute/runtime/CL/functions/CLNormalizePlanarYUVLayer.h"
-#include "arm_compute/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h"
+#include "src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h"
#include "support/MemorySupport.h"
#include <utility>
#include "arm_compute/runtime/CL/functions/CLOpticalFlow.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLLKTrackerKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Window.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"
#include "arm_compute/runtime/CL/functions/CLScharr3x3.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLLKTrackerKernel.h"
#include "support/MemorySupport.h"
using namespace arm_compute;
_tracker_init_kernel(),
_tracker_stage0_kernel(),
_tracker_stage1_kernel(),
- _tracker_finalize_kernel(),
+ _tracker_finalize_kernel(support::cpp14::make_unique<CLLKTrackerFinalizeKernel>()),
_func_scharr(),
_scharr_gx(),
_scharr_gy(),
{
}
+CLOpticalFlow::~CLOpticalFlow() = default;
+
void CLOpticalFlow::configure(const CLPyramid *old_pyramid, const CLPyramid *new_pyramid,
const ICLKeyPointArray *old_points, const ICLKeyPointArray *new_points_estimates, ICLKeyPointArray *new_points,
Termination termination, float epsilon, size_t num_iterations, size_t window_dimension, bool use_initial_estimate,
const int old_values_list_length = list_length * window_dimension * window_dimension;
// Create kernels and tensors
- _tracker_init_kernel.resize(_num_levels);
- _tracker_stage0_kernel.resize(_num_levels);
- _tracker_stage1_kernel.resize(_num_levels);
+ _tracker_init_kernel.reserve(_num_levels);
+ _tracker_stage0_kernel.reserve(_num_levels);
+ _tracker_stage1_kernel.reserve(_num_levels);
_func_scharr.resize(_num_levels);
_scharr_gx.resize(_num_levels);
_scharr_gy.resize(_num_levels);
_func_scharr[i].configure(compile_context, old_ith_input, &_scharr_gx[i], &_scharr_gy[i], border_mode, constant_border_value);
// Init Lucas-Kanade init kernel
- _tracker_init_kernel[i].configure(compile_context, old_points, new_points_estimates, _old_points_internal.get(), _new_points_internal.get(), use_initial_estimate, i, _num_levels, pyr_scale);
+ _tracker_init_kernel.emplace_back(support::cpp14::make_unique<CLLKTrackerInitKernel>());
+ _tracker_init_kernel.back()->configure(compile_context, old_points, new_points_estimates, _old_points_internal.get(), _new_points_internal.get(), use_initial_estimate, i, _num_levels, pyr_scale);
// Init Lucas-Kanade stage0 kernel
- _tracker_stage0_kernel[i].configure(compile_context, old_ith_input, &_scharr_gx[i], &_scharr_gy[i],
- _old_points_internal.get(), _new_points_internal.get(), _coefficient_table.get(), _old_values.get(),
- window_dimension, i);
+ _tracker_stage0_kernel.emplace_back(support::cpp14::make_unique<CLLKTrackerStage0Kernel>());
+ _tracker_stage0_kernel.back()->configure(compile_context, old_ith_input, &_scharr_gx[i], &_scharr_gy[i],
+ _old_points_internal.get(), _new_points_internal.get(), _coefficient_table.get(), _old_values.get(),
+ window_dimension, i);
// Init Lucas-Kanade stage1 kernel
- _tracker_stage1_kernel[i].configure(compile_context, new_ith_input, _new_points_internal.get(), _coefficient_table.get(), _old_values.get(),
- termination, epsilon, num_iterations, window_dimension, i);
+ _tracker_stage1_kernel.emplace_back(support::cpp14::make_unique<CLLKTrackerStage1Kernel>());
+ _tracker_stage1_kernel.back()->configure(compile_context, new_ith_input, _new_points_internal.get(), _coefficient_table.get(), _old_values.get(),
+ termination, epsilon, num_iterations, window_dimension, i);
// Allocate intermediate buffers
_scharr_gx[i].allocator()->allocate();
}
// Finalize Lucas-Kanade
- _tracker_finalize_kernel.configure(compile_context, _new_points_internal.get(), new_points);
+ _tracker_finalize_kernel->configure(compile_context, _new_points_internal.get(), new_points);
}
void CLOpticalFlow::run()
_func_scharr[level - 1].run();
// Run Lucas-Kanade init kernel
- CLScheduler::get().enqueue(_tracker_init_kernel[level - 1]);
+ CLScheduler::get().enqueue(*_tracker_init_kernel[level - 1]);
// Run Lucas-Kanade stage0 kernel
- CLScheduler::get().enqueue(_tracker_stage0_kernel[level - 1]);
+ CLScheduler::get().enqueue(*_tracker_stage0_kernel[level - 1]);
// Run Lucas-Kanade stage1 kernel
- CLScheduler::get().enqueue(_tracker_stage1_kernel[level - 1]);
+ CLScheduler::get().enqueue(*_tracker_stage1_kernel[level - 1]);
}
- CLScheduler::get().enqueue(_tracker_finalize_kernel, true);
+ CLScheduler::get().enqueue(*_tracker_finalize_kernel, true);
}
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h"
+#include "src/core/CL/kernels/CLElementwiseOperationKernel.h"
#include "arm_compute/core/CL/ICLTensor.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
* SOFTWARE.
*/
#include "arm_compute/runtime/CL/functions/CLPadLayer.h"
+#include "src/core/CL/kernels/CLCopyKernel.h"
+#include "src/core/CL/kernels/CLPadLayerKernel.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
CLPadLayer::CLPadLayer()
- : _pad_kernel(), _copy_kernel(), _perform_pad(false)
+ : _pad_kernel(support::cpp14::make_unique<CLPadLayerKernel>()),
+ _copy_kernel(support::cpp14::make_unique<CLCopyKernel>()),
+ _perform_pad(false)
{
}
+CLPadLayer::~CLPadLayer() = default;
+
void CLPadLayer::configure(ICLTensor *input, ICLTensor *output, const PaddingList &padding, PixelValue constant_value, PaddingMode mode)
{
configure(CLKernelLibrary::get().get_compile_context(), input, output, padding, constant_value, mode);
if(_perform_pad)
{
- _pad_kernel.configure(compile_context, input, output, padding, constant_value, mode);
+ _pad_kernel->configure(compile_context, input, output, padding, constant_value, mode);
}
else
{
// Copy the input to the whole output if no padding is applied
- _copy_kernel.configure(compile_context, input, output);
+ _copy_kernel->configure(compile_context, input, output);
}
}
Status CLPadLayer::validate(const ITensorInfo *input, const ITensorInfo *output, const PaddingList &padding, PixelValue constant_value, PaddingMode mode)
{
if(_perform_pad)
{
- CLScheduler::get().enqueue(_pad_kernel);
+ CLScheduler::get().enqueue(*_pad_kernel);
}
else
{
- CLScheduler::get().enqueue(_copy_kernel);
+ CLScheduler::get().enqueue(*_copy_kernel);
}
}
} // namespace arm_compute
\ No newline at end of file
#include "arm_compute/runtime/CL/functions/CLPermute.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLPermuteKernel.h"
#include "arm_compute/core/Error.h"
+#include "src/core/CL/kernels/CLPermuteKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
*/
#include "arm_compute/runtime/CL/functions/CLPhase.h"
-#include "arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h"
+#include "src/core/CL/kernels/CLMagnitudePhaseKernel.h"
#include "support/MemorySupport.h"
#include <utility>
#include "arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLPixelWiseMultiplicationKernel.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLPixelWiseMultiplicationKernel.h"
#include "support/MemorySupport.h"
#include <utility>
namespace experimental
{
CLPixelWiseMultiplication::CLPixelWiseMultiplication()
- : _border_handler()
+ : _border_handler(support::cpp14::make_unique<CLFillBorderKernel>())
{
}
if(broadcasted_info->dimension(0) == 1)
{
- _border_handler.configure(compile_context, broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE);
+ _border_handler->configure(compile_context, broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE);
}
}
}
void CLPixelWiseMultiplication::run(ITensorPack &tensors)
{
auto border_pack = select_border_input(tensors);
- CLScheduler::get().enqueue_op(_border_handler, border_pack);
+ CLScheduler::get().enqueue_op(*_border_handler, border_pack);
ICLOperator::run(tensors);
}
CLComplexPixelWiseMultiplication::CLComplexPixelWiseMultiplication()
- : _border_handler()
+ : _border_handler(support::cpp14::make_unique<CLFillBorderKernel>())
{
}
if(broadcasted_info->dimension(0) == 1)
{
- _border_handler.configure(compile_context, broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE);
+ _border_handler->configure(compile_context, broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE);
}
}
}
void CLComplexPixelWiseMultiplication::run(ITensorPack &tensors)
{
auto border_pack = select_border_input(tensors);
- CLScheduler::get().enqueue_op(_border_handler, border_pack);
+ CLScheduler::get().enqueue_op(*_border_handler, border_pack);
ICLOperator::run(tensors);
}
} // namespace experimental
#include "arm_compute/runtime/CL/functions/CLPoolingLayer.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLPoolingLayerKernel.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLPoolingLayerKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
default:
ARM_COMPUTE_ERROR("Data layout not supported");
}
- _border_handler.configure(compile_context, input, _kernel->border_size(), border_mode, pixel_value);
+ _border_handler->configure(compile_context, input, _kernel->border_size(), border_mode, pixel_value);
// Tune kernels
CLScheduler::get().tune_kernel_static(*_kernel);
#include "arm_compute/runtime/CL/functions/CLPriorBoxLayer.h"
-#include "arm_compute/core/CL/kernels/CLPriorBoxLayerKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
-
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLPriorBoxLayerKernel.h"
#include "support/MemorySupport.h"
using namespace arm_compute;
#include "arm_compute/core/utils/misc/InfoHelpers.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLCopyKernel.h"
+#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpReductionKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
+#include "src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h"
#include "src/core/helpers/WindowHelpers.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
}
CLQLSTMLayer::CLQLSTMLayer(std::shared_ptr<IMemoryManager> memory_manager)
+ : _input_to_input_reduction(support::cpp14::make_unique<CLGEMMLowpMatrixAReductionKernel>()),
+ _recurrent_to_input_reduction(support::cpp14::make_unique<CLGEMMLowpMatrixAReductionKernel>()),
+ _input_to_forget_reduction(support::cpp14::make_unique<CLGEMMLowpMatrixAReductionKernel>()),
+ _recurrent_to_forget_reduction(support::cpp14::make_unique<CLGEMMLowpMatrixAReductionKernel>()),
+ _input_to_cell_reduction(support::cpp14::make_unique<CLGEMMLowpMatrixAReductionKernel>()),
+ _recurrent_to_cell_reduction(support::cpp14::make_unique<CLGEMMLowpMatrixAReductionKernel>()),
+ _input_to_output_reduction(support::cpp14::make_unique<CLGEMMLowpMatrixAReductionKernel>()),
+ _recurrent_to_output_reduction(support::cpp14::make_unique<CLGEMMLowpMatrixAReductionKernel>()),
+ _projection_reduction(support::cpp14::make_unique<CLGEMMLowpMatrixAReductionKernel>()),
+ _layer_norms(),
+ _copy_output(support::cpp14::make_unique<CLCopyKernel>())
{
+ for(auto &norm : _layer_norms)
+ {
+ norm = support::cpp14::make_unique<CLQLSTMLayerNormalizationKernel>();
+ }
+
_memory_group = MemoryGroup(std::move(memory_manager));
}
+CLQLSTMLayer::~CLQLSTMLayer() = default;
+
+void CLQLSTMLayer::configure_layer_norm(LayerNormGate g, const ICLTensor *in)
+{
+ ARM_COMPUTE_ERROR_ON(!_has_layer_norm);
+
+ CLTensor *out = &get_layer_norm_output(g);
+ _memory_group.manage(out);
+ out->allocator()->init(*(in->info()));
+
+ get_layer_norm(g).configure(in, out, get_layer_norm_weight(g), get_layer_norm_bias(g));
+}
+
+Status CLQLSTMLayer::validate_layer_norm(const ITensorInfo &in, const ITensorInfo &weight, const ITensorInfo &bias)
+{
+ // Output quantization scale will be different, but ignored here
+ // since it will be configured at configure() stage.
+ const TensorInfo out
+ {
+ in
+ };
+ return CLQLSTMLayerNormalizationKernel::validate(&in, &out, &weight, &bias);
+}
+
void CLQLSTMLayer::configure_mm(const CLCompileContext &compile_context, CLGEMMLowpMatrixMultiplyCore &mm, CLGEMMLowpOutputStage &outstage, GEMMLowpOutputStageInfo &gemmlowp_info,
const ICLTensor *mm_input, const ICLTensor *mm_weights, const ICLTensor *bias,
CLTensor *mm_res, CLTensor *outstage_res, float gemmlowp_scale,
_input_to_input_weights = lstm_params.input_to_input_weights();
_recurrent_to_input_weights = lstm_params.recurrent_to_input_weights();
- _input_to_input_reduction.configure(compile_context, _input_to_input_weights, &_input_to_input_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
- _recurrent_to_input_reduction.configure(compile_context, _recurrent_to_input_weights, &_recurrent_to_input_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
+ _input_to_input_reduction->configure(compile_context, _input_to_input_weights, &_input_to_input_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
+ _recurrent_to_input_reduction->configure(compile_context, _recurrent_to_input_weights, &_recurrent_to_input_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
}
- _input_to_forget_reduction.configure(compile_context, input_to_forget_weights, &_input_to_forget_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
- _recurrent_to_forget_reduction.configure(compile_context, recurrent_to_forget_weights, &_recurrent_to_forget_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
- _input_to_cell_reduction.configure(compile_context, input_to_cell_weights, &_input_to_cell_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
- _recurrent_to_cell_reduction.configure(compile_context, recurrent_to_cell_weights, &_recurrent_to_cell_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
- _input_to_output_reduction.configure(compile_context, input_to_output_weights, &_input_to_output_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
- _recurrent_to_output_reduction.configure(compile_context, recurrent_to_output_weights, &_recurrent_to_output_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
+ _input_to_forget_reduction->configure(compile_context, input_to_forget_weights, &_input_to_forget_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
+ _recurrent_to_forget_reduction->configure(compile_context, recurrent_to_forget_weights, &_recurrent_to_forget_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
+ _input_to_cell_reduction->configure(compile_context, input_to_cell_weights, &_input_to_cell_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
+ _recurrent_to_cell_reduction->configure(compile_context, recurrent_to_cell_weights, &_recurrent_to_cell_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
+ _input_to_output_reduction->configure(compile_context, input_to_output_weights, &_input_to_output_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qinput.offset, true));
+ _recurrent_to_output_reduction->configure(compile_context, recurrent_to_output_weights, &_recurrent_to_output_eff_bias, GEMMLowpReductionKernelInfo(num_units, false, -qoutput_state_in.offset, true));
if(_has_projection)
{
- _projection_reduction.configure(compile_context, _projection_weights, &_projection_eff_bias, GEMMLowpReductionKernelInfo(output_size, false, lstm_params.hidden_state_zero(), true));
+ _projection_reduction->configure(compile_context, _projection_weights, &_projection_eff_bias, GEMMLowpReductionKernelInfo(output_size, false, lstm_params.hidden_state_zero(), true));
if(_projection_bias != nullptr)
{
_projection_bias_add.configure(compile_context, _projection_bias, &_projection_eff_bias, &_projection_eff_bias, ConvertPolicy::SATURATE);
}
// Copy output_state_out to output
- _copy_output.configure(compile_context, output_state_out, output);
+ _copy_output->configure(compile_context, output_state_out, output);
}
Status CLQLSTMLayer::validate(const ITensorInfo *input,
}
// Copy output_state_out to output
- CLScheduler::get().enqueue(_copy_output);
+ CLScheduler::get().enqueue(*_copy_output);
}
void CLQLSTMLayer::prepare()
{
_input_to_input_eff_bias.allocator()->allocate();
_recurrent_to_input_eff_bias.allocator()->allocate();
- CLScheduler::get().enqueue(_input_to_input_reduction);
- CLScheduler::get().enqueue(_recurrent_to_input_reduction);
+ CLScheduler::get().enqueue(*_input_to_input_reduction);
+ CLScheduler::get().enqueue(*_recurrent_to_input_reduction);
_input_to_input_weights_transposed.allocator()->allocate();
_recurrent_to_input_weights_transposed.allocator()->allocate();
_recurrent_to_cell_eff_bias.allocator()->allocate();
_input_to_output_eff_bias.allocator()->allocate();
_recurrent_to_output_eff_bias.allocator()->allocate();
- CLScheduler::get().enqueue(_input_to_forget_reduction);
- CLScheduler::get().enqueue(_recurrent_to_forget_reduction);
- CLScheduler::get().enqueue(_input_to_cell_reduction);
- CLScheduler::get().enqueue(_recurrent_to_cell_reduction);
- CLScheduler::get().enqueue(_input_to_output_reduction);
- CLScheduler::get().enqueue(_recurrent_to_output_reduction);
+ CLScheduler::get().enqueue(*_input_to_forget_reduction);
+ CLScheduler::get().enqueue(*_recurrent_to_forget_reduction);
+ CLScheduler::get().enqueue(*_input_to_cell_reduction);
+ CLScheduler::get().enqueue(*_recurrent_to_cell_reduction);
+ CLScheduler::get().enqueue(*_input_to_output_reduction);
+ CLScheduler::get().enqueue(*_recurrent_to_output_reduction);
if(_has_projection)
{
_projection_eff_bias.allocator()->allocate();
- CLScheduler::get().enqueue(_projection_reduction);
+ CLScheduler::get().enqueue(*_projection_reduction);
if(_projection_bias != nullptr)
{
_projection_bias_add.run();
*/
#include "arm_compute/runtime/CL/functions/CLQuantizationLayer.h"
-#include "arm_compute/core/CL/kernels/CLQuantizationLayerKernel.h"
+#include "src/core/CL/kernels/CLQuantizationLayerKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLCopyKernel.h"
+#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpReductionKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
using namespace arm_compute::misc::shape_calculator;
CLRNNLayer::CLRNNLayer(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _gemm_state_f(), _add_kernel(), _activation(), _fully_connected_kernel(), _copy_kernel(), _fully_connected_out(), _gemm_output(), _add_output(),
- _is_prepared(false)
+ : _memory_group(std::move(memory_manager)), _gemm_state_f(), _add_kernel(), _activation(), _fully_connected_kernel(), _copy_kernel(support::cpp14::make_unique<CLCopyKernel>()), _fully_connected_out(),
+ _gemm_output(), _add_output(), _is_prepared(false)
{
}
+CLRNNLayer::~CLRNNLayer() = default;
+
Status CLRNNLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *recurrent_weights, const ITensorInfo *bias, const ITensorInfo *hidden_state,
const ITensorInfo *output, const ActivationLayerInfo &info)
{
_activation.configure(compile_context, &_add_output, hidden_state, info);
_add_output.allocator()->allocate();
- _copy_kernel.configure(compile_context, hidden_state, output);
+ _copy_kernel->configure(compile_context, hidden_state, output);
}
void CLRNNLayer::run()
_activation.run();
// copy hidden out to output
- CLScheduler::get().enqueue(_copy_kernel);
+ CLScheduler::get().enqueue(*_copy_kernel);
}
void CLRNNLayer::prepare()
#include "arm_compute/runtime/CL/functions/CLROIAlignLayer.h"
#include "arm_compute/core/CL/ICLArray.h"
-#include "arm_compute/core/CL/kernels/CLROIAlignLayerKernel.h"
+#include "src/core/CL/kernels/CLROIAlignLayerKernel.h"
+#include "src/core/CL/kernels/CLROIPoolingLayerKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
* SOFTWARE.
*/
#include "arm_compute/runtime/CL/functions/CLROIPoolingLayer.h"
-
#include "arm_compute/core/CL/ICLArray.h"
-
-#include "arm_compute/core/CL/kernels/CLROIPoolingLayerKernel.h"
+#include "src/core/CL/kernels/CLROIPoolingLayerKernel.h"
#include "support/MemorySupport.h"
using namespace arm_compute;
#include "arm_compute/runtime/CL/functions/CLRange.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLRangeKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLRangeKernel.h"
#include "support/MemorySupport.h"
using namespace arm_compute;
#include "arm_compute/runtime/CL/functions/CLReduceMean.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLReductionOperationKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "src/core/CL/CLValidate.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLReductionOperationKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
namespace arm_compute
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLReductionOperationKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/runtime/Utils.h"
-
#include "support/MemorySupport.h"
namespace arm_compute
{
}
+CLReductionOperation::~CLReductionOperation() = default;
+
Status CLReductionOperation::validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op, bool keep_dims)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
}
// Configure reduction operation kernels
- _reduction_kernels_vector.resize(_num_of_stages);
+ _reduction_kernels_vector.reserve(_num_of_stages);
// Create temporary tensors
if(_is_serial)
_memory_group.manage(&_results_vector.back());
}
- _reduction_kernels_vector[0].configure(compile_context, input, output_internal, axis, op, 0);
+ _reduction_kernels_vector.emplace_back(support::cpp14::make_unique<CLReductionOperationKernel>());
+ _reduction_kernels_vector[0]->configure(compile_context, input, output_internal, axis, op, 0);
}
else
{
- _border_handlers_vector.resize(_num_of_stages);
+ _border_handlers_vector.reserve(_num_of_stages);
_memory_group.manage(&_results_vector[0]);
ReductionOperation first_kernel_op;
ARM_COMPUTE_ERROR("Not supported");
}
- _reduction_kernels_vector[0].configure(compile_context, input, &_results_vector[0], axis, first_kernel_op);
- _border_handlers_vector[0].configure(compile_context, input, _reduction_kernels_vector[0].border_size(), BorderMode::CONSTANT, pixelValue);
+ _reduction_kernels_vector.emplace_back(support::cpp14::make_unique<CLReductionOperationKernel>());
+ _reduction_kernels_vector[0]->configure(compile_context, input, &_results_vector[0], axis, first_kernel_op);
+
+ _border_handlers_vector.emplace_back(support::cpp14::make_unique<CLFillBorderKernel>());
+ _border_handlers_vector[0]->configure(compile_context, input, _reduction_kernels_vector[0]->border_size(), BorderMode::CONSTANT, pixelValue);
// Apply ReductionOperation on intermediate stages
for(unsigned int i = 1; i < _num_of_stages - 1; ++i)
{
_memory_group.manage(&_results_vector[i]);
- _reduction_kernels_vector[i].configure(compile_context, &_results_vector[i - 1], &_results_vector[i], axis, intermediate_kernel_op);
- _border_handlers_vector[i].configure(compile_context, &_results_vector[i - 1], _reduction_kernels_vector[i].border_size(), BorderMode::CONSTANT, pixelValue);
+
+ _reduction_kernels_vector.emplace_back(support::cpp14::make_unique<CLReductionOperationKernel>());
+ _reduction_kernels_vector[i]->configure(compile_context, &_results_vector[i - 1], &_results_vector[i], axis, intermediate_kernel_op);
+
+ _border_handlers_vector.emplace_back(support::cpp14::make_unique<CLFillBorderKernel>());
+ _border_handlers_vector[i]->configure(compile_context, &_results_vector[i - 1], _reduction_kernels_vector[i]->border_size(), BorderMode::CONSTANT, pixelValue);
+
_results_vector[i - 1].allocator()->allocate();
}
_memory_group.manage(&_results_vector.back());
}
- _reduction_kernels_vector[last_stage].configure(compile_context, &_results_vector[last_stage - 1], output_internal, axis, last_kernel_op, input_width);
- _border_handlers_vector[last_stage].configure(compile_context, &_results_vector[last_stage - 1], _reduction_kernels_vector[last_stage].border_size(), BorderMode::CONSTANT, pixelValue);
+ _reduction_kernels_vector.emplace_back(support::cpp14::make_unique<CLReductionOperationKernel>());
+ _reduction_kernels_vector[last_stage]->configure(compile_context, &_results_vector[last_stage - 1], output_internal, axis, last_kernel_op, input_width);
+
+ _border_handlers_vector.emplace_back(support::cpp14::make_unique<CLFillBorderKernel>());
+ _border_handlers_vector[last_stage]->configure(compile_context, &_results_vector[last_stage - 1], _reduction_kernels_vector[last_stage]->border_size(), BorderMode::CONSTANT, pixelValue);
+
_results_vector[last_stage - 1].allocator()->allocate();
}
if(_is_serial)
{
- CLScheduler::get().enqueue(_reduction_kernels_vector[0], false);
+ CLScheduler::get().enqueue(*_reduction_kernels_vector[0], false);
}
else
{
for(unsigned int i = 0; i < _num_of_stages; ++i)
{
- CLScheduler::get().enqueue(_border_handlers_vector[i], false);
- CLScheduler::get().enqueue(_reduction_kernels_vector[i], false);
+ CLScheduler::get().enqueue(*_border_handlers_vector[i], false);
+ CLScheduler::get().enqueue(*_reduction_kernels_vector[i], false);
}
}
#include "arm_compute/runtime/CL/functions/CLRemap.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLRemapKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLRemapKernel.h"
#include "support/MemorySupport.h"
#include <utility>
auto k = arm_compute::support::cpp14::make_unique<CLRemapKernel>();
k->configure(compile_context, input, map_x, map_y, output, policy, border_mode == BorderMode::UNDEFINED);
_kernel = std::move(k);
- _border_handler.configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+ _border_handler->configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
}
#include "arm_compute/runtime/CL/functions/CLReorgLayer.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLReorgLayerKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
+#include "src/core/CL/kernels/CLReorgLayerKernel.h"
#include "support/MemorySupport.h"
#include <utility>
#include "arm_compute/runtime/CL/functions/CLReshapeLayer.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLReshapeLayerKernel.h"
+#include "src/core/CL/kernels/CLReshapeLayerKernel.h"
#include "support/MemorySupport.h"
/** [CLReshapeLayer snippet] **/
*/
#include "arm_compute/runtime/CL/functions/CLReverse.h"
-#include "arm_compute/core/CL/kernels/CLReverseKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/CL/kernels/CLReverseKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
#include "arm_compute/runtime/CL/functions/CLScale.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLScaleKernel.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLScaleKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
{
border_mode_to_use = BorderMode::CONSTANT;
}
- _border_handler.configure(compile_context, input, _kernel->border_size(), border_mode_to_use, info.constant_border_value);
+ _border_handler->configure(compile_context, input, _kernel->border_size(), border_mode_to_use, info.constant_border_value);
}
void CLScale::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value,
*/
#include "arm_compute/runtime/CL/functions/CLScharr3x3.h"
-#include "arm_compute/core/CL/kernels/CLScharr3x3Kernel.h"
#include "arm_compute/core/PixelValue.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLScharr3x3Kernel.h"
#include "support/MemorySupport.h"
#include <utility>
auto k = arm_compute::support::cpp14::make_unique<CLScharr3x3Kernel>();
k->configure(compile_context, input, output_x, output_y, border_mode == BorderMode::UNDEFINED);
_kernel = std::move(k);
- _border_handler.configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+ _border_handler->configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
}
*/
#include "arm_compute/runtime/CL/functions/CLSelect.h"
-#include "arm_compute/core/CL/kernels/CLSelectKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLSelectKernel.h"
#include "support/MemorySupport.h"
#include "arm_compute/runtime/CL/functions/CLSlice.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLStridedSliceKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/helpers/tensor_transform.h"
+#include "src/core/CL/kernels/CLStridedSliceKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
*/
#include "arm_compute/runtime/CL/functions/CLSobel3x3.h"
-#include "arm_compute/core/CL/kernels/CLSobel3x3Kernel.h"
#include "arm_compute/core/PixelValue.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLSobel3x3Kernel.h"
#include "support/MemorySupport.h"
#include <utility>
using namespace arm_compute;
+CLSobel3x3::~CLSobel3x3() = default;
+
void CLSobel3x3::configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value)
{
configure(CLKernelLibrary::get().get_compile_context(), input, output_x, output_y, border_mode, constant_border_value);
auto k = arm_compute::support::cpp14::make_unique<CLSobel3x3Kernel>();
k->configure(compile_context, input, output_x, output_y, border_mode == BorderMode::UNDEFINED);
_kernel = std::move(k);
- _border_handler.configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+ _border_handler->configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
}
#include "arm_compute/runtime/CL/functions/CLSobel5x5.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLSobel5x5Kernel.h"
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/ITensorAllocator.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLSobel5x5Kernel.h"
+#include "support/MemorySupport.h"
using namespace arm_compute;
CLSobel5x5::CLSobel5x5(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _sobel_hor(), _sobel_vert(), _border_handler(), _tmp_x(), _tmp_y()
+ : _memory_group(std::move(memory_manager)),
+ _sobel_hor(support::cpp14::make_unique<CLSobel5x5HorKernel>()),
+ _sobel_vert(support::cpp14::make_unique<CLSobel5x5VertKernel>()),
+ _border_handler(support::cpp14::make_unique<CLFillBorderKernel>()),
+ _tmp_x(),
+ _tmp_y()
{
}
+CLSobel5x5::~CLSobel5x5() = default;
+
void CLSobel5x5::configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value)
{
configure(CLKernelLibrary::get().get_compile_context(), input, output_x, output_y, border_mode, constant_border_value);
_tmp_y.allocator()->init(tensor_info);
_memory_group.manage(&_tmp_x);
_memory_group.manage(&_tmp_y);
- _sobel_hor.configure(compile_context, input, &_tmp_x, &_tmp_y, border_mode == BorderMode::UNDEFINED);
- _sobel_vert.configure(compile_context, &_tmp_x, &_tmp_y, output_x, output_y, border_mode == BorderMode::UNDEFINED);
+ _sobel_hor->configure(compile_context, input, &_tmp_x, &_tmp_y, border_mode == BorderMode::UNDEFINED);
+ _sobel_vert->configure(compile_context, &_tmp_x, &_tmp_y, output_x, output_y, border_mode == BorderMode::UNDEFINED);
_tmp_x.allocator()->allocate();
_tmp_y.allocator()->allocate();
}
{
_tmp_x.allocator()->init(tensor_info);
_memory_group.manage(&_tmp_x);
- _sobel_hor.configure(compile_context, input, &_tmp_x, nullptr, border_mode == BorderMode::UNDEFINED);
- _sobel_vert.configure(compile_context, &_tmp_x, nullptr, output_x, nullptr, border_mode == BorderMode::UNDEFINED);
+ _sobel_hor->configure(compile_context, input, &_tmp_x, nullptr, border_mode == BorderMode::UNDEFINED);
+ _sobel_vert->configure(compile_context, &_tmp_x, nullptr, output_x, nullptr, border_mode == BorderMode::UNDEFINED);
_tmp_x.allocator()->allocate();
}
else if(run_sobel_y)
{
_tmp_y.allocator()->init(tensor_info);
_memory_group.manage(&_tmp_y);
- _sobel_hor.configure(compile_context, input, nullptr, &_tmp_y, border_mode == BorderMode::UNDEFINED);
- _sobel_vert.configure(compile_context, nullptr, &_tmp_y, nullptr, output_y, border_mode == BorderMode::UNDEFINED);
+ _sobel_hor->configure(compile_context, input, nullptr, &_tmp_y, border_mode == BorderMode::UNDEFINED);
+ _sobel_vert->configure(compile_context, nullptr, &_tmp_y, nullptr, output_y, border_mode == BorderMode::UNDEFINED);
_tmp_y.allocator()->allocate();
}
- _border_handler.configure(compile_context, input, _sobel_hor.border_size(), border_mode, PixelValue(constant_border_value));
+ _border_handler->configure(compile_context, input, _sobel_hor->border_size(), border_mode, PixelValue(constant_border_value));
}
void CLSobel5x5::run()
{
- CLScheduler::get().enqueue(_border_handler, false);
+ CLScheduler::get().enqueue(*_border_handler, false);
MemoryGroupResourceScope scope_mg(_memory_group);
- CLScheduler::get().enqueue(_sobel_hor, false);
- CLScheduler::get().enqueue(_sobel_vert);
+ CLScheduler::get().enqueue(*_sobel_hor, false);
+ CLScheduler::get().enqueue(*_sobel_vert);
}
#include "arm_compute/runtime/CL/functions/CLSobel7x7.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLSobel7x7Kernel.h"
#include "arm_compute/core/PixelValue.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/ITensorAllocator.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLSobel7x7Kernel.h"
+#include "support/MemorySupport.h"
using namespace arm_compute;
CLSobel7x7::CLSobel7x7(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _sobel_hor(), _sobel_vert(), _border_handler(), _tmp_x(), _tmp_y()
+ : _memory_group(std::move(memory_manager)),
+ _sobel_hor(support::cpp14::make_unique<CLSobel7x7HorKernel>()),
+ _sobel_vert(support::cpp14::make_unique<CLSobel7x7VertKernel>()),
+ _border_handler(support::cpp14::make_unique<CLFillBorderKernel>()),
+ _tmp_x(),
+ _tmp_y()
{
}
+CLSobel7x7::~CLSobel7x7() = default;
+
void CLSobel7x7::configure(ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, BorderMode border_mode, uint8_t constant_border_value)
{
configure(CLKernelLibrary::get().get_compile_context(), input, output_x, output_y, border_mode, constant_border_value);
_tmp_y.allocator()->init(tensor_info);
_memory_group.manage(&_tmp_x);
_memory_group.manage(&_tmp_y);
- _sobel_hor.configure(compile_context, input, &_tmp_x, &_tmp_y, border_mode == BorderMode::UNDEFINED);
- _sobel_vert.configure(compile_context, &_tmp_x, &_tmp_y, output_x, output_y, border_mode == BorderMode::UNDEFINED);
+ _sobel_hor->configure(compile_context, input, &_tmp_x, &_tmp_y, border_mode == BorderMode::UNDEFINED);
+ _sobel_vert->configure(compile_context, &_tmp_x, &_tmp_y, output_x, output_y, border_mode == BorderMode::UNDEFINED);
_tmp_x.allocator()->allocate();
_tmp_y.allocator()->allocate();
}
{
_tmp_x.allocator()->init(tensor_info);
_memory_group.manage(&_tmp_x);
- _sobel_hor.configure(compile_context, input, &_tmp_x, nullptr, border_mode == BorderMode::UNDEFINED);
- _sobel_vert.configure(compile_context, &_tmp_x, nullptr, output_x, nullptr, border_mode == BorderMode::UNDEFINED);
+ _sobel_hor->configure(compile_context, input, &_tmp_x, nullptr, border_mode == BorderMode::UNDEFINED);
+ _sobel_vert->configure(compile_context, &_tmp_x, nullptr, output_x, nullptr, border_mode == BorderMode::UNDEFINED);
_tmp_x.allocator()->allocate();
}
else if(run_sobel_y)
{
_tmp_y.allocator()->init(tensor_info);
_memory_group.manage(&_tmp_y);
- _sobel_hor.configure(compile_context, input, nullptr, &_tmp_y, border_mode == BorderMode::UNDEFINED);
- _sobel_vert.configure(compile_context, nullptr, &_tmp_y, nullptr, output_y, border_mode == BorderMode::UNDEFINED);
+ _sobel_hor->configure(compile_context, input, nullptr, &_tmp_y, border_mode == BorderMode::UNDEFINED);
+ _sobel_vert->configure(compile_context, nullptr, &_tmp_y, nullptr, output_y, border_mode == BorderMode::UNDEFINED);
_tmp_y.allocator()->allocate();
}
- _border_handler.configure(compile_context, input, _sobel_hor.border_size(), border_mode, PixelValue(constant_border_value));
+ _border_handler->configure(compile_context, input, _sobel_hor->border_size(), border_mode, PixelValue(constant_border_value));
}
void CLSobel7x7::run()
{
- CLScheduler::get().enqueue(_border_handler, false);
+ CLScheduler::get().enqueue(*_border_handler, false);
MemoryGroupResourceScope scope_mg(_memory_group);
- CLScheduler::get().enqueue(_sobel_hor, false);
- CLScheduler::get().enqueue(_sobel_vert);
+ CLScheduler::get().enqueue(*_sobel_hor, false);
+ CLScheduler::get().enqueue(*_sobel_vert);
}
#include "arm_compute/runtime/CL/functions/CLSoftmaxLayer.h"
#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/ICLKernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLSoftmaxLayerKernel.h"
#include "src/core/helpers/SoftmaxHelpers.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
template <bool IS_LOG>
CLSoftmaxLayerGeneric<IS_LOG>::CLSoftmaxLayerGeneric(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(std::move(memory_manager)), _permute_input(), _permute_output(), _max_shift_exp_sum_kernel(), _norm_kernel(), _max(), _sum(), _tmp(), _input_permuted(), _output_permuted(),
+ : _memory_group(std::move(memory_manager)),
+ _permute_input(),
+ _permute_output(),
+ _max_shift_exp_sum_kernel(support::cpp14::make_unique<CLLogits1DMaxShiftExpSumKernel>()),
+ _norm_kernel(support::cpp14::make_unique<CLLogits1DNormKernel>()),
+ _max(),
+ _sum(),
+ _tmp(),
+ _input_permuted(),
+ _output_permuted(),
_needs_permute()
{
}
+template <bool IS_LOG>
+CLSoftmaxLayerGeneric<IS_LOG>::~CLSoftmaxLayerGeneric() = default;
+
template <bool IS_LOG>
void CLSoftmaxLayerGeneric<IS_LOG>::configure(const ICLTensor *input, ICLTensor *output, float beta, int32_t axis)
{
_sum.allocator()->init(tmp_input->info()->clone()->set_tensor_shape(max_sum_shape).set_data_type(tmp_data_type));
// Set GPU target to kernels
- _max_shift_exp_sum_kernel.set_target(CLScheduler::get().target());
+ _max_shift_exp_sum_kernel->set_target(CLScheduler::get().target());
// Manage intermediate buffers
_memory_group.manage(&_tmp);
softmax_info.input_data_type = tmp_input->info()->data_type();
// Configure kernels
- _max_shift_exp_sum_kernel.configure(compile_context, tmp_input, &_max, &_tmp, &_sum, softmax_info);
- _norm_kernel.configure(compile_context, &_tmp, &_sum, tmp_output, softmax_info);
+ _max_shift_exp_sum_kernel->configure(compile_context, tmp_input, &_max, &_tmp, &_sum, softmax_info);
+ _norm_kernel->configure(compile_context, &_tmp, &_sum, tmp_output, softmax_info);
// Allocate intermediate buffers
_tmp.allocator()->allocate();
_permute_input.run();
}
- CLScheduler::get().enqueue(_max_shift_exp_sum_kernel, false);
- CLScheduler::get().enqueue(_norm_kernel, !_needs_permute);
+ CLScheduler::get().enqueue(*_max_shift_exp_sum_kernel, false);
+ CLScheduler::get().enqueue(*_norm_kernel, !_needs_permute);
if(_needs_permute)
{
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLMemsetKernel.h"
+#include "src/core/CL/kernels/CLSpaceToBatchLayerKernel.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
CLSpaceToBatchLayer::CLSpaceToBatchLayer()
- : _space_to_batch_kernel(), _memset_kernel(), _has_padding(false)
+ : _space_to_batch_kernel(support::cpp14::make_unique<CLSpaceToBatchLayerKernel>()),
+ _memset_kernel(support::cpp14::make_unique<CLMemsetKernel>()),
+ _has_padding(false)
{
}
+CLSpaceToBatchLayer::~CLSpaceToBatchLayer() = default;
+
void CLSpaceToBatchLayer::configure(const ICLTensor *input, const ICLTensor *block_shape, const ICLTensor *paddings, ICLTensor *output)
{
configure(CLKernelLibrary::get().get_compile_context(), input, block_shape, paddings, output);
if(input->info()->tensor_shape().total_size() != output->info()->tensor_shape().total_size())
{
_has_padding = true;
- _memset_kernel.configure(compile_context, output, PixelValue(0, input->info()->data_type(), input->info()->quantization_info()));
+ _memset_kernel->configure(compile_context, output, PixelValue(0, input->info()->data_type(), input->info()->quantization_info()));
}
- _space_to_batch_kernel.configure(compile_context, input, block_shape, paddings, output);
+ _space_to_batch_kernel->configure(compile_context, input, block_shape, paddings, output);
}
void CLSpaceToBatchLayer::configure(const ICLTensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, ICLTensor *output)
if(input->info()->tensor_shape().total_size() != output->info()->tensor_shape().total_size())
{
_has_padding = true;
- _memset_kernel.configure(compile_context, output, PixelValue(0, input->info()->data_type(), input->info()->quantization_info()));
+ _memset_kernel->configure(compile_context, output, PixelValue(0, input->info()->data_type(), input->info()->quantization_info()));
}
- _space_to_batch_kernel.configure(compile_context, input, block_shape_x, block_shape_y, padding_left, padding_right, output);
+ _space_to_batch_kernel->configure(compile_context, input, block_shape_x, block_shape_y, padding_left, padding_right, output);
}
Status CLSpaceToBatchLayer::validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *paddings, const ITensorInfo *output)
// Zero out output only if we have paddings
if(_has_padding)
{
- CLScheduler::get().enqueue(_memset_kernel, true);
+ CLScheduler::get().enqueue(*_memset_kernel, true);
}
- CLScheduler::get().enqueue(_space_to_batch_kernel, true);
+ CLScheduler::get().enqueue(*_space_to_batch_kernel, true);
}
} // namespace arm_compute
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLSpaceToDepthLayerKernel.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
CLSpaceToDepthLayer::CLSpaceToDepthLayer()
- : _space_to_depth_kernel()
+ : _space_to_depth_kernel(support::cpp14::make_unique<CLSpaceToDepthLayerKernel>())
{
}
+CLSpaceToDepthLayer::~CLSpaceToDepthLayer() = default;
+
void CLSpaceToDepthLayer::configure(const ICLTensor *input, ICLTensor *output, int32_t block_shape)
{
configure(CLKernelLibrary::get().get_compile_context(), input, output, block_shape);
void CLSpaceToDepthLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t block_shape)
{
- _space_to_depth_kernel.configure(compile_context, input, output, block_shape);
+ _space_to_depth_kernel->configure(compile_context, input, output, block_shape);
}
Status CLSpaceToDepthLayer::validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape)
void CLSpaceToDepthLayer::run()
{
- CLScheduler::get().enqueue(_space_to_depth_kernel, true);
+ CLScheduler::get().enqueue(*_space_to_depth_kernel, true);
}
} // namespace arm_compute
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLStackLayerKernel.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
{
}
+CLStackLayer::~CLStackLayer() = default;
+
void CLStackLayer::configure(const std::vector<ICLTensor *> &input, int axis, ICLTensor *output)
{
configure(CLKernelLibrary::get().get_compile_context(), input, axis, output);
void CLStackLayer::configure(const CLCompileContext &compile_context, const std::vector<ICLTensor *> &input, int axis, ICLTensor *output)
{
_num_inputs = input.size();
- _stack_kernels.resize(_num_inputs);
+ _stack_kernels.reserve(_num_inputs);
// Wrap around negative values
const unsigned int axis_u = wrap_around(axis, static_cast<int>(input[0]->info()->num_dimensions() + 1));
for(unsigned int i = 0; i < _num_inputs; i++)
{
- _stack_kernels[i].configure(compile_context, input[i], axis_u, i, _num_inputs, output);
+ _stack_kernels.emplace_back(support::cpp14::make_unique<CLStackLayerKernel>());
+ _stack_kernels.back()->configure(compile_context, input[i], axis_u, i, _num_inputs, output);
}
}
{
for(unsigned i = 0; i < _num_inputs; i++)
{
- CLScheduler::get().enqueue(_stack_kernels[i], false);
+ CLScheduler::get().enqueue(*_stack_kernels[i], false);
}
}
} // namespace arm_compute
#include "arm_compute/runtime/CL/functions/CLStridedSlice.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLStridedSliceKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/CL/kernels/CLStridedSliceKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
*/
#include "arm_compute/runtime/CL/functions/CLTableLookup.h"
-#include "arm_compute/core/CL/kernels/CLTableLookupKernel.h"
+#include "src/core/CL/kernels/CLTableLookupKernel.h"
#include "support/MemorySupport.h"
#include <utility>
*/
#include "arm_compute/runtime/CL/functions/CLThreshold.h"
-#include "arm_compute/core/CL/kernels/CLThresholdKernel.h"
+#include "src/core/CL/kernels/CLThresholdKernel.h"
#include "support/MemorySupport.h"
#include <utility>
*/
#include "arm_compute/runtime/CL/functions/CLTile.h"
-#include "arm_compute/core/CL/kernels/CLTileKernel.h"
+#include "src/core/CL/kernels/CLTileKernel.h"
#include "support/MemorySupport.h"
namespace arm_compute
*/
#include "arm_compute/runtime/CL/functions/CLTranspose.h"
-#include "arm_compute/core/CL/kernels/CLTransposeKernel.h"
+#include "src/core/CL/kernels/CLTransposeKernel.h"
#include "support/MemorySupport.h"
#include <utility>
#include "arm_compute/core/CL/OpenCL.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLUpsampleLayerKernel.h"
+#include "support/MemorySupport.h"
namespace arm_compute
{
CLUpsampleLayer::CLUpsampleLayer() // NOLINT
- : _upsample(),
+ : _upsample(support::cpp14::make_unique<CLUpsampleLayerKernel>()),
_output(nullptr)
{
}
+CLUpsampleLayer::~CLUpsampleLayer() = default;
+
Status CLUpsampleLayer::validate(const ITensorInfo *input, const ITensorInfo *output,
const Size2D &info, const InterpolationPolicy upsampling_policy)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
_output = output;
- _upsample.configure(compile_context, input, _output, info, upsampling_policy);
+ _upsample->configure(compile_context, input, _output, info, upsampling_policy);
}
void CLUpsampleLayer::run()
{
- CLScheduler::get().enqueue(_upsample, false);
+ CLScheduler::get().enqueue(*_upsample, false);
}
} // namespace arm_compute
*/
#include "arm_compute/runtime/CL/functions/CLWarpAffine.h"
-#include "arm_compute/core/CL/kernels/CLWarpAffineKernel.h"
#include "arm_compute/core/PixelValue.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLWarpAffineKernel.h"
#include "support/MemorySupport.h"
#include <utility>
auto k = arm_compute::support::cpp14::make_unique<CLWarpAffineKernel>();
k->configure(compile_context, input, output, matrix, policy);
_kernel = std::move(k);
- _border_handler.configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+ _border_handler->configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
}
*/
#include "arm_compute/runtime/CL/functions/CLWarpPerspective.h"
-#include "arm_compute/core/CL/kernels/CLWarpPerspectiveKernel.h"
#include "arm_compute/core/PixelValue.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLWarpPerspectiveKernel.h"
#include "support/MemorySupport.h"
#include <utility>
auto k = arm_compute::support::cpp14::make_unique<CLWarpPerspectiveKernel>();
k->configure(compile_context, input, output, matrix, policy);
_kernel = std::move(k);
- _border_handler.configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+ _border_handler->configure(compile_context, input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
}
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
+#include "src/core/CL/kernels/CLWinogradFilterTransformKernel.h"
+#include "src/core/CL/kernels/CLWinogradOutputTransformKernel.h"
+#include "support/MemorySupport.h"
using namespace arm_compute;
} // namespace
CLWinogradConvolutionLayer::CLWinogradConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager)
- : _memory_group(memory_manager), _batched_mm(memory_manager), _input_transform(), _filter_transform(), _output_transform(), _input0(), _input1(), _batched_mm_output(), _original_weights(nullptr),
- _is_prepared(false)
+ : _memory_group(memory_manager), _batched_mm(memory_manager), _input_transform(), _filter_transform(support::cpp14::make_unique<CLWinogradFilterTransformKernel>()),
+ _output_transform(support::cpp14::make_unique<CLWinogradOutputTransformKernel>()), _input0(), _input1(), _batched_mm_output(), _original_weights(nullptr), _is_prepared(false)
{
}
+CLWinogradConvolutionLayer::~CLWinogradConvolutionLayer() = default;
+
void CLWinogradConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info,
bool enable_fast_math)
{
_input_transform.configure(compile_context, input, &_input0, winograd_info);
// Configure filter transform
- _filter_transform.configure(compile_context, weights, &_input1, winograd_info);
+ _filter_transform->configure(compile_context, weights, &_input1, winograd_info);
// Configure batched matrix multiply
_batched_mm.configure(compile_context, &_input0, &_input1, nullptr, &_batched_mm_output, 1.0f, 0.0f, GEMMInfo(false, false, true /* Reshape weights only for the first run*/, 0, false, false,
(input->info()->data_type() == DataType::F16)));
// Configure output transform
- _output_transform.configure(compile_context, &_batched_mm_output, biases, output, winograd_info, act_info);
+ _output_transform->configure(compile_context, &_batched_mm_output, biases, output, winograd_info, act_info);
// Allocate temporary tensors
_input0.allocator()->allocate();
_batched_mm.run();
// Run output transform
- CLScheduler::get().enqueue(_output_transform);
+ CLScheduler::get().enqueue(*_output_transform);
}
void CLWinogradConvolutionLayer::prepare()
{
// Run filter transform and mark original weights as unused
_input1.allocator()->allocate();
- CLScheduler::get().enqueue(_filter_transform, false);
+ CLScheduler::get().enqueue(*_filter_transform, false);
_original_weights->mark_as_unused();
// Prepare GEMM and release reshaped weights if marked unused by CLGEMM
#include "arm_compute/runtime/CL/functions/CLWinogradInputTransform.h"
#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLWinogradInputTransformKernel.h"
#include "arm_compute/core/Error.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLWinogradInputTransformKernel.h"
#include "support/MemorySupport.h"
using namespace arm_compute;
auto k = arm_compute::support::cpp14::make_unique<CLWinogradInputTransformKernel>();
k->configure(compile_context, input, output, winograd_info);
_kernel = std::move(k);
- _border_handler.configure(compile_context, input, _kernel->border_size(), BorderMode::CONSTANT, PixelValue());
+ _border_handler->configure(compile_context, input, _kernel->border_size(), BorderMode::CONSTANT, PixelValue());
}
Status CLWinogradInputTransform::validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info)
*/
#include "arm_compute/runtime/CL/functions/CLYOLOLayer.h"
-#include "arm_compute/core/CL/kernels/CLYOLOLayerKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/CL/kernels/CLYOLOLayerKernel.h"
#include "support/MemorySupport.h"
using namespace arm_compute;
#include "arm_compute/runtime/CL/tuners/BifrostTuner.h"
#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernels.h"
+#include "src/core/CL/CLKernels.h"
#include "support/Cast.h"
namespace arm_compute
#include "arm_compute/runtime/CL/tuners/MidgardTuner.h"
#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernels.h"
+#include "src/core/CL/CLKernels.h"
#include "support/Cast.h"
namespace arm_compute
#ifndef ARM_COMPUTE_TEST_CL_HELPER_H
#define ARM_COMPUTE_TEST_CL_HELPER_H
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
-#include "arm_compute/core/CL/kernels/CLMemsetKernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLMemsetKernel.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
#include "arm_compute/runtime/IFunction.h"
+
+#include "src/core/CL/ICLKernel.h"
+
#include "support/MemorySupport.h"
namespace arm_compute
auto k = arm_compute::support::cpp14::make_unique<K>();
k->configure(first, std::forward<Args>(args)...);
_kernel = std::move(k);
- _border_handler.configure(first, BorderSize(bordersize), BorderMode::CONSTANT, PixelValue());
+ _border_handler->configure(first, BorderSize(bordersize), BorderMode::CONSTANT, PixelValue());
}
};
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"
#include "arm_compute/runtime/CL/functions/CLScale.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
#include "tests/CL/CLAccessor.h"
#include "tests/benchmark/fixtures/ScaleFixture.h"
#include "tests/datasets/BorderModeDataset.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
-#include "arm_compute/runtime/CL/CLFunctions.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
-
+#include "arm_compute/runtime/CL/functions/CLGEMM.h"
+#include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h"
+#include "arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h"
+#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpReductionKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
+#include "src/core/CL/kernels/CLIm2ColKernel.h"
+#include "src/core/CL/kernels/CLWeightsReshapeKernel.h"
#include "tests/AssetsLibrary.h"
#include "tests/CL/CLAccessor.h"
#include "tests/Globals.h"
* SOFTWARE.
*/
#include "arm_compute/core/Types.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"
#include "arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h"
#include "arm_compute/runtime/CL/functions/CLReductionOperation.h"
-
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "tests/CL/CLAccessor.h"
#include "tests/datasets/ShapeDatasets.h"
#include "tests/datasets/SplitDataset.h"
TensorShape{ 2560, 2U, 2U, 2U },
});
-const auto ArgMinMaxLargeDataset = framework::dataset::make("Shape", { TensorShape{ 517U, 123U, 13U, 2U } });
+const auto ArgMinMaxLargeDataset = framework::dataset::make("Shape",
+{ TensorShape{ 517U, 123U, 13U, 2U } });
} // namespace
TEST_SUITE(CL)
TEST_SUITE(ArgMinMax)
ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 8.f, 2.f),
});
-const auto common_fusion_dataset = combine(combine(combine(framework::dataset::make("UseBias", { false, true }),
- framework::dataset::make("UseBeta", { false, true })),
- framework::dataset::make("UseGamma", { false, true })),
- framework::dataset::make("Epsilon", { 0.001f }));
+const auto common_fusion_dataset = combine(combine(combine(framework::dataset::make("UseBias",
+{ false, true }),
+framework::dataset::make("UseBeta", { false, true })),
+framework::dataset::make("UseGamma", { false, true })),
+framework::dataset::make("Epsilon", { 0.001f }));
bool validate_zero_padding(TensorShape shape0, const TensorShape shape1, float epsilon, ActivationLayerInfo act_info, DataType dt, DataLayout data_layout)
{
// clang-format on
// *INDENT-ON*
-DATA_TEST_CASE(ValidateZeroPadding, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallRandomBatchNormalizationLayerDataset(),
- act_infos),
- framework::dataset::make("DataType", { DataType::F32, DataType::F16 })),
+DATA_TEST_CASE(ValidateZeroPadding, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallRandomBatchNormalizationLayerDataset(), act_infos), framework::dataset::make("DataType", { DataType::F32, DataType::F16 })),
framework::dataset::make("DataLayout", { DataLayout::NHWC })),
shape0, shape1, episilon, act_infos, data_type, data_layout)
{
TEST_SUITE(Float)
TEST_SUITE(FP32)
FIXTURE_DATA_TEST_CASE(Random, CLBatchNormalizationLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallRandomBatchNormalizationLayerDataset(),
- combine(framework::dataset::make("UseBeta", { false, true }),
- framework::dataset::make("UseGamma", { false, true }))),
+ combine(framework::dataset::make("UseBeta", { false, true }), framework::dataset::make("UseGamma", { false, true }))),
act_infos),
framework::dataset::make("DataType", DataType::F32)),
framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
TEST_SUITE(FP16)
FIXTURE_DATA_TEST_CASE(Random, CLBatchNormalizationLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallRandomBatchNormalizationLayerDataset(),
- combine(framework::dataset::make("UseBeta", { false, true }),
- framework::dataset::make("UseGamma", { false, true }))),
- framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f))),
+ combine(framework::dataset::make("UseBeta", { false, true }), framework::dataset::make("UseGamma", { false, true }))),
+ framework::dataset::make("ActivationInfo",
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f))),
framework::dataset::make("DataType", DataType::F16)),
framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
/* Allowed ratio of mismatches between target and reference (1.0 = 100%) */
const float allowed_mismatch_ratio = 0.1f;
-const auto data = combine(framework::dataset::make("GradientSize", { 3, 5, 7 }),
- combine(framework::dataset::make("Normalization", { MagnitudeType::L1NORM, MagnitudeType::L2NORM }), datasets::BorderModes()));
+const auto data = combine(framework::dataset::make("GradientSize",
+{ 3, 5, 7 }),
+combine(framework::dataset::make("Normalization", { MagnitudeType::L1NORM, MagnitudeType::L2NORM }), datasets::BorderModes()));
} // namespace
TEST_SUITE(CL)
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLCol2ImKernel.h"
#include "arm_compute/core/Types.h"
-
+#include "src/core/CL/kernels/CLCol2ImKernel.h"
#include "tests/CL/CLAccessor.h"
#include "tests/CL/Helper.h"
#include "tests/framework/Asserts.h"
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"
#include "arm_compute/runtime/CL/functions/CLCropResize.h"
-
#include "tests/CL/CLAccessor.h"
#include "tests/datasets/CropResizeDataset.h"
#include "tests/framework/Asserts.h"
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLTensor.h"
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h"
#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h"
#include "tests/CL/CLAccessor.h"
#include "tests/CL/Helper.h"
#include "tests/PaddingCalculator.h"
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
/*
- * Copyright (c) 2017 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
#include "tests/CL/CLAccessor.h"
#include "tests/Globals.h"
#include "tests/datasets/BorderModeDataset.h"
/*
- * Copyright (c) 2017-2018 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
/*
- * Copyright (c) 2017-2018 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
#include "tests/CL/CLAccessor.h"
#include "tests/CL/Helper.h"
#include "tests/framework/Asserts.h"
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
#include "tests/CL/CLAccessor.h"
#include "tests/CL/Helper.h"
#include "tests/framework/Asserts.h"
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
#include "tests/CL/CLAccessor.h"
#include "tests/CL/Helper.h"
#include "tests/PaddingCalculator.h"
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
#include "tests/CL/CLAccessor.h"
#include "tests/CL/Helper.h"
#include "tests/PaddingCalculator.h"
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
#include "tests/CL/CLAccessor.h"
#include "tests/CL/Helper.h"
#include "tests/PaddingCalculator.h"
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h"
#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h"
#include "tests/CL/CLAccessor.h"
#include "tests/CL/Helper.h"
#include "tests/PaddingCalculator.h"
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
#include "tests/CL/CLAccessor.h"
#include "tests/CL/Helper.h"
#include "tests/PaddingCalculator.h"
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
#include "arm_compute/core/KernelDescriptors.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
#include "tests/CL/CLAccessor.h"
#include "tests/CL/Helper.h"
#include "tests/PaddingCalculator.h"
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
#include "tests/CL/CLAccessor.h"
#include "tests/CL/Helper.h"
#include "tests/PaddingCalculator.h"
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
#include "tests/CL/CLAccessor.h"
#include "tests/CL/Helper.h"
#include "tests/PaddingCalculator.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"
#include "arm_compute/runtime/CL/functions/CLGather.h"
-
#include "tests/CL/CLAccessor.h"
#include "tests/datasets/GatherDataset.h"
#include "tests/framework/Asserts.h"
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
/*
- * Copyright (c) 2017-2018 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
/*
- * Copyright (c) 2018 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
/*
- * Copyright (c) 2018 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLIm2ColKernel.h"
#include "arm_compute/core/Types.h"
-
+#include "src/core/CL/kernels/CLIm2ColKernel.h"
#include "tests/CL/CLAccessor.h"
#include "tests/CL/Helper.h"
#include "tests/framework/Asserts.h"
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
* SOFTWARE.
*/
#include "arm_compute/runtime/CL/functions/CLLSTMLayerQuantized.h"
-
#include "tests/CL/CLAccessor.h"
#include "tests/PaddingCalculator.h"
#include "tests/Utils.h"
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
/*
- * Copyright (c) 2017-2018 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
/*
- * Copyright (c) 2018 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
/*
- * Copyright (c) 2018 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h"
+#include "src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.h"
#include "tests/CL/CLAccessor.h"
#include "tests/CL/Helper.h"
#include "tests/PaddingCalculator.h"
/*
- * Copyright (c) 2018 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
/*
- * Copyright (c) 2018 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"
#include "arm_compute/runtime/CL/functions/CLReduceMean.h"
-
#include "tests/CL/CLAccessor.h"
#include "tests/datasets/ShapeDatasets.h"
#include "tests/datasets/SplitDataset.h"
/*
- * Copyright (c) 2018 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLSoftmaxLayerKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
/*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"
#include "arm_compute/runtime/CL/functions/CLTableLookup.h"
-
#include "tests/CL/CLAccessor.h"
#include "tests/CL/CLLutAccessor.h"
#include "tests/PaddingCalculator.h"
#include "tests/framework/Asserts.h"
#include "tests/framework/Macros.h"
#include "tests/framework/datasets/Datasets.h"
-
#include "tests/validation/Helpers.h"
#include "tests/validation/Validation.h"
#include "tests/validation/fixtures/TableLookupFixture.h"
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/MemoryManagerOnDemand.h"
#include "arm_compute/runtime/PoolManager.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
+#include "src/core/CL/kernels/CLIm2ColKernel.h"
+#include "src/core/CL/kernels/CLL2NormalizeLayerKernel.h"
+#include "src/core/CL/kernels/CLReductionOperationKernel.h"
+#include "src/core/CL/kernels/CLWeightsReshapeKernel.h"
#include "tests/AssetsLibrary.h"
#include "tests/CL/CLAccessor.h"
#include "tests/Globals.h"
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/tuners/BifrostTuner.h"
+#include "src/core/CL/kernels/CLDirectConvolutionLayerKernel.h"
#include "tests/Utils.h"
#include "tests/framework/Asserts.h"
#include "tests/framework/Macros.h"
* SOFTWARE.
*/
#include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h"
+#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h"
+#include "src/core/CL/kernels/CLGEMMLowpReductionKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h"
+#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
+#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
#include "tests/AssetsLibrary.h"
#include "tests/CL/CLAccessor.h"
#include "tests/Globals.h"
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h"
#include "arm_compute/core/Types.h"
+#include "src/core/CL/kernels/CLWeightsReshapeKernel.h"
#include "tests/CL/CLAccessor.h"
#include "tests/CL/Helper.h"
#include "tests/datasets/ShapeDatasets.h"
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#include "arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h"
-#include "arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"
#include "arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h"
#include "arm_compute/runtime/CL/functions/CLWinogradInputTransform.h"
+#include "src/core/CL/kernels/CLWinogradFilterTransformKernel.h"
+#include "src/core/CL/kernels/CLWinogradOutputTransformKernel.h"
#include "tests/CL/CLAccessor.h"
#include "tests/CL/Helper.h"
#include "tests/PaddingCalculator.h"
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*