*
* @return An opencl kernel
*/
-cl::Kernel create_kernel(CLCompileContext &ctx, const std::string &kernel_name, const std::set<std::string> &build_opts);
+cl::Kernel create_kernel(CLCompileContext &ctx, const std::string &kernel_name, const std::set<std::string> &build_opts = std::set<std::string>());
/** Creates a suitable LWS hint object for parallel implementations. Sets the number of WG based on the input size.
* If input width is smaller than 128 we can use fewer threads than 8.
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* @param[out] output Destination tensor. Data types supported: U8/S16.
*/
void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
+ /** Set the inputs and output images.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input1 Source tensor. Data types supported: U8/S16.
+ * @param[in] input2 Source tensor. Data types supported: U8/S16.
+ * @param[out] output Destination tensor. Data types supported: U8/S16.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* @param[out] accum Destination tensor. Data types supported: S16.
*/
void configure(const ICLTensor *input, ICLTensor *accum);
+ /** Set the input and accumulation tensors.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[out] accum Destination tensor. Data types supported: S16.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *accum);
};
/** Interface for the accumulate weighted kernel.
* @param[in,out] accum Accumulated tensor. Data types supported: U8.
*/
void configure(const ICLTensor *input, float alpha, ICLTensor *accum);
+ /** Set the input and accumulation images, and the scale value.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[in] alpha Scalar value in the range [0, 1.0]. Data types supported: F32.
+ * @param[in,out] accum Accumulated tensor. Data types supported: U8.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, float alpha, ICLTensor *accum);
};
/** Interface for the accumulate squared kernel.
* @param[in,out] accum Accumulated tensor. Data types supported: S16.
*/
void configure(const ICLTensor *input, uint32_t shift, ICLTensor *accum);
+ /** Set the input and accumulation tensors and the shift value.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[in] shift Shift value in the range of [0, 15]. Data types supported: U32.
+ * @param[in,out] accum Accumulated tensor. Data types supported: S16.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, uint32_t shift, ICLTensor *accum);
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_CLACCUMULATEKERNEL_H */
namespace arm_compute
{
class ICLTensor;
-class CLCoreRuntimeContext;
/** Interface for the activation layer kernel. */
class CLActivationLayerKernel : public ICLKernel
{
public:
/** Default constructor */
- CLActivationLayerKernel(CLCoreRuntimeContext *ctx = nullptr);
+ CLActivationLayerKernel();
/** Prevent instances of this class from being copied (As this class contains pointers) */
CLActivationLayerKernel(const CLActivationLayerKernel &) = delete;
/** Prevent instances of this class from being copied (As this class contains pointers) */
* @param[in] act_info Activation layer information.
*/
void configure(ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info);
+ /** Set the input and output tensor.
+ *
+ * @note If the output tensor is a nullptr, the activation function will be performed in-place
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result
+ * of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32.
+ * @param[out] output Destination tensor. Data type supported: same as @p input
+ * @param[in] act_info Activation layer information.
+ */
+ void configure(CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info);
/** Static function to check if given info will lead to a valid configuration of @ref CLActivationLayerKernel
*
* @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result
ICLTensor *_input;
ICLTensor *_output;
bool _run_in_place;
- CLCoreRuntimeContext *_ctx;
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_CLACTIVATIONLAYERKERNEL_H */
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* @param[in] op Reduction operation to perform. Only ArgMin and ArgMax are supported.
*/
void configure(const ICLTensor *input, const ICLTensor *prev_output, ICLTensor *output, unsigned int axis, ReductionOperation op);
+ /** Set the input and output tensors.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: S32/F16/F32.
+ * @param[in] prev_output Destination tensor of the previous iterations of @ref CLArgMinMaxLayerKernel. Data types supported: U32/S32
+ * Has to be nullptr for the first iteration
+ * @param[out] output Destination tensor. Data types supported: U32/S32
+ * Output will have the same number of dimensions as input.
+ * @param[in] axis Axis along which to reduce. Supported reduction axis : 0,1,2,3
+ * @param[in] op Reduction operation to perform. Only ArgMin and ArgMax are supported.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *prev_output, ICLTensor *output, unsigned int axis, ReductionOperation op);
/** Static function to check if given info will lead to a valid configuration of @ref CLArgMinMaxLayerKernel.
*
*
*/
void configure(const ICLTensor *input, unsigned int batch_offset, ICLTensor *output);
+ /** Initialise the kernel's inputs and output
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Input tensor. Data types supported: All.
+ * @param[in] batch_offset The offset on axis # 3.
+ * @param[in,out] output Output tensor. Data types supported: Same as @p input.
+ *
+ * @note: The two lowest dimensions of the output tensor can't be smaller than those of the input tensor.
+ * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2.
+ *
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, unsigned int batch_offset, ICLTensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref CLBatchConcatenateLayerKernel
*
* @param[in] input Input tensor info. Data types supported: All.
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
*/
void configure(ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta = nullptr, const ICLTensor *gamma = nullptr, float epsilon = 0.001f,
ActivationLayerInfo act_info = ActivationLayerInfo());
+ /** Set the input and output tensors.
+ *
+ * @note If the output tensor is a nullptr, the batch normalization function will be performed in-place
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result.
+ * 3 lower dimensions represent a single input with dimensions [width, height, FM].
+ * The rest are optional and used for representing batches. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC.
+ * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input
+ * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
+ * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
+ * @param[in] beta (Optional) Beta values tensor. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for beta is 0. Data types supported: Same as @p input
+ * @param[in] gamma (Optional) Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for gamma is 1. Data types supported: Same as @p input
+ * @param[in] epsilon (Optional) Small value to avoid division by zero. Default value is 0.001f.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
+ */
+ void configure(CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta = nullptr, const ICLTensor *gamma = nullptr,
+ float epsilon = 0.001f,
+ ActivationLayerInfo act_info = ActivationLayerInfo());
/** Static function to check if given info will lead to a valid configuration of @ref CLBatchNormalizationLayerKernel
*
* @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result.
* @param[out] output Tensor output. Data types supported: same as @p input
*/
void configure(const ICLTensor *input, const ICLTensor *block_shape, ICLTensor *output);
+ /** Initialise the kernel's inputs and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
+ * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32
+ * @param[out] output Tensor output. Data types supported: same as @p input
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *block_shape, ICLTensor *output);
/** Initialise the kernel's inputs and output (Static block shape).
*
* @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
* @param[out] output Tensor output. Data types supported: same as @p input
*/
void configure(const ICLTensor *input, const int32_t block_shape_x, const int32_t block_shape_y, ICLTensor *output);
+ /** Initialise the kernel's inputs and output (Static block shape).
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
+ * @param[in] block_shape_x Block shape x value.
+ * @param[in] block_shape_y Block shape y value.
+ * @param[out] output Tensor output. Data types supported: same as @p input
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, const int32_t block_shape_x, const int32_t block_shape_y, ICLTensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref CLBatchToSpaceLayerKernel
*
* @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* @param[out] output Destination tensor. Data types supported: U8.
*/
void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
+ /** Set the inputs and output images
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input1 Source tensor. Data types supported: U8.
+ * @param[in] input2 Source tensor. Data types supported: U8.
+ * @param[out] output Destination tensor. Data types supported: U8.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* @param[out] output Destination tensor. Data types supported: U8.
*/
void configure(const ICLTensor *input, ICLTensor *output);
+ /** Set the inputs and output images.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[out] output Destination tensor. Data types supported: U8.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLBITWISENOTKERNEL_H */
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* @param[out] output Destination tensor. Data types supported: U8.
*/
void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
+ /** Set the inputs and output images
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input1 Source tensor. Data types supported: U8.
+ * @param[in] input2 Source tensor. Data types supported: U8.
+ * @param[out] output Destination tensor. Data types supported: U8.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* @param[out] output Destination tensor. Data types supported: U8.
*/
void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
+ /** Set the inputs and output images
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input1 Source tensor. Data types supported: U8.
+ * @param[in] input2 Source tensor. Data types supported: U8.
+ * @param[out] output Destination tensor. Data types supported: U8.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
*
*/
void configure(const ICLTensor *boxes, ICLTensor *pred_boxes, const ICLTensor *deltas, const BoundingBoxTransformInfo &info);
+ /** Set the input and output tensors.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] boxes Source tensor. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32.
+ * @param[out] pred_boxes Destination tensor. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. Data types supported: Same as @p boxes
+ * @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes.
+ * Data types supported: QASYMM8 if @p boxes is QASYMM16, otherwise same as @p boxes
+ * @param[in] info Contains BoundingBox operation information described in @ref BoundingBoxTransformInfo.
+ *
+ * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct.
+ *
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *boxes, ICLTensor *pred_boxes, const ICLTensor *deltas, const BoundingBoxTransformInfo &info);
/** Static function to check if given info will lead to a valid configuration of @ref CLBoundingBoxTransform
*
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
*/
void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input An input tensor. Data types supported: U8
+ * @param[out] output The output tensor. Data types supported: U8.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined);
//Inherited methods overriden:
BorderSize border_size() const override;
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* @param[in] norm_type Normalization type. if 1, L1-Norm otherwise L2-Norm.
*/
void configure(const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase, int32_t norm_type);
+ /** Initialise the kernel's sources, destinations and border mode.
+ *
+ * @note gx, gy and magnitude must all have the same pixel size (either 16 or 32 bits).
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] gx Source tensor - Gx component. Data types supported: S16/S32.
+ * @param[in] gy Source tensor - Gy component. Data types supported: Same as gx.
+ * @param[out] magnitude Destination tensor - Magnitude. Data types supported: U16/U32. Must match the pixel size of gx, gy.
+ * @param[out] phase Destination tensor - Quantized phase. Data types supported: U8.
+ * @param[in] norm_type Normalization type. if 1, L1-Norm otherwise L2-Norm.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase, int32_t norm_type);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
* @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
*/
void configure(const ICLTensor *magnitude, const ICLTensor *phase, ICLTensor *output, int32_t lower_thr, bool border_undefined);
+ /** Initialise the kernel's sources, destination and border mode.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] magnitude Source tensor - Magnitude. Data types supported: U16/U32.
+ * @param[in] phase Source tensor - Quantized phase. Data types supported: U8.
+ * @param[out] output Destination tensor. Data types supported: U16/U32.
+ * @param[in] lower_thr Lower threshold.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *magnitude, const ICLTensor *phase, ICLTensor *output, int32_t lower_thr, bool border_undefined);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
*/
void configure(const ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr,
ICLTensor *visited, ICLTensor *recorded, ICLTensor *l1_stack, ICLTensor *l1_stack_counter);
+ /** Initialise the kernel's source, destination and border mode.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[out] output Destination tensor. Data types supported: U8.
+ * @param[in] upper_thr Upper threshold used for the hysteresis
+ * @param[in] lower_thr Lower threshold used for the hysteresis
+ * @param[in,out] visited Tensor for keeping the visited pixels. Data types supported: U32.
+ * Expected to be initialized to 0 before each run.
+ * @param[in,out] recorded Tensor for keeping the recorded pixels. Data types supported: U32
+ * Expected to be initialized to 0 before each run.
+ * @param[in,out] l1_stack Tensor with the L1 stack for each pixel. Data types supported: S32.
+ * Expected to be initialized to 0 before each run.
+ * @param[in,out] l1_stack_counter Tensor for counting the elements in the L1 stack of each pixel. Data types supported: U8.
+ * Expected to be initialized to 0 before each run.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr,
+ ICLTensor *visited, ICLTensor *recorded, ICLTensor *l1_stack, ICLTensor *l1_stack_counter);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* @param[out] output The single planar output tensor.
*/
void configure(const ICLTensor *plane0, const ICLTensor *plane1, const ICLTensor *plane2, const ICLTensor *plane3, ICLTensor *output);
+ /** Configure function's inputs and outputs.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format.
+ * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format.
+ * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format.
+ * @param[in] plane3 The 2D plane that forms channel 3. Must be of U8 format.
+ * @param[out] output The single planar output tensor.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *plane0, const ICLTensor *plane1, const ICLTensor *plane2, const ICLTensor *plane3, ICLTensor *output);
/** Configure function's inputs and outputs.
*
* @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format.
* @param[out] output The multi planar output tensor.
*/
void configure(const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output);
+ /** Configure function's inputs and outputs.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] plane0 The 2D plane that forms channel 0. Must be of U8 format.
+ * @param[in] plane1 The 2D plane that forms channel 1. Must be of U8 format.
+ * @param[in] plane2 The 2D plane that forms channel 2. Must be of U8 format.
+ * @param[out] output The multi planar output tensor.
+ */
+ void configure(CLCompileContext &compile_context, const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* @param[out] output Destination tensor. Must be of U8 format.
*/
void configure(const ICLTensor *input, Channel channel, ICLTensor *output);
+ /** Set the input and output of the kernel
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Formats supported: RGB888/RGBA8888/YUYV422/UYVY422
+ * @param[in] channel Channel to extract.
+ * @param[out] output Destination tensor. Must be of U8 format.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, Channel channel, ICLTensor *output);
/** Set the input and output of the kernel
*
* @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV/YUV444
* @param[out] output Single-planar 2D destination image. Must be of U8 format.
*/
void configure(const ICLMultiImage *input, Channel channel, ICLImage *output);
+ /** Set the input and output of the kernel
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV/YUV444
+ * @param[in] channel Channel to extract.
+ * @param[out] output Single-planar 2D destination image. Must be of U8 format.
+ */
+ void configure(CLCompileContext &compile_context, const ICLMultiImage *input, Channel channel, ICLImage *output);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
* @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups.
*/
void configure(const ICLTensor *input, ICLTensor *output, unsigned int num_groups);
+ /** Configure function's inputs and outputs.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Input tensor. Data types supported: All.
+ * @param[out] output Output tensor. Data type supported: Same as @p input
+ * @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, unsigned int num_groups);
/** Static function to check if given info will lead to a valid configuration of @ref CLChannelShuffleLayerKernel
*
* @param[in] input Input tensor info. Data types supported: All.
* @param[in] num_groups (Optional) Number of groups when performing a grouped convolution
*/
void configure(const ICLTensor *input, ICLTensor *output, const Size2D &convolved_dims, unsigned int num_groups = 1);
+ /** Set the input and output of the kernel.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input The input tensor to convert. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
+ * @param[out] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM],
+ * while the rest represent batch of outputs. Data types supported: Same as @p input. Data layout: NCHW
+ * @param[in] convolved_dims Output convolved dimensions.
+ * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Size2D &convolved_dims, unsigned int num_groups = 1);
/** Static function to check if given info will lead to a valid configuration of @ref CLCol2ImKernel
*
* @param[in] input The input tensor to convert. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* U8 (if the formats of @p input is RGB888)
*/
void configure(const ICLTensor *input, ICLTensor *output);
+ /** Set the input and output of the kernel
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Formats supported: RGBA8888/UYVY422/YUYV422/RGB888
+ * @param[out] output Destination tensor. Formats supported: RGB888 (if the formats of @p input are RGBA8888/UYVY422/YUYV422),
+ * RGBA8888 (if the formats of @p input are UYVY422/YUYV422/RGB888),
+ * U8 (if the formats of @p input is RGB888)
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
/** Set the input and output of the kernel
*
* @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV
* @param[out] output Single-planar destination image. Formats supported: RGB888/RGBA8888
*/
void configure(const ICLMultiImage *input, ICLImage *output);
+ /** Set the input and output of the kernel
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV
+ * @param[out] output Single-planar destination image. Formats supported: RGB888/RGBA8888
+ */
+ void configure(CLCompileContext &compile_context, const ICLMultiImage *input, ICLImage *output);
/** Set the input and output of the kernel
*
* @param[in] input Single-planar source image. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422
* @param[out] output Multi-planar destination image. Formats supported: NV12/IYUV/YUV444 (if the formats of @p input are RGB888/RGB8888)
*/
void configure(const ICLImage *input, ICLMultiImage *output);
+ /** Set the input and output of the kernel
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Single-planar source image. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422
+ * @param[out] output Multi-planar destination image. Formats supported: NV12/IYUV/YUV444 (if the formats of @p input are RGB888/RGBA8888)
+ */
+ void configure(CLCompileContext &compile_context, const ICLImage *input, ICLMultiImage *output);
/** Set the input and output of the kernel
*
* @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV
* @param[out] output Multi-planar destination image. Formats supported: YUV444/IYUV (if the formats of @p input are NV12/NV21)/NV12 (if the format of @p input is IYUV)
*/
void configure(const ICLMultiImage *input, ICLMultiImage *output);
+ /** Set the input and output of the kernel
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV
+ * @param[out] output Multi-planar destination image. Formats supported: YUV444/IYUV (if the formats of @p input are NV12/NV21)/NV12 (if the format of @p input is IYUV)
+ */
+ void configure(CLCompileContext &compile_context, const ICLMultiImage *input, ICLMultiImage *output);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
* @param[in] operation Comparison operation to use.
*/
void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ComparisonOperation operation);
+ /** Set the inputs and output tensors
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input1 Source tensor. Data types supported: All.
+ * @param[in] input2 Source tensor. Data types supported: Same as @p input1.
+ * @param[out] output Destination tensor. Data types supported: U8.
+ * @param[in] operation Comparison operation to use.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ComparisonOperation operation);
/** Static function to check if given info will lead to a valid configuration of @ref CLComparisonKernel
*
* @param[in] input1 Source tensor. Data types supported: All.
* @param[in] data_layout The data layout the weights have been trained in.
*/
void configure(const ICLTensor *input, ICLTensor *output, const TensorShape &original_input_shape, DataLayout data_layout);
+ /** Set the input and output tensor.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: All.
+ * @param[out] output The converted weights tensor. Shape and Data Type: Same as @p input.
+ * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer).
+ * @param[in] data_layout The data layout the weights have been trained in.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const TensorShape &original_input_shape, DataLayout data_layout);
/** Static function to check if given info will lead to a valid configuration of @ref CLConvertFullyConnectedWeightsKernel
*
* @param[in] input Source weights tensor info to convert. Must be 2 dimensional. Data types supported: All.
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
*/
void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined);
+ /** Initialise the kernel's input, output and border mode.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[out] output Destination tensor, Data types supported: U8, S16.
+ * @param[in] conv Convolution matrix to apply to the input tensor.
+ * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined);
// Inherited methods overridden:
BorderSize border_size() const override;
* @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
*/
void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, bool border_undefined);
+ /** Initialise the kernel's input, output and border mode.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[out] output Destination tensor, Data types supported: S16.
+ * @param[in] conv Convolution matrix to apply to the input tensor.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, bool border_undefined);
// Inherited methods overridden:
BorderSize border_size() const override;
* @param[in] data_type Data type to use for intermeidate result. @sa data_type_for_convolution
*/
void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined, DataType data_type = DataType::S32);
+ /** Initialise the kernel's input, output and border mode.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: S16.
+ * @param[out] output Destination tensor, Data types supported: U8, S16.
+ * @param[in] conv Convolution matrix to apply to the input tensor.
+ * @param[in] scale Scale of the convolution matrix.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ * @param[in] data_type Data type to use for intermediate result. @sa data_type_for_convolution
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined, DataType data_type = DataType::S32);
// Inherited methods overridden:
BorderSize border_size() const override;
* @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
*/
void configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t width, uint32_t height, uint32_t scale, bool border_undefined);
+ /** Initialise the kernel's input, output and border mode.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[out] output Destination tensor, Data types supported: U8, S16.
+ * @param[in] conv Convolution matrix to apply to the input tensor.
+ * @param[in] width Width of convolution matrix (Number of columns)
+ * @param[in] height Height of convolution matrix (Number of rows)
+ * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t width, uint32_t height, uint32_t scale, bool border_undefined);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* @param[in] output_window (Optional) Window to be used in case only copying into part of a tensor. Default is nullptr.
*/
void configure(const ICLTensor *input, ICLTensor *output, const PaddingList &padding = PaddingList(), Window *output_window = nullptr);
+ /** Initialize the kernel's input, output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+ * @param[out] output Destination tensor. Data types supported: same as @p input.
+ * @param[in] padding (Optional) Padding to be applied to the input tensor
+ * @param[in] output_window (Optional) Window to be used in case only copying into part of a tensor. Default is nullptr.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PaddingList &padding = PaddingList(), Window *output_window = nullptr);
/** Static function to check if given info will lead to a valid configuration of @ref CLCopyKernel
*
* @param[in] input Source tensor info. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* @param[in] output_window Output window to be used in case cropped image is being copied into a tensor. Default is nullptr.
*/
void configure(const ICLTensor *input, ICLTensor *output, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value = 0, Window *output_window = nullptr);
+ /** Configure kernel
+ *
+ * @note Supported tensor rank: up to 4
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data type supported: U16/S16/U32/S32/F16/F32. Data layouts supported: NHWC.
+ * @param[out] output Destination tensor. Data type supported: F32
+ * @param[in] start Coordinates of where to start cropping the image.
+ * @param[in] end Coordinates of where to end cropping the image.
+ * @param[in] batch_index Fourth dimension index of the 3D image to crop in @p input.
+ * @param[in] extrapolation_value Value to be used for values outside of the image. Default is 0.
+ * @param[in] output_window Output window to be used in case cropped image is being copied into a tensor. Default is nullptr.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value = 0,
+ Window *output_window = nullptr);
/** Static function to check if given info will lead to a valid configuration of @ref CLStridedSliceKernel
*
* @param[in] info Contains padding and stride information described in @ref PadStrideInfo.
*/
void configure(const ICLTensor *input, ICLTensor *output, const PadStrideInfo &info);
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: All.
+ * @param[out] output Destination tensor. Data types supported: same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
+ * @param[in] info Contains padding and stride information described in @ref PadStrideInfo.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PadStrideInfo &info);
/** Static function to check if given info will lead to a valid configuration of @ref CLDeconvolutionLayerUpsample
*
* @param[in] input Source tensor info. Data types supported: All.
* @param[in] deconv_info Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo. This kernel supports only stride_x = weights.width && stride_y = weights.height. Moreover, padding is not supported.
*/
void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const ITensorInfo *input_info, const ITensorInfo *weights_info, const PadStrideInfo &deconv_info);
+ /** Initialise the kernel's source and destination.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Input tensor. Supported data types: QASYMM8/QASYMM8_SIGNED/S32/F16/F32.
+ * @param[in] bias Bias tensor to be added directly during the reshape operation. Supported data types: same as @p input. Supported data layouts: same as @p input.
+ * @param[out] output Output tensor with the following shape: [stride_x * (input_width - 1) + filter_width - 2 * padx, stride_y * (input_height - 1) + filter_height - 2 * pady, ofms, batch_size]
+ * Supported data types: same as @p input. Supported data layouts: same as @p input.
+ * @param[in] input_info Deconvolution input tensor info. Supported data types: same as @p input. Supported data layouts: same as @p input.
+ * @param[in] weights_info Deconvolution weights tensor info. Supported data types: same as @p input. Supported data layouts: same as @p input.
+ * @param[in] deconv_info Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo. This kernel supports only stride_x = weights.width && stride_y = weights.height. Moreover, padding is not supported.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const ITensorInfo *input_info, const ITensorInfo *weights_info,
+ const PadStrideInfo &deconv_info);
/** Static function to check if given info will lead to a valid configuration of @ref CLDeconvolutionReshapeOutputKernel.
*
*
*/
void configure(const ICLTensor *input, unsigned int depth_offset, ICLTensor *output);
+ /** Initialise the kernel's inputs and output
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] depth_offset The offset on the Z axis.
+ * @param[in,out] output Output tensor. Data types supported: Same as @p input.
+ *
+ * @note: The output tensor's low two dimensions can't be smaller than the input one's.
+ * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2.
+ *
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, unsigned int depth_offset, ICLTensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref CLDepthConcatenateLayerKernel
*
* @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8.
*/
void configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift);
+ /** Set the input and output of the kernel.
+ *
+ * Valid conversions Input -> Output :
+ *
+ * - QSYMM8_PER_CHANNEL -> QASYMM8 (ATTENTION: it is the user's responsibility to keep track of the quantization info in the TensorInfo meta-data)
+ * - U8 -> S8, U16, S16, U32, S32, F16, F32
+ * - U16 -> U8, S8, S16, U32, S32, F16, F32
+ * - S16 -> U8, S8, U16, U32, S32, F16, F32
+ * - U32 -> U8, S8, U16, S16, S32, F16, F32
+ * - S32 -> U8, S8, U16, S16, U32, F16, F32
+ * - F16 -> U8, S8, U16, S16, U32, F32
+ * - F32 -> U8, S8, U16, S16, U32, F16
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input The input tensor to convert. Data types supported: U8/S8/QSYMM8_PER_CHANNEL/U16/S16/U32/S32/F16/F32.
+ * @param[out] output The output tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
+ * @param[in] policy Conversion policy
+ * @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift);
/** Static function to check if given info will lead to a valid configuration of @ref CLDepthConvertLayerKernel
*
* @param[in] input Source tensor info. Data types supported: U8/S8/QSYMM8_PER_CHANNEL/U16/S16/U32/S32/F16/F32.
* @param[in] block_shape Block shape value.
*/
void configure(const ICLTensor *input, ICLTensor *output, int32_t block_shape);
+ /** Initialise the kernel's inputs and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
+ * @param[out] output Tensor output. Data types supported: same as @p input
+ * @param[in] block_shape Block shape value.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t block_shape);
/** Static function to check if given info will lead to a valid configuration of @ref CLDepthToSpaceLayerKernel.
*
* @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All.
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U),
const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) override;
+ /** Initialize the function's source, destination, conv and border_size.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] weights Weights tensor. A 3D tensor with dimensions [3, 3, IFM].
+ * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
+ * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
+ * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
+ * @param[out] output Destination tensor. Data type supported: Same as @p input.
+ * @param[in] conv_info Padding and stride information to use for the convolution.
+ * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for QASYMM8 supported.
+ * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+ * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization,
+ * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
+ * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization,
+ * the number of shifts must be equal to the number of filters (IFM). Supported data types: S32
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
+ unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U),
+ const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) override;
/** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer3x3NCHWKernel
*
* @param[in] input Source tensor info. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U),
const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) override;
+ /** Initialize the function's source, destination, conv and border_size.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED.
+ * @param[in] weights Weights tensor. A 3D tensor with dimensions [IFM, 3, 3].
+ * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED.
+ * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
+ * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
+ * @param[out] output Destination tensor. Data type supported: Same as @p input.
+ * @param[in] conv_info Padding and stride information to use for the convolution.
+ * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported.
+ * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+ * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization,
+ * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
+ * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization,
+ * the number of shifts must be equal to the number of filters (IFM). Supported data types: S32
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
+ unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U),
+ const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) override;
/** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer3x3NHWCKernel
*
* @param[in] input Source tensor info. DataType supported: QASYMM8/QASYMM8_SIGNED.
void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const DWCWeightsKernelInfo &dwc_weights_info,
const DWCKernelInfo &dwc_info, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const Size2D &dilation = Size2D(1U, 1U),
const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr);
+ /** Initialize the function's source, destination and parameters
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/FP32/FP16. Data layout supported: NHWC
+ * @param[in] weights Weights tensor. A 3D tensor with dimensions [IFM, N, M].
+ * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8.
+ * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
+ * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED.
+ * @param[out] output Destination tensor. Data type supported: Same as @p input.
+ * @param[in] dwc_weights_info Depthwise convolution layer weights info to retrieve the number of output elements processed by each thread
+ * @param[in] dwc_info Depthwise convolution layer info
+ * @param[in] conv_info Padding and stride information to use for the convolution.
+ * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+ * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+ * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization,
+ * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
+ * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization,
+ * the number of shifts must be equal to the number of filters (IFM). Supported data types: S32
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const DWCWeightsKernelInfo &dwc_weights_info,
+ const DWCKernelInfo &dwc_info, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const Size2D &dilation = Size2D(1U, 1U),
+ const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr);
/** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayerNativeKernel
*
* @param[in] input Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/FP32/FP16. Data layout supported: NHWC
* @param[in] info Depthwise convolution information to reshape the input tensor.
*/
void configure(const ICLTensor *input, ICLTensor *output, const DepthwiseConvolutionReshapeInfo &info);
+ /** Initialize the function's source and destination.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input The input tensor of dimension [IFM, W, H]. Data types supported: All. Data layouts supported: NHWC
+ * @param[out] output The output tensor of dimension [W*H*C0, ceil(IFM/C0)]. C0 is the number of channels read by each thread. Data types supported: same as @p input.
+ * @param[in] info Depthwise convolution information to reshape the input tensor.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const DepthwiseConvolutionReshapeInfo &info);
/** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer3x3NHWCKernel
*
* @param[out] output Destination tensor. Data types supported: F16/F32.
*/
void configure(const ICLTensor *input, ICLTensor *output);
+ /** Set the input and output tensors.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
+ * @param[out] output Destination tensor. Data types supported: F16/F32.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref CLDequantizationLayerKernel
*
* @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16.
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
*/
void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
+ /** Initialise the kernel's sources, destination and border
+ *
+ * @note At least one of output_x or output_y must be set
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16.
+ * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
*/
void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input An input tensor. Data types supported: U8
+ * @param[out] output The output tensor. Data types supported: U8.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined);
// Inherited methods overridden:
BorderSize border_size() const override;
* @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
*/
void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info);
+ /** Set the input, weights, biases and output tensors.
+ *
+ * @note: DirectConvolution only works in the following configurations:
+ * 1x1 convolution with stride_x = 1/2/3, stride_y = 1/2/3
+ * 3x3 convolution with stride_x = 1/2, stride_y = 1/2
+ * 5x5 convolution with stride_x = 1/2, stride_y = 1/2
+ * 9x9 convolution with stride_x = 1/2, stride_y = 1/2, data_layout=NHWC
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input The input tensor to convolve. 3 lower dimensions represent a single input [width, height, IFM],
+ * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
+ * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
+ * The 3rd dimension must be the same as the input's volume 3rd dimension.
+ * Data type supported: Same as @p input.
+ * @param[in] biases Biases tensor. Biases are 1D tensor with dimension [OFM].
+ * Data type supported: Should match @p input data type, except for input of QASYMM8 and QASYMM8_SIGNED type where biases should be of S32 type
+ * @param[out] output Output tensor.
+ * The 3rd dimension must be equal to the 4th dimension of the @p weights tensor. Data types supported: Same as @p input.
+ * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info);
/** Static function to check if given info will lead to a valid configuration of @ref CLDirectConvolutionLayerKernel
*
* @param[in] input The input tensor to convolve. 3 lower dimensions represent a single input [width, height, IFM],
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* @param[in] op Element wise unary operation to perform.
*/
void configure(const ICLTensor *input, ICLTensor *output, const ElementWiseUnary &op);
+ /** Initialise the kernel's inputs, output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input First tensor input. Data types supported: F16/F32.
+ * @param[out] output Output tensor. Data types supported: Same as @p input.
+ * @param[in] op Element wise unary operation to perform.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ElementWiseUnary &op);
/** Static function to check if given info will lead to a valid configuration of @ref CLElementWiseUnaryLayerKernel
*
* @param[in] input First tensor input info. Data types supported: F16/F32.
*
*/
void configure_common(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
+ /** Common configure function for element-wise operators with no additional options (e.g., Div, Min, Max, SquaredDiff)
+ *
+ */
+ void configure_common(CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
ActivationLayerInfo _act_info;
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
*/
void configure(ArithmeticOperation op, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const ConvertPolicy &policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ /** Initialise the kernel's inputs, output and overflow policy for the given arithmetic operation.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] op Arithmetic operation to be executed.
+ * @param[in] input1 First tensor input. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/QSYMM16/F16/U32/S32/F32.
+ * @param[in] input2 Second tensor input. Data types supported: Same as @p input1.
+ * @param[out] output Output tensor. Data types supported: Same as @p input1.
+ * @param[in] policy Policy to use to handle overflow.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ */
+ void configure(CLCompileContext &compile_context, ArithmeticOperation op, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const ConvertPolicy &policy,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
/** Static function to check if given info will lead to a valid configuration of @ref CLSaturatedArithmeticOperationKernel
*
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
*/
void configure(ArithmeticOperation op, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ /** Initialise the kernel's inputs and output for the given arithmetic operation.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] op Arithmetic operation to be executed.
+ * @param[in] input1 First tensor input. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/QSYMM16/F16/U32/S32/F32.
+ * @param[in] input2 Second tensor input. Data types supported: Same as @p input1.
+ * @param[out] output Output tensor. Data types supported: Same as @p input1.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ */
+ void configure(CLCompileContext &compile_context, ArithmeticOperation op, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
/** Static function to check if given info will lead to a valid configuration of @ref CLArithmeticOperationKernel
*
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
*/
void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input An input tensor. Data types supported: U8
+ * @param[out] output The output tensor. Data types supported: U8.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined);
// Inherited methods overridden:
BorderSize border_size() const override;
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* @param[in] config Kernel configuration.
*/
void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *idx, const FFTDigitReverseKernelInfo &config);
+ /** Set the input and output tensors.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: F32.
+ * @param[out] output Destination tensor. Data type supported: same as @p input
+ * @param[in] idx Digit reverse index tensor. Data type supported: U32
+ * @param[in] config Kernel configuration.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *idx, const FFTDigitReverseKernelInfo &config);
/** Static function to check if given info will lead to a valid configuration of @ref CLFFTDigitReverseKernel
*
* @param[in] input Source tensor info. Data types supported: F32.
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* @param[in] config FFT descriptor metadata.
*/
void configure(ICLTensor *input, ICLTensor *output, const FFTRadixStageKernelInfo &config);
+ /** Set the input and output tensors.
+ *
+ * @note If the output tensor is nullptr, the FFT will be performed in-place
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in,out] input Source tensor. Data types supported: F32.
+ * @param[out] output Destination tensor. Can be nullptr. Data type supported: same as @p input
+ * @param[in] config FFT descriptor metadata.
+ */
+ void configure(CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const FFTRadixStageKernelInfo &config);
/** Static function to check if given info will lead to a valid configuration of @ref CLFFTRadixStageKernel
*
* @param[in] input Source tensor info. Data types supported: F32.
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* @param[in] config Kernel configuration
*/
void configure(ICLTensor *input, ICLTensor *output, const FFTScaleKernelInfo &config);
+ /** Set the input and output tensors.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in,out] input Source tensor. Data types supported: F32.
+ * @param[out] output Destination tensor. Data type supported: same as @p input
+ * @param[in] config Kernel configuration
+ */
+ void configure(CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const FFTScaleKernelInfo &config);
/** Static function to check if given info will lead to a valid configuration of @ref CLFFTScaleKernel
*
* @param[in] input Source tensor info. Data types supported: F32.
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* @param[in] border_mode Strategy to use for borders.
*/
void configure(const ICLImage *input, ICLImage *output, float threshold, bool non_max_suppression, BorderMode border_mode);
+ /** Initialise the kernel.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source image. Data types supported: U8.
+ * @param[out] output Output image. Data types supported: U8.
+ * @param[in] threshold Threshold on difference between intensity of the central pixel and pixels on Bresenham's circle of radius 3.
+ * @param[in] non_max_suppression True if non-maxima suppression is applied, false otherwise.
+ * @param[in] border_mode Strategy to use for borders.
+ */
+ void configure(CLCompileContext &compile_context, const ICLImage *input, ICLImage *output, float threshold, bool non_max_suppression, BorderMode border_mode);
// Inherited methods overridden
void run(const Window &window, cl::CommandQueue &queue) override;
* @param[out] num_buffers Number of keypoints to store the results.
*/
void configure(const ICLImage *input, bool update_number, ICLKeyPointArray *corners, cl::Buffer *num_buffers);
+ /** Initialise the kernel.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source image. Data types supported: U8.
+ * @param[in] update_number Flag to indicate whether we need to update the number of corners
+ * @param[out] corners Array of keypoints to store the results.
+ * @param[out] num_buffers Number of keypoints to store the results.
+ */
+ void configure(CLCompileContext &compile_context, const ICLImage *input, bool update_number, ICLKeyPointArray *corners, cl::Buffer *num_buffers);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
* @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
*/
void configure(ICLTensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue());
+ /** Initialise the kernel's input, output and border mode.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in,out] tensor Tensor to process. Data types supported: U8/QASYMM8/S8/QASYMM8_SIGNED/U16/S16/U32/S32/F16/F32.
+ * @param[in] border_size Size of the border to fill in elements.
+ * @param[in] border_mode Border mode to use for the convolution.
+ * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+ */
+ void configure(CLCompileContext &compile_context, ICLTensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue());
/** Function to set the constant value on fill border kernel depending on type.
*
* w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input
*/
void configure(const ICLTensor *input, ICLTensor *output);
+ /** Set the input and output of the kernel.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input First input tensor to flatten with at least 3 dimensions.
+ * The dimensions above the third will be interpreted as batches. Data types supported: All.
+ * @param[out] output Output tensor with shape [w*h*d, input_batches] where:
+ * w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref CLFlattenLayerKernel
*
* @param[in] input First input tensor to flatten with at least 3 dimensions.
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
void configure(const ICLTensor *input_weights, const ICLTensor *bn_mean, const ICLTensor *bn_var, ICLTensor *fused_weights, ICLTensor *fused_bias,
const ICLTensor *input_bias = nullptr, const ICLTensor *bn_beta = nullptr, const ICLTensor *bn_gamma = nullptr,
float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION);
+ /** Set the source, destination of the kernel
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input_weights Input weights tensor for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC
+ * @param[in] bn_mean Batch normalization layer mean tensor. Same as @p input_weights
+ * @param[in] bn_var Batch normalization layer variance tensor. Same as @p input_weights
+ * @param[out] fused_weights Output fused weights tensor. It can be a nullptr in case of in-place computation. Same as @p input_weights
+ * @param[out] fused_bias Output fused bias tensor. It can be a nullptr in case of in-place computation and input_bias != nullptr. Same as @p input_weights
+ * @param[in] input_bias (Optional) Input bias tensor for convolution or depthwise convolution layer. It can be a nullptr in case the bias tensor is not required. Same as @p input_weights
+ * @param[in] bn_beta (Optional) Batch normalization layer beta tensor. It can be a nullptr in case the beta tensor is not required. Same as @p input_weights
+ * @note if nullptr, bn_beta is set to 0.0
+ * @param[in] bn_gamma (Optional) Batch normalization layer gamma tensor. It can be a nullptr in case the gamma tensor is not required. Same as @p input_weights
+ * @note if nullptr, bn_gamma is set to 1.0
+ * @param[in] epsilon (Optional) Batch normalization layer epsilon parameter. Defaults to 0.001f.
+ * @param[in] fbn_type (Optional) Fused batch normalization type. Defaults to CONVOLUTION.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input_weights, const ICLTensor *bn_mean, const ICLTensor *bn_var, ICLTensor *fused_weights, ICLTensor *fused_bias,
+ const ICLTensor *input_bias = nullptr, const ICLTensor *bn_beta = nullptr, const ICLTensor *bn_gamma = nullptr,
+ float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION);
/** Static function to check if given info will lead to a valid configuration of @ref CLFuseBatchNormalizationKernel
*
* @param[in] input_weights Input weights tensor info for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* @param[in] gemm_info (Optional) GEMM information used to retrieve the original dimensions of the input matrices
*/
void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMReshapeInfo &gemm_info = GEMMReshapeInfo());
+ /** Initialise the kernel's input and output.
+ *
+ * @note This kernel should be used ONLY for Midgard architectures
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: QASYMM8
+ * @param[in] input1 Input tensor containing the RHS matrix. Data type supported: same as @p input0
+ * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: S32
+ * @param[in] gemm_info (Optional) GEMM information used to retrieve the original dimensions of the input matrices
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMReshapeInfo &gemm_info = GEMMReshapeInfo());
/** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixMultiplyKernel
*
* @param[in] input0 Input tensor containing the LHS matrix. Data type supported: QASYMM8
* @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices
*/
void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const GEMMReshapeInfo &gemm_info);
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: QASYMM8/QASYMM8_SIGNED
+ * @param[in] input1 Input tensor containing the RHS matrix. Data type supported: same as @p input0
+ * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: S32
+ * @param[in] lhs_info LHS matrix information used to retrieve the number of rows to be processed by each thread
+ * lhs_info.m0: 2,3,4,5,6,7,8
+ * lhs_info.k0: 2,3,4,8,16
+ * @param[in] rhs_info RHS matrix information used to retrieve the number of columns to be processed by each thread
+ * rhs_info.n0: 2,3,4,8,16
+ * rhs_info.k0: same as lhs_info.k0
+ * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
+ const GEMMReshapeInfo &gemm_info);
/** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixMultiplyNativeKernel
*
* @param[in] input0 Input tensor info for the LHS matrix. Data type supported: QASYMM8/QASYMM8_SIGNED
* @note lhs_info.k0 must be equal to rhs_info.k0
*/
void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, const GEMMReshapeInfo &gemm_info);
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input0 Input tensor containing the LHS reshaped matrix. Data type supported: QASYMM8/QASYMM8_SIGNED. The number of dimensions for the LHS matrix must be less than or equal to 4.
+ * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less than or equal to 3.
+ * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: S32
+ * @param[in] lhs_info LHS matrix information used for reshaping the input0 tensor. Only the following values are supported:
+ * lhs_info.m0: 2,3,4,5,6,7,8
+ * lhs_info.k0: 2,3,4,8,16
+ * lhs_info.transpose: false
+ * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported:
+ * rhs_info.n0: 2,3,4,8,16
+ * rhs_info.k0: same as lhs_info.k0
+ * rhs_info.transpose: true
+ * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices
+ *
+ * @note lhs_info.k0 must be equal to rhs_info.k0
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
+ const GEMMReshapeInfo &gemm_info);
/** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixMultiplyReshapedKernel
*
* @param[in] input0 Input tensor info containing the LHS reshaped matrix. Data type supported: QASYMM8/QASYMM8_SIGNED. The number of dimensions for the LHS matrix must be less or equal than 4.
*/
void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMKernelInfo &gemm_info, const ICLTensor *vector_sum_col = nullptr,
const ICLTensor *vector_sum_row = nullptr, const ICLTensor *bias = nullptr, const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr);
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: QASYMM8/QASYMM8_SIGNED
+ * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0
+ * @param[out] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/S32.
+ * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices, output stage information and RHS/LHS info.
+ * Only the following values are supported for LHS info:
+ * lhs_info.m0: 2,3,4,5,6,7,8
+ * lhs_info.k0: 2,3,4,8,16
+ * Only the following values are supported for RHS info:
+ * rhs_info.n0: 2,3,4,8,16
+ * rhs_info.k0: same as lhs_info.k0
+ * rhs_info.transpose: true
+ * @param[in] vector_sum_col (Optional) Input row-vector of sums of all the entries in each column of matrix B.
+ * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: S32
+ * @param[in] vector_sum_row (Optional) Input row-vector of sums of all the entries in each row of matrix A.
+ * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: S32
+ * @param[in] bias (Optional) Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
+ * Biases are 1D tensor with dimensions [OFM]. Data type supported: S32.
+ * @param[in] output_multipliers (Optional) Output multipliers tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM).
+ * Supported data types: S32.
+ * @param[in] output_shifts (Optional) Output shifts tensor. In case of per-channel quantization, the number of shifts must be equal to the number of filters (OFM).
+ * Supported data types: S32.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMKernelInfo &gemm_info, const ICLTensor *vector_sum_col = nullptr,
+ const ICLTensor *vector_sum_row = nullptr, const ICLTensor *bias = nullptr, const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr);
/** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel
*
* @param[in] input0 Input tensor info for the LHS matrix. Data type supported: QASYMM8/QASYMM8_SIGNED
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* @param[in] b_offset Offset to be added to each element of the matrix B.
*/
void configure(ICLTensor *mm_result, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, const ICLTensor *bias, int32_t k, int32_t a_offset, int32_t b_offset);
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in,out] mm_result Input tensor containing the result of @ref CLGEMMLowpMatrixMultiplyKernel. Data type supported: S32
+ * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B.
+ * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result
+ * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A.
+ * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result
+ * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
+ * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p mm_result.
+ * @param[in] k Number of matrix A columns or Matrix B rows
+ * @param[in] a_offset Offset to be added to each element of the matrix A.
+ * @param[in] b_offset Offset to be added to each element of the matrix B.
+ */
+ void configure(CLCompileContext &compile_context, ICLTensor *mm_result, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, const ICLTensor *bias, int32_t k, int32_t a_offset,
+ int32_t b_offset);
/** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpOffsetContributionKernel
*
* @param[in] mm_result Input tensor containing the result of @ref CLGEMMLowpOffsetContributionKernel. Data type supported: S32
*/
void configure(const ICLTensor *mm_result, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, const ICLTensor *bias, ICLTensor *output, int32_t k, int32_t a_offset, int32_t b_offset,
const GEMMLowpOutputStageInfo &output_stage, const ICLTensor *output_multipliers, const ICLTensor *output_shifts);
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] mm_result Input tensor containing the result of @ref CLGEMMLowpMatrixMultiplyKernel. Data type supported: S32
+ * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B.
+ * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result
+ * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A.
+ * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result
+ * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required.
+ * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p mm_result.
+ * @param[out] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED.
+ * @param[in] k Number of matrix A columns or Matrix B rows
+ * @param[in] a_offset Offset to be added to each element of the matrix A.
+ * @param[in] b_offset Offset to be added to each element of the matrix B.
+ * @param[in] output_stage GEMMLowp output stage info
+ * @param[in] output_multipliers Output multipliers tensor. In case of per-channel quantization, the number of multipliers must be equal to the number of filters (OFM).
+ * Supported data types: S32
+ * @param[in] output_shifts Output shifts tensor. In case of per-channel quantization, the number of shifts must be equal to the number of filters (OFM).
+ * Supported data types: S32
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *mm_result, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, const ICLTensor *bias, ICLTensor *output, int32_t k,
+ int32_t a_offset, int32_t b_offset,
+ const GEMMLowpOutputStageInfo &output_stage, const ICLTensor *output_multipliers, const ICLTensor *output_shifts);
/** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpOffsetContributionKernel
*
* @param[in] mm_result Input tensor containing the result of @ref CLGEMMLowpOffsetContributionKernel. Data type supported: S32
* @param[in] info Output stage info. Used to pass the quantized output data type
*/
void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo *info);
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Input tensor. Data type supported: S32
+ * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
+ * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+ * @param[out] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED
+ * @param[in] info Output stage info. Used to pass the quantized output data type
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo *info);
/** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel
*
* @param[in] input Input tensor. Data type supported: S32
* @param[in] output_stage GEMMLowp output stage metadata.
*/
void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo *output_stage);
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Input tensor. Data type supported: S32
+ * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
+ * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+ * @param[out] output Output tensor. Data type supported: QASYMM8/QASYMM8_SIGNED
+ * @param[in] output_stage GEMMLowp output stage metadata.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo *output_stage);
/** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ScaleKernel
*
* @param[in] input Input tensor. Data type supported: S32
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to 0.
*/
void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int min = 0, int max = 0);
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Input tensor. Data type supported: S32
+ * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
+ * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+ * @param[out] output Output tensor. Data type supported: QSYMM16
+ * @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix once the result_offset has been added
+ * @param[in] result_shift Integer value used to round to nearest division by a power-of-two the result after the fixed point multiplication
+ * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to 0.
+ * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QSYMM16.
+ * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to 0.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int min = 0, int max = 0);
/** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel
*
* @param[in] input Input tensor info. Data type supported: S32
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
*/
void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift,
int min = 0, int max = 0);
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Input tensor. Data type supported: S32
+ * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
+ * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+ * @param[out] output Output tensor. Data type supported: QASYMM8_SIGNED
+ * @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix once the result_offset has been added
+ * @param[in] result_shift Integer value used to round to nearest division by a power-of-two the result after the fixed point multiplication
+ * @param[in] result_offset_after_shift Offset to be applied to result before converting it back to QASYMM8_SIGNED
+ * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED. Defaults to 0
+ * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED. Defaults to 0
+ * Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift,
+ int min = 0, int max = 0);
/** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel
*
* @param[in] input Input tensor. Data type supported: S32
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
*/
void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift,
int min = 0, int max = 0);
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Input tensor. Data type supported: S32
+ * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required.
+ * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+ * @param[out] output Output tensor. Data type supported: QASYMM8
+ * @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix once the result_offset has been added
+ * @param[in] result_shift Integer value used to round to nearest division by a power-of-two the result after the fixed point multiplication
+ * @param[in] result_offset_after_shift Offset to be applied to result before converting it back to QASYMM8
+ * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8. Defaults to 0.
+ * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8. Defaults to 0.
+ * Along with @p min, this value can be used to implement "rectified linear unit" activation functions
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift,
+ int min = 0, int max = 0);
/** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel
*
* @param[in] input Input tensor. Data type supported: S32
* - mul_byscalar True if each reduced column/row must be multiplied by a scalar value.
*/
virtual void configure(const ICLTensor *input, ICLTensor *output, const GEMMLowpReductionKernelInfo &info) = 0;
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Input tensor. Data type supported: S8
+ * @param[out] output Output row-vector of sums of all the entries in each row/col of input tensor. Data type supported: S32
+ * @param[in] info Kernel metadata:
+ * - k Number of matrix columns/rows depending on the type of reduction.
+ * - is_reshaped True if the matrix has been reshaped.
+ * - scalar Scalar value to multiply each reduced column/row by.
+ * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value.
+ */
+ virtual void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const GEMMLowpReductionKernelInfo &info) = 0;
protected:
const ICLTensor *_input;
* - mul_byscalar True if each reduced column/row must be multiplied by a scalar value.
*/
void configure(const ICLTensor *mtx_a, ICLTensor *vector_sum_row, const GEMMLowpReductionKernelInfo &info) override;
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] mtx_a Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED
+ * @param[out] vector_sum_row Output row-vector of sums of all the entries in each row of mtx_a. Data type supported: S32
+ * @param[in] info Kernel metadata:
+ * - k Number of matrix columns/rows depending on the type of reduction.
+ * - is_reshaped True if the matrix has been reshaped.
+ * - scalar Scalar value to multiply each reduced column/row by.
+ * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *mtx_a, ICLTensor *vector_sum_row, const GEMMLowpReductionKernelInfo &info) override;
/** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixAReductionKernel
*
* @param[in] mtx_a Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED
* - mul_byscalar True if each reduced column/row must be multiplied by a scalar value.
*/
void configure(const ICLTensor *mtx_b, ICLTensor *vector_sum_col, const GEMMLowpReductionKernelInfo &info) override;
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] mtx_b Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED
+ * @param[out] vector_sum_col Output row-vector of sums of all the entries in each column of mtx_b. Data type supported: S32
+ * @param[in] info Kernel metadata:
+ * - k Number of matrix columns/rows depending on the type of reduction.
+ * - is_reshaped True if the matrix has been reshaped.
+ * - scalar Scalar value to multiply each reduced column/row by.
+ * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *mtx_b, ICLTensor *vector_sum_col, const GEMMLowpReductionKernelInfo &info) override;
/** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixBReductionKernel
*
* @param[in] mtx_b Input tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* @param[in] biases The shared biases tensor to append. It must be 1D tensor. Data types supported: Same as @p input
*/
void configure(ICLTensor *accum, const ICLTensor *biases);
+ /** Set the accumulate buffer and the biases of the kernel.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in, out] accum The accumulate tensor to convert. Data types supported: F16/F32
+ * @param[in] biases The shared biases tensor to append. It must be a 1D tensor. Data types supported: Same as @p accum
+ */
+ void configure(CLCompileContext &compile_context, ICLTensor *accum, const ICLTensor *biases);
/** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixAccumulateBiasesKernel
*
* @param[in] accum The accumulate tensor to convert. Data types supported: F16/F32
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
*/
void configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta = 0.f,
bool is_interleaved_transposed = true, const GEMMReshapeInfo &reshape_info = GEMMReshapeInfo(), bool fp_mixed_precision = false, const ActivationLayerInfo &activation_info = ActivationLayerInfo());
+ /** Initialise the kernel's input, output and alpha
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input0 Input tensor containing the Matrix A. Data types supported: F16/F32
+ * @param[in] input1 Input tensor containing the Matrix B. Data type supported: same as @p input0
+ * @param[in] input2 Input tensor containing the Matrix C (bias). Can be nullptr. Data type supported: same as @p input0
+ * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0
+ * @param[in] alpha Weight of the matrix product
+ * @param[in] beta (Optional) Weight of vector C. Default value is 0. Only beta = 1 is currently supported.
+ * @param[in] is_interleaved_transposed (Optional) True if input0 and input1 have been reshaped respectively using @ref CLGEMMReshapeLHSMatrixKernel and @ref CLGEMMReshapeRHSMatrixKernel
+ * @param[in] reshape_info (Optional) GEMM reshape info. If is_interleaved_transposed = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped
+ * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy
+ * @param[in] activation_info (Optional) Activation to apply after the matrix multiplication
+ *
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta = 0.f,
+ bool is_interleaved_transposed = true, const GEMMReshapeInfo &reshape_info = GEMMReshapeInfo(), bool fp_mixed_precision = false, const ActivationLayerInfo &activation_info = ActivationLayerInfo());
/** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixMultiplyKernel
*
* @param[in] input0 Input tensor containing the Matrix A info. Data types supported: F16/F32
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
void configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info,
const GEMMRHSMatrixInfo &rhs_info,
const GEMMKernelInfo &gemm_info);
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input0 Input tensor for the LHS matrix. Data type supported: F32. The number of dimensions for the LHS matrix must be less than or equal to 4.
+ * @param[in] input1 Input tensor for the RHS matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less than or equal to 3.
+ * @param[in] input2 Input tensor containing the bias matrix. Data type supported: same as @p input0.
+ * @param[out] output Output tensor info. Data type supported: same as @p input0
+ * @param[in] alpha Weight of the matrix product
+ * @param[in] beta Weight of the matrix bias
+ * @param[in] lhs_info LHS matrix information used to retrieve the number of rows and accumulations to be processed by each thread. Only the following values are supported:
+ * lhs_info.m0: 1,2,3,4,5,6,7,8
+ * lhs_info.k0: 2,3,4,8,16
+ * @param[in] rhs_info RHS matrix information used to retrieve the number of columns and accumulations to be processed by each thread. Only the following values are supported:
+ * rhs_info.n0: 2,3,4,8,16
+ * rhs_info.k0: same as lhs_info.k0
+ * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta,
+ const GEMMLHSMatrixInfo &lhs_info,
+ const GEMMRHSMatrixInfo &rhs_info,
+ const GEMMKernelInfo &gemm_info);
/** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixMultiplyNativeKernel
*
* @param[in] input0 Input tensor info for the LHS matrix. Data type supported: F32. The number of dimensions for the LHS matrix must be less or equal than 4.
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
void configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info,
const GEMMRHSMatrixInfo &rhs_info,
const GEMMKernelInfo &gemm_info);
+ /** Initialise the kernel's input and output.
+ *
+ * @note The F16 computation also supports mixed precision through the gemm_info.fp_mixed_precision flag.
+ * Mixed precision combines different floating precisions during the computation, in particular, F32 for the accumulations and F16 for the
+ * multiplications. i.e. float c = (half)a * (half)b
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input0 Input tensor containing the LHS reshaped matrix. Data type supported: F16/F32. The number of dimensions for the LHS matrix must be less than or equal to 4
+ * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less than or equal to 3
+ * @param[in] input2 Input tensor containing the bias matrix. Data type supported: same as @p input0.
+ * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0
+ * @param[in] alpha Weight of the matrix product
+ * @param[in] beta Weight of the matrix bias
+ * @param[in] lhs_info LHS matrix information used for reshaping the input0 tensor. Only the following values are supported:
+ * lhs_info.m0: 2,3,4,5,6,7,8
+ * lhs_info.k0: 2,3,4,8,16
+ * lhs_info.transpose: false
+ * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported:
+ * rhs_info.n0: 2,3,4,8,16
+ * rhs_info.k0: 2,3,4,8,16
+ * rhs_info.transpose: true
+ * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices
+ *
+ * @note lhs_info.k0 must be equal to rhs_info.k0
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta,
+ const GEMMLHSMatrixInfo &lhs_info,
+ const GEMMRHSMatrixInfo &rhs_info,
+ const GEMMKernelInfo &gemm_info);
/** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixMultiplyReshapedKernel
*
* @param[in] input0 Input tensor containing the LHS reshaped matrix. Data type supported: F16/F32. The number of dimensions for the LHS matrix must be less or equal than 4
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
void configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info,
const GEMMRHSMatrixInfo &rhs_info,
const GEMMKernelInfo &gemm_info);
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input0 Input tensor containing the LHS matrix. Data type supported: F16/F32. The number of dimensions for the LHS matrix must be less than or equal to 4.
+ * @param[in] input1 Input tensor containing the RHS reshaped matrix. Data type supported: same as @p input0. The number of dimensions for the RHS matrix must be less than or equal to 3.
+ * @param[in] input2 Input tensor containing the bias matrix. Data type supported: same as @p input0.
+ * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0
+ * @param[in] alpha Weight of the matrix product
+ * @param[in] beta Weight of the matrix bias
+ * @param[in] lhs_info LHS matrix information used to retrieve the number of rows to be processed by each thread. Only the following values are supported:
+ * lhs_info.m0: 1,2,3,4,5,6,7,8
+ * @param[in] rhs_info RHS matrix information used for reshaping the input1 tensor. Only the following values are supported:
+ * rhs_info.k0: 2,3,4,8,16
+ * rhs_info.n0: 2,3,4,8,16
+ * rhs_info.transpose: true,false
+ * @param[in] gemm_info GEMM information used to retrieve the original dimensions of the input matrices
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta,
+ const GEMMLHSMatrixInfo &lhs_info,
+ const GEMMRHSMatrixInfo &rhs_info,
+ const GEMMKernelInfo &gemm_info);
/** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixMultiplyReshapedOnlyRHSKernel
*
* @param[in] input0 Input tensor info for the LHS matrix. Data type supported: F16/F32. The number of dimensions for the LHS matrix must be less or equal than 4.
* @param[out] output The output 2D tensor. Data types supported: Same as @p input, S32 for QASYMM8/QASYMM8_SIGNED.
*/
void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output);
+ /** Set the input and output of the kernel.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input0 The reshaped input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
+ * @param[in] input1 The 2D reshaped weights tensor. Data type supported: Same as @p input0.
+ * @param[out] output The output 2D tensor. Data types supported: Same as @p input0, S32 for QASYMM8/QASYMM8_SIGNED.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixVectorMultiplyKernel
*
* @param[in] input0 The reshaped input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
* @param[in] reinterpret_input_as_3d (Optional) True if the input has to be reinterpreted as 3D tensor
*/
void configure(const ICLTensor *input, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, bool reinterpret_input_as_3d = false);
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Input tensor. Data types supported: All
+ * @param[out] output Output tensor. Data type supported: same as @p input
+ * @param[in] lhs_info LHS matrix information to be used for reshaping. This object contains all the necessary
+ * information to reshape the input tensor. Only the following values are supported:
+ * lhs_info.m0: 2,3,4,5,6,7,8
+ * lhs_info.k0: 2,3,4,8,16
+ * lhs_info.v0: greater than 0
+ * lhs_info.transpose: true, false
+ * lhs_info.interleave: true, false
+ * @param[in] reinterpret_input_as_3d (Optional) True if the input has to be reinterpreted as 3D tensor
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, bool reinterpret_input_as_3d = false);
/** Static function to check if given info will lead to a valid configuration of @ref CLGEMMReshapeLHSMatrixKernel
*
* @param[in] input Input tensor info. Data types supported: All
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* rhs_info.interleave: true, false
*/
void configure(const ICLTensor *input, ICLTensor *output, const GEMMRHSMatrixInfo &rhs_info);
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Input tensor. Data types supported: All
+ * @param[out] output Output tensor. Data type supported: same as @p input
+ * @param[in] rhs_info RHS matrix information to be used for reshaping. This object contains all the necessary
+ * information to reshape the input tensor. Only the following values are supported:
+ * rhs_info.n0: 2,3,4,8,16
+ * rhs_info.k0: 1,2,3,4,8,16 (k0 = 1 only if rhs_info.transpose = false)
+ * rhs_info.h0: greater than 0
+ * rhs_info.transpose: true, false
+ * rhs_info.interleave: true, false
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const GEMMRHSMatrixInfo &rhs_info);
/** Static function to check if given info will lead to a valid configuration of @ref CLGEMMReshapeRHSMatrixKernel
*
* @param[in] input Input tensor info. Data types supported: All
* @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative values wrap around. Defaults to 0
*/
void configure(const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, int axis = 0);
+ /** Initialise the kernel's inputs and outputs
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Supported tensor rank: up to 4. Data type supported: All.
+ * @param[in] indices Indices tensor. Supported tensor rank: up to 1. Must be one of the following types: U32/S32. Each value must be in range [0, input.shape[@p axis])
+ * @param[out] output Destination tensor. Data type supported: Same as @p input
+ * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative values wrap around. Defaults to 0
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, int axis = 0);
/** Static function to check if given info will lead to a valid configuration of @ref CLGatherKernel
*
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
*/
void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input An input tensor. Data types supported: U8
+ * @param[out] output The output tensor. Data types supported: U8.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined);
// Inherited methods overridden:
BorderSize border_size() const override;
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
*/
void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
+ /** Initialise the kernel's source, destination and border.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[out] output Destination tensor. Data types supported: S16.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined);
private:
//Make the configure method of the parent class private
* @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
*/
void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
+ /** Initialise the kernel's source, destination and border.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Input tensor(output of horizontal pass). Data types supported: S16.
+ * @param[out] output Destination tensor. Data types supported: U8.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined);
private:
//Make the configure method of the parent class private
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* @param[out] output Destination tensor. Output should have half the input width. Data types supported: U16.
*/
void configure(const ICLTensor *input, ICLTensor *output);
+ /** Initialise the kernel's source and destination.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[out] output Destination tensor. Output should have half the input width. Data types supported: U16.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
* @param[out] output Destination tensor. Output should have half the input height. Data types supported: U8.
*/
void configure(const ICLTensor *input, ICLTensor *output);
+ /** Initialise the kernel's source and destination.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: U16.
+ * @param[out] output Destination tensor. Output should have half the input height. Data types supported: U8.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
*
*/
void configure(const ICLTensor *anchors, ICLTensor *all_anchors, const ComputeAnchorsInfo &info);
+ /** Set the input and output tensors.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] anchors Source tensor. Original set of anchors of size (4, A), where A is the number of anchors. Data types supported: QSYMM16/F16/F32
+ * @param[out] all_anchors Destination tensor. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. Data types supported: Same as @p anchors
+ * @param[in] info Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo
+ *
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *anchors, ICLTensor *all_anchors, const ComputeAnchorsInfo &info);
/** Static function to check if given info will lead to a valid configuration of @ref CLComputeAllAnchorsKernel
*
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* @param[in] hog_info HOG's metadata
*/
void configure(const ICLTensor *input_magnitude, const ICLTensor *input_phase, ICLTensor *output, const HOGInfo *hog_info);
+ /** Initialise the kernel's inputs, output and HOG's metadata
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input_magnitude Input tensor which stores the magnitude of the gradient for each pixel. Data type supported: S16.
+ * @param[in] input_phase Input tensor which stores the phase of the gradient for each pixel. Data type supported: U8
+ * @param[out] output Output tensor which stores the local HOG for each cell. DataType supported: F32. Number of channels supported: equal to the number of histogram bins per cell
+ * @param[in] hog_info HOG's metadata
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input_magnitude, const ICLTensor *input_phase, ICLTensor *output, const HOGInfo *hog_info);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
* @param[in] hog_info HOG's metadata
*/
void configure(const ICLTensor *input, ICLTensor *output, const HOGInfo *hog_info);
+ /** Initialise the kernel's input, output and HOG's metadata
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Input tensor which stores the local HOG for each cell. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per cell
+ * @param[out] output Output tensor which stores the normalised blocks. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block
+ * @param[in] hog_info HOG's metadata
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const HOGInfo *hog_info);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
*/
void configure(const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, cl::Buffer *num_detection_windows, const Size2D &detection_window_stride, float threshold = 0.0f,
uint16_t idx_class = 0);
+ /** Initialise the kernel's input, HOG data-object, detection window, the stride of the detection window, the threshold and index of the object to detect
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Input tensor which stores the HOG descriptor obtained with @ref CLHOGOrientationBinningKernel. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block
+ * @param[in] hog HOG data object used by @ref CLHOGOrientationBinningKernel and @ref CLHOGBlockNormalizationKernel
+ * @param[out] detection_windows Array of @ref DetectionWindow. This array stores all the detected objects
+ * @param[in] num_detection_windows Number of detected objects
+ * @param[in] detection_window_stride Distance in pixels between 2 consecutive detection windows in x and y directions.
+ * It must be multiple of the hog->info()->block_stride()
+ * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane
+ * @param[in] idx_class (Optional) Index of the class used for evaluating which class the detection window belongs to
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, cl::Buffer *num_detection_windows,
+ const Size2D &detection_window_stride, float threshold = 0.0f,
+ uint16_t idx_class = 0);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue);
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
void configure(const ICLImage *input1, const ICLImage *input2, ICLImage *output,
int32_t block_size, float norm_factor, float strength_thresh, float sensitivity,
bool border_undefined);
+ /** Setup the kernel parameters
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input1 Source image (gradient X). Data types supported S16, S32. (Must be the same as input2)
+ * @param[in] input2 Source image (gradient Y). Data types supported S16, S32. (Must be the same as input1)
+ * @param[out] output Destination image (harris score). Data types supported F32
+ * @param[in] block_size The block window size used to compute the Harris Corner score. Supports: 3, 5 and 7
+ * @param[in] norm_factor Normalization factor to use accordingly with the gradient size (Must be different from 0)
+ * @param[in] strength_thresh Minimum threshold with which to eliminate Harris Corner scores (computed using the normalized Sobel kernel).
+ * @param[in] sensitivity Sensitivity threshold k from the Harris-Stephens equation.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(CLCompileContext &compile_context, const ICLImage *input1, const ICLImage *input2, ICLImage *output,
+ int32_t block_size, float norm_factor, float strength_thresh, float sensitivity,
+ bool border_undefined);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
*
*/
void configure(const ICLTensor *input, unsigned int height_offset, ICLTensor *output);
+ /** Initialise the kernel's inputs and output
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Input tensor. Data types supported: All.
+ * @param[in] height_offset The starting offset on the Y axis for the output tensor.
+ * @param[out] output Output tensor. Data types supported: Same as @p input.
+ *
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, unsigned int height_offset, ICLTensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref CLHeightConcatenateLayerKernel
*
* @param[in] input Input tensor info. Data types supported: All.
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* @param[out] output Destination distribution.
*/
void configure(const ICLImage *input, ICLDistribution1D *output);
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source image. Data types supported: U8.
+ * @param[out] output Destination distribution.
+ */
+ void configure(CLCompileContext &compile_context, const ICLImage *input, ICLDistribution1D *output);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
* @param[out] output Destination distribution.
*/
void configure(const ICLImage *input, ICLDistribution1D *output);
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source image. Data types supported: U8.
+ * @param[out] output Destination distribution.
+ */
+ void configure(CLCompileContext &compile_context, const ICLImage *input, ICLDistribution1D *output);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
*/
void configure(const ICLTensor *input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation = Size2D(1U, 1U),
unsigned int num_groups = 1);
+ /** Set the input and output of the kernel.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
+ * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
+ * @param[out] output The output tensor. First 2 lower dimensions represent a transform of each 3D input,
+ * while every dimension above represents a batch. Data types supported: Same as @p input
+ * @param[in] kernel_dims The kernel dimensions (width and height).
+ * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
+ * @param[in] has_bias In case biases are provided expands the matrix with 1.
+ * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+ * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias,
+ const Size2D &dilation = Size2D(1U, 1U),
+ unsigned int num_groups = 1);
/** Static function to check if given info will lead to a valid configuration of @ref CLIm2ColKernel
*
* @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
* @param[in] info Kernel meta-data descriptor
*/
void configure(ICLTensor *input, ICLTensor *output, const InstanceNormalizationLayerKernelInfo &info);
+ /** Set the input and output tensors.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in, out] input Source tensor. Data types supported: F16/F32. Data layout supported: NCHW, NHWC
+ * In case of @p output tensor = nullptr this tensor will store the result of the normalization.
+ * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input.
+ * @param[in] info Kernel meta-data descriptor
+ */
+ void configure(CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const InstanceNormalizationLayerKernelInfo &info);
/** Static function to check if given info will lead to a valid configuration of @ref CLInstanceNormalizationLayer.
*
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* @param[out] output Destination tensor, Data types supported: U32.
*/
void configure(const ICLTensor *input, ICLTensor *output);
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input An input tensor. Data types supported: U8
+ * @param[out] output Destination tensor, Data types supported: U32.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
};
/** Interface to run the vertical pass of the integral image kernel. */
* @param[in,out] in_out The input/output tensor. Data types supported: U32
*/
void configure(ICLTensor *in_out);
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in,out] in_out The input/output tensor. Data types supported: U32
+ */
+ void configure(CLCompileContext &compile_context, ICLTensor *in_out);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* @param[in] epsilon Lower bound value for the normalization.
*/
void configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, int axis, float epsilon);
+ /** Set the input and output tensors.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC.
+ * @param[in] sum Sum values tensor. Data types supported: same as @p input.
+ * Sum will have the same number of dimensions as input.
+ * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input.
+ * Output will have the same number of dimensions as input.
+ * @param[in] axis Axis along which to reduce. Negative values wrap around. Maximum supported actual reduction axis : 2
+ * @param[in] epsilon Lower bound value for the normalization.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, int axis, float epsilon);
/** Static function to check if given info will lead to a valid configuration of @ref CLL2NormalizeLayerKernel.
*
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
void configure(const ICLKeyPointArray *old_points, const ICLKeyPointArray *new_points_estimates,
ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal,
bool use_initial_estimate, size_t level, size_t num_levels, float pyramid_scale);
+ /** Initialise the kernel input and output
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] old_points Pointer to the @ref ICLKeyPointArray storing old key points
+ * @param[in] new_points_estimates Pointer to the @ref ICLKeyPointArray storing new estimates key points
+ * @param[out] old_points_internal Pointer to the array of internal @ref CLLKInternalKeypoint old points
+ * @param[out] new_points_internal Pointer to the array of internal @ref CLLKInternalKeypoint new points
+ * @param[in] use_initial_estimate The flag to indicate whether the initial estimated position should be used
+ * @param[in] level The pyramid level
+ * @param[in] num_levels The number of pyramid levels
+ * @param[in] pyramid_scale Scale factor used for generating the pyramid
+ */
+ void configure(CLCompileContext &compile_context, const ICLKeyPointArray *old_points, const ICLKeyPointArray *new_points_estimates,
+ ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal,
+ bool use_initial_estimate, size_t level, size_t num_levels, float pyramid_scale);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
* @param[out] new_points Pointer to the @ref ICLKeyPointArray storing new key points
*/
void configure(ICLLKInternalKeypointArray *new_points_internal, ICLKeyPointArray *new_points);
+ /** Initialise the kernel input and output
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] new_points_internal Pointer to the array of internal @ref CLLKInternalKeypoint new points
+ * @param[out] new_points Pointer to the @ref ICLKeyPointArray storing new key points
+ */
+ void configure(CLCompileContext &compile_context, ICLLKInternalKeypointArray *new_points_internal, ICLKeyPointArray *new_points);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal,
ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival,
size_t window_dimension, size_t level);
+ /** Initialise the kernel input and output
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] old_input Pointer to the input old tensor. Data types supported: U8
+ * @param[in] old_scharr_gx Pointer to the input scharr X tensor. Data types supported: S16
+ * @param[in] old_scharr_gy Pointer to the input scharr Y tensor. Data types supported: S16
+ * @param[in] old_points_internal Pointer to the array of CLLKInternalKeypoint old points
+ * @param[in, out] new_points_internal Pointer to the array of CLLKInternalKeypoint new points
+ * @param[out] coeff_table Pointer to the array holding the Spatial Gradient coefficients
+ * @param[out] old_ival Pointer to the array holding internal values
+ * @param[in] window_dimension The size of the window on which to perform the algorithm
+ * @param[in] level The pyramid level
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *old_input, const ICLTensor *old_scharr_gx, const ICLTensor *old_scharr_gy,
+ ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal,
+ ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival,
+ size_t window_dimension, size_t level);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
*/
void configure(const ICLTensor *new_input, ICLLKInternalKeypointArray *new_points_internal, ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival,
Termination termination, float epsilon, size_t num_iterations, size_t window_dimension, size_t level);
+ /** Initialise the kernel input and output
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] new_input Pointer to the input new tensor. Data types supported: U8
+ * @param[in, out] new_points_internal Pointer to the array of CLLKInternalKeypoint for new points
+ * @param[in] coeff_table Pointer to the array holding the Spatial Gradient coefficients
+ * @param[in] old_ival Pointer to the array holding internal values
+ * @param[in] termination The criteria to terminate the search of each keypoint.
+ * @param[in] epsilon The error for terminating the algorithm
+ * @param[in] num_iterations The maximum number of iterations before terminating the algorithm
+ * @param[in] window_dimension The size of the window on which to perform the algorithm
+ * @param[in] level The pyramid level
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *new_input, ICLLKInternalKeypointArray *new_points_internal, ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival,
+ Termination termination, float epsilon, size_t num_iterations, size_t window_dimension, size_t level);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* @param[out] output Output tensor to store the result. Data type supported: same as @p input0
*/
void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output);
+ /** Initialise the kernel's inputs and output
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input0 First input tensor. Data types supported: F32
+ * @param[in] input1 Second input tensor. Data type supported: same as @p input0
+ * @param[out] output Output tensor to store the result. Data type supported: same as @p input0
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref CLLocallyConnectedMatrixMultiplyKernel
*
* @param[in] input0 First input tensor info. Data types supported: F32
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
*/
void configure(const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase,
MagnitudeType mag_type = MagnitudeType::L2NORM, PhaseType phase_type = PhaseType::SIGNED);
+ /** Initialise the kernel's input, output.
+ *
+ * @note At least one of @p magnitude or @p phase must be set.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] gx The input gradient X tensor. Data types supported: S16.
+ * @param[in] gy The input gradient Y tensor. Data types supported: S16.
+ * @param[out] magnitude (Optional) The output tensor - Magnitude. Data types supported: S16.
+ * @param[out] phase (Optional) The output tensor - Phase. Data types supported: U8.
+ * @param[in] mag_type (Optional) Magnitude calculation type. Default: L2NORM.
+ * @param[in] phase_type (Optional) Phase calculation type. Default: SIGNED.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase,
+ MagnitudeType mag_type = MagnitudeType::L2NORM, PhaseType phase_type = PhaseType::SIGNED);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* @param[out] global_sum_squared (Optional if stddev is not set, required if stddev is set) Keeps global sum of squared pixel values (Buffer size: 1 cl_ulong).
*/
void configure(const ICLImage *input, float *mean, cl::Buffer *global_sum, float *stddev = nullptr, cl::Buffer *global_sum_squared = nullptr);
+ /** Initialise the kernel's input and outputs.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Input image. Data types supported: U8.
+ * @param[out] mean Output average pixel value.
+ * @param[out] global_sum Keeps global sum of pixel values (Buffer size: 1 cl_ulong).
+ * @param[out] stddev (Optional) Output standard deviation of pixel values.
+ * @param[out] global_sum_squared (Optional if stddev is not set, required if stddev is set) Keeps global sum of squared pixel values (Buffer size: 1 cl_ulong).
+ */
+ void configure(CLCompileContext &compile_context, const ICLImage *input, float *mean, cl::Buffer *global_sum, float *stddev = nullptr, cl::Buffer *global_sum_squared = nullptr);
/** Static function to check if given info will lead to a valid configuration of @ref CLMeanStdDevKernel.
*
* @param[in] input Input image info. Data types supported: U8.
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* @param[in] epsilon (Optional) Small float to avoid division by zero in case of zero standard deviation. Defaults to 1e-8.
*/
void configure(ICLTensor *input, ICLTensor *output = nullptr, float epsilon = 1e-8f);
+ /** Initialise the kernel's input and outputs.
+ *
+ * @note If the output tensor is a nullptr, the normalization will be performed in-place.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in, out] input Source tensor with 2 dimensions. In case of @p output tensor = nullptr,
+ * this tensor will store the result of the normalization. Data types supported: F16/F32.
+ * @param[out] output (Optional) Destination tensor. It can be nullptr in case of in-place computation. Data type supported: same as @p input
+ * @param[in] epsilon (Optional) Small float to avoid division by zero in case of zero standard deviation. Defaults to 1e-8.
+ */
+ void configure(CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output = nullptr, float epsilon = 1e-8f);
/** Static function to check if given info will lead to a valid configuration of @ref CLMeanStdDevNormalizationKernel
*
* @param[in] input Source tensor info with 2 dimensions. In case of @p output tensor info = nullptr,
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
*/
void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input An input tensor. Data types supported: U8
+ * @param[out] output The output tensor. Data types supported: U8.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined);
// Inherited methods overridden:
BorderSize border_size() const override;
* @param[in] window Window to be used in case setting only part of a tensor. Default is nullptr.
*/
void configure(ICLTensor *tensor, const PixelValue &constant_value, Window *window = nullptr);
+ /** Initialise the kernel's tensor and filling value
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in,out] tensor Input tensor to fill. Supported data types: All.
+ * @param[in] constant_value The value used to fill the planes of the tensor
+ * @param[in] window Window to be used in case setting only part of a tensor. Default is nullptr.
+ */
+ void configure(CLCompileContext &compile_context, ICLTensor *tensor, const PixelValue &constant_value, Window *window = nullptr);
/** Static function to check if given info will lead to a valid configuration of @ref CLMemsetKernel
*
* @param[in] tensor Source tensor info. Data types supported: All.
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* The dimensions over the second must match the batched dimensions of the input tensor. Data types supported: F32.
*/
void configure(const ICLTensor *input, ICLTensor *output);
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Input tensor with at least 3 dimensions. The dimensions over the third will be interpreted as batches. Data types supported: F32.
+ * @param[out] output Output tensor with shape [2, batches, ...] which stores the minimum and maximum values for each 3D input tensor.
+ * The dimensions over the second must match the batched dimensions of the input tensor. Data types supported: F32.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref CLMinMaxLayerKernel
*
* @param[in] input Input tensor info. Data types supported: F32.
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* @param[out] min_max Buffer of 2 elements to store the min value at position 0 and the max value at position 1. Data type supported: S32 if input type is U8/S16, F32 if input type is F32.
*/
void configure(const ICLImage *input, cl::Buffer *min_max);
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Input Image. Data types supported: U8/S16/F32.
+ * @param[out] min_max Buffer of 2 elements to store the min value at position 0 and the max value at position 1. Data type supported: S32 if input type is U8/S16, F32 if input type is F32.
+ */
+ void configure(CLCompileContext &compile_context, const ICLImage *input, cl::Buffer *min_max);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
*/
void configure(const ICLImage *input, cl::Buffer *min_max, cl::Buffer *min_max_count,
ICLCoordinates2DArray *min_loc = nullptr, ICLCoordinates2DArray *max_loc = nullptr);
+ /** Initialise the kernel's input and outputs.
+ *
+ * @note When locations of min and max occurrences are requested, the reported number of locations is limited to the given array size.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Input image. Data types supported: U8/S16/F32.
+ * @param[out] min_max Buffer of 2 elements to store the min value at position 0 and the max value at position 1. Data type supported: S32 if input type is U8/S16, F32 if input type is F32.
+ * @param[out] min_max_count Buffer of 2 elements to store the min value occurrences at position 0 and the max value occurrences at position 1. Data type supported: S32
+ * @param[out] min_loc (Optional) Array of Coordinates2D used to store minimum value locations.
+ * @param[out] max_loc (Optional) Array of Coordinates2D used to store maximum value locations.
+ */
+ void configure(CLCompileContext &compile_context, const ICLImage *input, cl::Buffer *min_max, cl::Buffer *min_max_count,
+ ICLCoordinates2DArray *min_loc = nullptr, ICLCoordinates2DArray *max_loc = nullptr);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
void configure(const ICLTensor *input, ICLTensor *output, NonLinearFilterFunction function,
unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask,
bool border_undefined);
+ /** Set the source, destination and border mode of the kernel
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: U8
+ * @param[out] output Destination tensor. Data types supported: U8
+ * @param[in] function Non linear function to perform
+ * @param[in] mask_size Mask size. Supported sizes: 3, 5
+ * @param[in] pattern Mask pattern
+ * @param[in] mask The given mask. Will be used only if pattern is specified to PATTERN_OTHER
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, NonLinearFilterFunction function,
+ unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask,
+ bool border_undefined);
// Inherited methods overridden:
BorderSize border_size() const override;
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
*/
void configure(const ICLTensor *input, ICLTensor *output, bool border_undefined);
+ /** Initialise the kernel's sources, destinations and border mode.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: U8, F32. (Must be the same as the output tensor)
+ * @param[out] output Destination tensor. Data types supported: U8, F32. (Must be the same as the input tensor)
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined);
// Inherited methods overridden:
BorderSize border_size() const override;
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters.
*/
void configure(const ICLTensor *input, ICLTensor *output, NormalizationLayerInfo norm_info);
+ /** Set the input and output tensors.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM],
+ * and an optional 4th dimension for batch of inputs. Data types supported: F16/F32. Data layouts supported: NCHW/NHWC.
+ * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data types supported: same as @p input.
+ * Data layouts supported: same as @p input.
+ * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, NormalizationLayerInfo norm_info);
/** Static function to check if given info will lead to a valid configuration of @ref CLNormalizationLayerKernel
*
* @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM],
* Data types supported: same as @p input
*/
void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *std);
+ /** Set the input and output tensors.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, channels].
+ * Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[out] output Destination tensor. Data type supported: same as @p input
+ * @param[in] mean Mean values tensor. 1 dimension with size equal to the number of input channels. Data types supported: same as @p input
+ * @param[in] std Standard deviation values tensor. 1 dimension with size equal to the number of input channels.
+ * Data types supported: same as @p input
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *std);
/** Static function to check if given info will lead to a valid configuration of @ref CLNormalizePlanarYUVLayerKernel
*
* @param[in] input Source tensor info. 3 lower dimensions represent a single input with dimensions [width, height, channels].
* or reflect the input, either including the border values (SYMMETRIC) or not (REFLECT).
*/
void configure(const ICLTensor *input, ICLTensor *output, const PaddingList &padding, PixelValue constant_value = PixelValue(), PaddingMode mode = PaddingMode::CONSTANT);
+ /** Set the input and output tensor.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: All.
+ * @param[out] output Output tensor. Data type supported: same as @p input
+ * @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i]
+ * specifies the front and the end padding in the i-th dimension.
+ * @param[in] constant_value (Optional) Constant value to be used for the padding.
+ * @param[in] mode (Optional) Controls whether the padding should be filled with @p constant_value using CONSTANT,
+ * or reflect the input, either including the border values (SYMMETRIC) or not (REFLECT).
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PaddingList &padding, PixelValue constant_value = PixelValue(),
+ PaddingMode mode = PaddingMode::CONSTANT);
/** Static function to check if given info will lead to a valid configuration of @ref CLPadLayerKernel
*
* @param[in] input Source tensor info. Data types supported: All.
* @param[in] perm Permutation vector
*/
void configure(const ICLTensor *input, ICLTensor *output, const PermutationVector &perm);
+ /** Set the input and output of the kernel.
+ *
+ * @note Arbitrary permutation vectors are supported with rank not greater than 4
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input The input tensor to permute. Data types supported: All.
+ * @param[out] output The output tensor. Data types supported: Same as @p input
+ * @param[in] perm Permutation vector
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PermutationVector &perm);
/** Static function to check if given info will lead to a valid configuration of @ref CLPermuteKernel
*
* @note Arbitrary permutation vectors are supported with rank not greater than 4
*/
void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float scale,
ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ /** Initialise the kernel's inputs, output, scale and overflow/rounding policies.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
+ * @param[in] input2 An input tensor. Data types supported: same as @p input1.
+ * @param[out] output The output tensor, Data types supported: same as @p input1. Note: U8 requires both inputs to be U8.
+ * @param[in] scale Scale to apply after multiplication.
+ * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
+ * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate
+ * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float scale,
+ ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
/** Static function to check if given info will lead to a valid configuration of @ref CLPixelWiseMultiplicationKernel
*
* @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
*/
void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ /** Initialise the kernel's inputs, output and optional fused activation.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input1 An input tensor. Data types supported: F32. Number of channels supported: 2.
+ * @param[in] input2 An input tensor. Data types supported: same as @p input1. Number of channels supported: same as @p input1.
+ * @param[out] output The output tensor, Data types supported: same as @p input1. Number of channels supported: same as @p input1.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
/** Static function to check if given info will lead to a valid configuration of @ref CLComplexPixelWiseMultiplicationKernel
*
* @param[in] input1 An input tensor info. Data types supported: F32. Number of channels supported: 2.
* @param[out] indices (optional) The indices of the maximal values. Data type supported: U32.
*/
void configure(const ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info, ICLTensor *indices = nullptr);
+ /** Set the input and output tensors.
+ *
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[out] output Destination tensor. Data types supported: Same as @p input.
+ * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
+ * @param[out] indices (optional) The indices of the maximal values. Data type supported: U32.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info, ICLTensor *indices = nullptr);
/** Static function to check if given info will lead to a valid configuration of @ref CLPoolingLayerKernel
*
* @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* @param[in] aspect_ratios Aspect ratio values
*/
void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const PriorBoxLayerInfo &info, cl::Buffer *min, cl::Buffer *max, cl::Buffer *aspect_ratios);
+ /** Set the input and output tensors.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input1 First source tensor. Data types supported: F32. Data layouts supported: NCHW/NHWC.
+ * @param[in] input2 Second source tensor. Data types and layouts supported: same as @p input1
+ * @param[out] output Destination tensor. Output dimensions are [W * H * num_priors * 4, 2]. Data types and layouts supported: same as @p input1
+ * @param[in] info Prior box layer info.
+ * @param[in] min Minimum prior box values
+ * @param[in] max Maximum prior box values
+ * @param[in] aspect_ratios Aspect ratio values
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const PriorBoxLayerInfo &info, cl::Buffer *min, cl::Buffer *max,
+ cl::Buffer *aspect_ratios);
/** Static function to check if given info will lead to a valid configuration of @ref CLPriorBoxLayerKernel
*
* @param[in] input1 First source tensor info. Data types supported: F32. Data layouts supported: NCHW/NHWC.
* @note Output auto initialization is not supported by this kernel
*/
void configure(const ICLTensor *input, ICLTensor *output);
+ /** Set the input, output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16.
+ * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16.
+ *
+ * @note Output auto initialization is not supported by this kernel
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref CLQuantizationLayerKernel
*
* @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16.
* @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array.
*/
void configure(const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info);
+ /** Set the input and output tensors.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner
+ * as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ].
+ * Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8/QASYMM8_SIGNED, otherwise same as @p input
+ * @param[out] output Destination tensor. Data types supported: Same as @p input.
+ * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo.
+ *
+ * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled
+ * width and pooled height.
+ * @note The z dimensions of @p output tensor and @p input tensor must be the same.
+ * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info);
/** Static function to check if given info will lead to a valid configuration of @ref CLROIAlignLayerKernel
*
* @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array.
*/
void configure(const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info);
+ /** Set the input and output tensors.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: F16/F32.
+ * @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner
+ * as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ]. Data types supported: U16
+ * @param[out] output Destination tensor. Data types supported: Same as @p input.
+ * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo.
+ *
+ * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled
+ * width and pooled height.
+ * @note The z dimensions of @p output tensor and @p input tensor must be the same.
+ * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* @param[in] step The gap between each pair of values in the sequence.
*/
void configure(ICLTensor *output, float start, float end, float step);
+ /** Initialize the kernel's output tensor, start, end and step of the sequence.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[out] output Output tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
+ * @param[in] start The starting value of the sequence.
+ * @param[in] end The ending (not including) value of the sequence.
+ * @param[in] step The gap between each pair of values in the sequence.
+ */
+ void configure(CLCompileContext &compile_context, ICLTensor *output, float start, float end, float step);
/** Static function to check if given info will lead to a valid configuration of @ref CLRangeKernel
*
* @param[in] output Output tensor info. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
* @param[in] width (Optional) In case of x-axis we also need to provide the width of the input image.
*/
void configure(const ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op, unsigned int width = 0);
+ /** Set the input and output tensors.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/S32/F16/F32.
+ * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input.
+ * Output will have the same number of dimensions as input.
+ * @param[in] axis Axis along which to reduce. Supported reduction axis : 0,1,2,3
+ * @param[in] op Reduction operation to perform. Operations supported: MEAN_SUM, PROD, SUM_SQUARE, SUM, MIN, MAX
+ * @param[in] width (Optional) In case of x-axis we also need to provide the width of the input image.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op, unsigned int width = 0);
/** Static function to check if given info will lead to a valid configuration of @ref CLReductionOperationKernel.
*
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
*/
void configure(const ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, InterpolationPolicy policy, bool border_undefined);
+ /** Initialize the kernel's input, output and border mode.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[in] map_x Map for X coordinates. Data types supported: F32.
+ * @param[in] map_y Map for Y coordinates. Data types supported: F32.
+ * @param[out] output Destination tensor. Data types supported: U8. All but the lowest two dimensions must be the same size as in the input tensor, i.e. remapping is only performed within the XY-plane.
+ * @param[in] policy The interpolation type.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, InterpolationPolicy policy, bool border_undefined);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
* It defines the spatial distance between 2 consecutive pixels in the x and y direction
*/
void configure(const ICLTensor *input, ICLTensor *output, int32_t stride);
+ /** Initialize the kernel's input, output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: U8/S8/QASYMM8/QASYMM8_SIGNED/U16/S16/F16/U32/S32/F32.
+ * @param[out] output Destination tensor with tensor shape:
+ * [width_input / stride, height_input / stride, channels_input * stride * stride, batch_size]. This means the output has
+ * the same number of input elements. Data types supported: same as @p input.
+ * @param[in] stride Stride value to use for reorganizing the values in the output tensor.
+ * It defines the spatial distance between 2 consecutive pixels in the x and y direction
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t stride);
/** Static function to check if given info will lead to a valid configuration of @ref CLReorgLayerKernel
*
* @param[in] input Source tensor. Data types supported: All.
* @param[out] output Destination tensor. Data type supported: Same as @p input
*/
void configure(const ICLTensor *input, ICLTensor *output);
+ /** Set the input and output of the kernel
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data type supported: All.
+ * @param[out] output Destination tensor. Data type supported: Same as @p input
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref CLReshapeLayerKernel
*
* @param[in] axis Axis tensor. Contains the indices of the dimensions to reverse. Data type supported: U32
*/
void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *axis);
+ /** Initialise the kernel's inputs and output
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Input tensor. Data types supported: All.
+ * @param[out] output Output tensor. Data type supported: Same as @p input
+ * @param[in] axis Axis tensor. Contains the indices of the dimensions to reverse. Data type supported: U32
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *axis);
/** Static function to check if given info will lead to a valid configuration of @ref CLReverseKernel
*
* @param[in] align_corners (Optional) Align corners of input and output, only affecting bilinear policy with TOP_LEFT sampling policy. Defaults to false.
*/
void configure(const ICLTensor *input, ICLTensor *output, InterpolationPolicy policy, BorderMode border_mode, SamplingPolicy sampling_policy = SamplingPolicy::CENTER, bool align_corners = false);
+ /** Initialise the kernel's inputs, output and interpolation policy
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32
+ * @param[out] output Destination tensor. Data types supported: Same as @p input
+ * All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane.
+ * @param[in] policy Interpolation type to use
+ * @param[in] border_mode Selected border mode.
+ * @param[in] sampling_policy (Optional) Sampling policy used by the interpolation. Defaults to @ref SamplingPolicy::CENTER
+ * @param[in] align_corners (Optional) Align corners of input and output, only affecting bilinear policy with TOP_LEFT sampling policy. Defaults to false.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, InterpolationPolicy policy, BorderMode border_mode,
+ SamplingPolicy sampling_policy = SamplingPolicy::CENTER, bool align_corners = false);
/** Static function to check if given info will lead to a valid configuration of @ref CLScaleKernel
*
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
*/
void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
+ /** Initialise the kernel's source, destination and border.
+ *
+ * @note At least one of output_x or output_y must be set.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16.
+ * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
* @param[out] output Output tensor. Data types supported: Same as @p x.
*/
void configure(const ICLTensor *c, const ICLTensor *x, const ICLTensor *y, ICLTensor *output);
+ /** Initialise the kernel's inputs and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] c Condition input tensor. Data types supported: U8.
+ * @param[in] x First input tensor. Data types supported: All.
+ * @param[in] y Second input tensor. Data types supported: Same as @p x
+ * @param[out] output Output tensor. Data types supported: Same as @p x.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *c, const ICLTensor *x, const ICLTensor *y, ICLTensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref CLSelectKernel
*
* @param[in] c Condition input tensor. Data types supported: U8.
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
*/
void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
+ /** Initialise the kernel's source, destination and border.
+ *
+ * @note At least one of output_x or output_y must be set.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16.
+ * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
*/
void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
+ /** Initialise the kernel's source, destination and border.
+ *
+ * @note At least one of output_x or output_y must be set.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16.
+ * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
* @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
*/
void configure(const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
+ /** Initialise the kernel's source, destination and border.
+ *
+ * @note At least one of output_x or output_y must be set, together with its corresponding input.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input_x (Optional) Input for X (X output of horizontal pass). Data types supported: S16.
+ * @param[in] input_y (Optional) Input for Y (Y output of horizontal pass). Data types supported: S16.
+ * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S16.
+ * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S16.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
*/
void configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
+ /** Initialise the kernel's source, destination and border.
+ *
+ * @note At least one of output_x or output_y must be set.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S32.
+ * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S32.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
* @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
*/
void configure(const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
+ /** Initialise the kernel's source, destination and border.
+ *
+ * @note At least one of output_x or output_y must be set, together with its corresponding input.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input_x (Optional) Input for X (X output of horizontal pass). Data types supported: S32.
+ * @param[in] input_y (Optional) Input for Y (Y output of horizontal pass). Data types supported: S32.
+ * @param[out] output_x (Optional) Destination tensor for the X gradient, Data types supported: S32.
+ * @param[out] output_y (Optional) Destination tensor for the Y gradient, Data types supported: S32.
+ * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* @param[out] output Destination tensor. Data types supported: same as @p input
*/
void configure(const ICLTensor *input, ICLTensor *output);
+ /** Set the input and output tensors.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32
+ * @param[out] output Destination tensor. Data types supported: same as @p input
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref CLLogits1DMaxKernel
*
* @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32
* @param[in] beta (Optional) A scaling factor for the exponent. Defaults to 1.0
*/
void configure(const ICLTensor *input, const ICLTensor *max, ICLTensor *output, ICLTensor *sum, float beta = 1.0f);
+ /** Set the input and output tensors.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32
+ * @param[in] max Max values tensor. Data types supported: same as @p input
+ * @param[out] output Destination tensor. Data types supported: S32 for QASYMM8 @p input, or same as @p input
+ * @param[out] sum Sum of 1D logits tensor. Data types supported: S32 for QASYMM8 @p input, or same as @p input
+ * @param[in] beta (Optional) A scaling factor for the exponent. Defaults to 1.0
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *max, ICLTensor *output, ICLTensor *sum, float beta = 1.0f);
/** Static function to check if given info will lead to a valid configuration of @ref CLLogits1DShiftExpSumKernel
*
* @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32
* @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo.
*/
void configure(const ICLTensor *input, ICLTensor *max, ICLTensor *output, ICLTensor *sum, const SoftmaxKernelInfo &info);
+ /** Set the input and output tensors.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: F16/F32
+ * @param[in,out] max Max values tensor. Data types supported: same as @p input
+ * @param[out] output Destination tensor. Data types supported: same as @p input
+ * @param[out] sum Sum of 1D logits tensor. Data types supported: same as @p input
+ * @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *max, ICLTensor *output, ICLTensor *sum, const SoftmaxKernelInfo &info);
/** Static function to check if given info will lead to a valid configuration of @ref CLLogits1DMaxShiftExpSumKernel
*
* @param[in] input Source tensor. Data types supported: F16/F32
* @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo.
*/
void configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, const SoftmaxKernelInfo &info);
+ /** Set the input and output tensors.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: S32/F16/F32
+ * @param[in] sum Sum tensor. Dimensions should be dim(input)-1. Data types supported: same as @p input
+ * @param[out] output Destination tensor. Data types supported: QASYMM8 for S32 @p input, or same as @p input
+ * @param[in] info Contains information consumed by kernels for softmax described in @ref SoftmaxKernelInfo.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, const SoftmaxKernelInfo &info);
/** Static function to check if given info will lead to a valid configuration of @ref CLLogits1DNormKernel
*
* @param[in] input Source tensor. Data types supported: S32/F16/F32
* @param[out] output Tensor output. Data types supported: same as @p input
*/
void configure(const ICLTensor *input, const ICLTensor *block_shape, const ICLTensor *paddings, ICLTensor *output);
+ /** Initialise the kernel's inputs and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
+ * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32
+ * @param[in] paddings 2-D tensor with shape [2, M]. Data types supported: S32
+ * @param[out] output Tensor output. Data types supported: same as @p input
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *block_shape, const ICLTensor *paddings, ICLTensor *output);
/** Initialise the kernel's input and output. (Static block shape and paddings)
*
* @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
* @param[out] output Tensor output. Data types supported: same as @p input
*/
void configure(const ICLTensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, ICLTensor *output);
+ /** Initialise the kernel's input and output. (Static block shape and paddings)
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
+ * @param[in] block_shape_x Block shape x value.
+ * @param[in] block_shape_y Block shape y value.
+ * @param[in] padding_left The left padding of the output tensor.
+ * @param[in] padding_right The right padding of the output tensor.
+ * @param[out] output Tensor output. Data types supported: same as @p input
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, ICLTensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref CLSpaceToBatchLayerKernel
*
* @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
* @param[in] block_shape Block shape value.
*/
void configure(const ICLTensor *input, ICLTensor *output, int32_t block_shape);
+ /** Initialise the kernel's inputs and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
+ * @param[out] output Tensor output. Data types supported: same as @p input
+ * @param[in] block_shape Block shape value.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t block_shape);
/** Static function to check if given info will lead to a valid configuration of @ref CLSpaceToDepthLayerKernel.
*
* @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All.
*
*/
void configure(const ICLTensor *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, ICLTensor *output);
+ /** Initialise the kernel's inputs and output
+ *
+ * @note Supported input tensor rank: up to 4
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Input tensor. Data types supported: All.
+ * @param[in] axis The dimension to stack the tensors along. It must be smaller than the number of input dimensions.
+ * @param[in] idx_input Index of the input tensor in the list of tensors to stack.
+ * All tensors in the list must have the same shape
+ * @param[in] num_tensors Number of tensors to stack
+ * @param[out] output Output tensor. Data types supported: Same as @p input.
+ *
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, ICLTensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref CLStackLayerKernel
*
* @note Supported input tensor rank: up to 4
void configure(const ICLTensor *input, ICLTensor *output,
const Coordinates &starts, const Coordinates &ends, const BiStrides &strides,
int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask);
+ /** Configure kernel
+ *
+ * @note Supported tensor rank: up to 4
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data type supported: All.
+ * @param[out] output Destination tensor. Data type supported: Same as @p input
+ * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input).
+ * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input).
+ * @param[in] strides The strides of the dimensions of the input tensor to be sliced. The length must be of rank(input).
+ * @param[in] begin_mask If the ith bit of begin_mask is set, starts[i] is ignored and the fullest possible range in that dimension is used instead.
+ * @param[in] end_mask If the ith bit of end_mask is set, ends[i] is ignored and the fullest possible range in that dimension is used instead.
+ * @param[in] shrink_axis_mask If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1.
+ * A slice of size 1 starting from starts[i] in the dimension must be preserved.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output,
+ const Coordinates &starts, const Coordinates &ends, const BiStrides &strides,
+ int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask);
/** Static function to check if given info will lead to a valid configuration of @ref CLStridedSliceKernel
*
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* @param[out] output The output tensor. Data types supported: U8, S16.
*/
void configure(const ICLTensor *input, const ICLLut *lut, ICLTensor *output);
+ /** Initialise the kernel's input, lut and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input An input tensor. Data types supported: U8, S16.
+ * @param[in] lut The input LUT. Data types supported: U8, S16.
+ * @param[out] output The output tensor. Data types supported: U8, S16.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLLut *lut, ICLTensor *output);
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLTABLELOOKUPKERNEL_H */
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
*/
void configure(const ICLTensor *input, ICLTensor *output, uint8_t threshold,
uint8_t false_value, uint8_t true_value, ThresholdType type, uint8_t upper);
+ /** Initialise the kernel's input, output and threshold parameters.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input An input tensor. Data types supported: U8
+ * @param[out] output The output tensor. Data types supported: U8.
+ * @param[in] threshold Threshold. When the threshold type is RANGE, this is used as the lower threshold.
+ * @param[in] false_value value to set when the condition is not respected.
+ * @param[in] true_value value to set when the condition is respected.
+ * @param[in] type Thresholding type. Either RANGE or BINARY.
+ * @param[in] upper Upper threshold. Only used when the thresholding type is RANGE.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, uint8_t threshold,
+ uint8_t false_value, uint8_t true_value, ThresholdType type, uint8_t upper);
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLTHRESHOLDKERNEL_H */
*
*/
void configure(const ICLTensor *input, ICLTensor *output, const Multiples &multiples);
+ /** Set the source, destination of the kernel
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data type supported: All.
+ * @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension.
+ * Cannot have more than 4 elements (tiling in dimensions greater than 4 is not supported).
+ * @param[out] output Destination tensor. Data type supported: Same as @p input
+ *
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Multiples &multiples);
/** Static function to check if given info will lead to a valid configuration of @ref CLTileKernel
*
* @param[in] input Source tensor info. Data type supported: All.
* @param[out] output Output tensor. Data type supported: Same as @p input
*/
void configure(const ICLTensor *input, ICLTensor *output);
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Input tensor. Data types supported: All.
+ * @param[out] output Output tensor. Data type supported: Same as @p input
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref CLTransposeKernel
*
* @param[in] input Input tensor. Data types supported: All.
* @param[in] upsampling_policy Defines the policy to fill the intermediate pixels.
*/
void configure(const ICLTensor *input, ICLTensor *output, const Size2D &info, const InterpolationPolicy upsampling_policy);
+ /** Initialise the kernel's input and output.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+ * @param[out] output Destination tensor. Data types supported: same as @p input.
+ * @param[in] info Contains stride information described in @ref Size2D.
+ * @param[in] upsampling_policy Defines the policy to fill the intermediate pixels.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Size2D &info, const InterpolationPolicy upsampling_policy);
/** Static function to check if given info will lead to a valid configuration of @ref CLUpsampleLayerKernel
*
* @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* @param[in] policy The interpolation type.
*/
void configure(const ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy);
+ /** Initialize the function's source, destination, transformation matrix and interpolation policy.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[out] output Destination tensor, Data types supported: U8.
+ * @param[in] matrix The affine matrix. Must be 2x3 of type float.
+ * The matrix argument requires 9 values, the last 3 values are ignored.
+ * @param[in] policy The interpolation type.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy);
// Inherited methods overridden:
BorderSize border_size() const override;
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* @param[in] policy The interpolation type.
*/
void configure(const ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy);
+ /** Initialize the function's source, destination, perspective matrix and interpolation policy.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. Data types supported: U8.
+ * @param[out] output Destination tensor. Data types supported: U8.
+ * @param[in] matrix The perspective matrix. Must be 3x3 of type float.
+ * @param[in] policy The interpolation type.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy);
// Inherited methods overridden:
BorderSize border_size() const override;
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* Number of groups greater than one are only supported for NCHW data layout, and the number of weights must be a multiple of it.
*/
void configure(const ICLTensor *input, const ICLTensor *biases, ICLTensor *output, unsigned int num_groups = 1);
+ /** Set the input and output of the kernel.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared,
+ * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared. Data types supported: All
+ * @param[in] biases The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with
+ * dimensions [OFM, num_patches] if unshared. Data types supported: F16/F32, for quantized types this must be nullptr.
+ * @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types.
+ * @param[out] output The output tensor. Should be a 2D Tensor if there are no groups and the weights are not shared; a 3D Tensor otherwise.
+ * Data types supported: Same as @p input
+ * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout.
+ * A number of groups greater than one is only supported for NCHW data layout, and the number of weights must be a multiple of it.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *biases, ICLTensor *output, unsigned int num_groups = 1);
/** Static function to check if given info will lead to a valid configuration of @ref CLWeightsReshapeKernel
*
* @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared,
* @param[out] output Output tensor. Data types supported: Same as @p input1.
*/
void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
+ /** Initialise the kernel's inputs and output
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input1 First input tensor. Data types supported: All.
+ * @param[in] input2 Second input tensor. Data types supported: same as @p input1
+ * @param[out] output Output tensor. Data types supported: Same as @p input1.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref CLWidthConcatenate2TensorsKernel
*
* @param[in] input1 First tensor info. Data types supported: All.
* @param[out] output Output tensor. Data types supported: Same as @p input1.
*/
void configure(const ICLTensor *input1, const ICLTensor *input2, const ICLTensor *input3, const ICLTensor *input4, ICLTensor *output);
+ /** Initialise the kernel's inputs and output
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input1 First input tensor. Data types supported: All.
+ * @param[in] input2 Second input tensor. Data types supported: same as @p input1
+ * @param[in] input3 Third input tensor. Data types supported: same as @p input1
+ * @param[in] input4 Fourth input tensor. Data types supported: same as @p input1
+ * @param[out] output Output tensor. Data types supported: Same as @p input1.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, const ICLTensor *input3, const ICLTensor *input4, ICLTensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref CLWidthConcatenate4TensorsKernel
*
* @param[in] input1 First tensor info. Data types supported: All.
*
*/
void configure(const ICLTensor *input, unsigned int width_offset, ICLTensor *output);
+ /** Initialise the kernel's inputs and output
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Input tensor. Data types supported: All.
+ * @param[in] width_offset The offset on the X axis.
+ * @param[in,out] output Output tensor. Data types supported: Same as @p input.
+ *
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, unsigned int width_offset, ICLTensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref CLWidthConcatenateLayerKernel
*
* @param[in] input Input tensor info. Data types supported: All.
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo
*/
void configure(const ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info);
+ /** Set the input and output tensor.
+ *
+ * @note Winograd filter transform supports the following configurations for NCHW data layout
+ * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3),
+ * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
+ * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
+ *
+ * @note Winograd filter transform supports the following configurations for NHWC data layout
+ * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
+ * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
+ *
+ * Strides: only unit strides
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. The input is a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] (NCHW data layout) or [IFM, kernel_x, kernel_y, OFM] (NHWC data layout). Data types supported: F16/F32.
+ * @param[out] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_filter_transform_shape. Data types supported: Same as @p input
+ * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info);
/** Static function to check if given info will lead to a valid configuration of @ref CLWinogradFilterTransformKernel
*
* @note Winograd filter transform supports the following configurations for NCHW data layout
/*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo.
*/
void configure(const ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info);
+ /** Set the input and output of the kernel.
+ *
+ * @note Winograd input transform supports the following configurations for NCHW data layout
+ * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3),
+ * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
+ * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
+ *
+ * @note Winograd input transform supports the following configurations for NHWC data layout
+ * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
+ * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
+ *
+ * Strides: only unit strides
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input The input tensor to transform. Data types supported: F16/F32
+ * @param[out] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_input_transform_shape. Data types supported: Same as @p input
+ * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info);
/** Static function to check if given info will lead to a valid configuration of @ref CLWinogradInputTransformKernel
*
* @note Winograd input transform supports the following configurations for NCHW data layout
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
*/
void configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const WinogradInfo &winograd_info, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ /** Set the input and output tensor.
+ *
+ * @note Winograd output transform supports the following configurations for NCHW data layout
+ * F(output tile, kernel size):F(2x2, 3x3), F(2x1, 3x1), F(1x2, 1x3),
+ * F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
+ * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
+ *
+ * @note Winograd output transform supports the following configurations for NHWC data layout
+ * F(output tile, kernel size):F(4x4, 3x3), F(4x1, 3x1), F(1x4, 1x3),
+ * F(4x4, 5x5), F(4x1, 5x1), F(1x4, 1x5)
+ *
+ * Strides: only unit strides
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor with shape [C, N, K, batches]. Data types supported: F16/F32.
+ * @param[in] bias Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. It can be a nullptr. Data type supported: same as @p input
+ * @param[out] output The output tensor. The shape for this tensor can be calculated using the utility function @p compute_winograd_output_transform_shape. Data types supported: Same as @p input
+ * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ */
+ void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const WinogradInfo &winograd_info,
+ const ActivationLayerInfo &act_info = ActivationLayerInfo());
/** Static function to check if given info will lead to a valid configuration of @ref CLWinogradOutputTransformKernel
*
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* @param[in] num_classes Number of classes to activate (must be submultiple of @p input channels)
*/
void configure(ICLTensor *input, ICLTensor *output, const ActivationLayerInfo &act_info, int32_t num_classes);
+ /** Set the input and output tensor.
+ *
+ * @note If the output tensor is a nullptr, the activation function will be performed in-place
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result
+ * of the activation function. Data types supported: F16/F32.
+ * @param[out] output Destination tensor. Data type supported: same as @p input
+ * @param[in] act_info Activation layer information.
+ * @param[in] num_classes Number of classes to activate (must be submultiple of @p input channels)
+ */
+ void configure(CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const ActivationLayerInfo &act_info, int32_t num_classes);
/** Static function to check if given info will lead to a valid configuration of @ref CLYOLOLayerKernel
*
* @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
virtual void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U),
const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) = 0;
+ /** Initialize the function's source, destination, conv and border_size.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor. DataType supported: QASYMM8/F16/F32.
+ * @param[in] weights Weights tensor. A 3D tensor with dimensions [3, 3, IFM].
+ * Data type supported: Same as @p input, QASYMM8/QSYMM8_PER_CHANNEL when input is QASYMM8.
+ * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
+ * Data type supported: Same as @p input, S32 when input is QASYMM8.
+ * @param[out] output Destination tensor. Data type supported: Same as @p input.
+ * @param[in] conv_info Padding and stride information to use for the convolution.
+ * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
+ * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported for QASYMM8.
+ * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
+ * @param[in] output_multipliers (Optional) Output multipliers tensor for quantized computations. In case of per-channel quantization,
+ * the number of multipliers must be equal to the number of filters (IFM). Supported data types: S32
+ * @param[in] output_shifts (Optional) Output shifts tensor for quantized computations. In case of per-channel quantization,
+ * the number of shifts must be equal to the number of filters (IFM). Supported data types: S32
+ */
+ virtual void configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
+ unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U),
+ const ICLTensor *output_multipliers = nullptr, const ICLTensor *output_shifts = nullptr) = 0;
protected:
BorderSize _border_size;
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
}
void CLAbsoluteDifferenceKernel::configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output);
+}
+
+void CLAbsoluteDifferenceKernel::configure(CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8, DataType::S16);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8, DataType::S16);
build_opts.insert("-DDATA_TYPE_OUT=" + get_cl_type_from_data_type(output->info()->data_type()));
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("absdiff", build_opts));
+ _kernel = create_kernel(compile_context, "absdiff", build_opts);
// Configure kernel window
constexpr unsigned int num_elems_processed_per_iteration = 16;
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
} // namespace
void CLAccumulateKernel::configure(const ICLTensor *input, ICLTensor *accum)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), input, accum);
+}
+
+void CLAccumulateKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *accum)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(accum, 1, DataType::S16);
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("accumulate"));
+ _kernel = create_kernel(compile_context, "accumulate");
// Make sure _kernel is initialized before calling the parent's configure
ICLSimple2DKernel::configure(input, accum, num_elems_processed_per_iteration);
}
void CLAccumulateWeightedKernel::configure(const ICLTensor *input, float alpha, ICLTensor *accum)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), input, alpha, accum);
+}
+
+void CLAccumulateWeightedKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, float alpha, ICLTensor *accum)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(accum, 1, DataType::U8);
ARM_COMPUTE_ERROR_ON(alpha < 0.0 || alpha > 1.0);
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("accumulate_weighted"));
+ _kernel = create_kernel(compile_context, "accumulate_weighted");
// Set static kernel arguments
unsigned int idx = 2 * num_arguments_per_2D_tensor(); //Skip the input and output parameters
}
void CLAccumulateSquaredKernel::configure(const ICLTensor *input, uint32_t shift, ICLTensor *accum)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), input, shift, accum);
+}
+
+void CLAccumulateSquaredKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, uint32_t shift, ICLTensor *accum)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(accum, 1, DataType::S16);
ARM_COMPUTE_ERROR_ON(shift > 15);
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("accumulate_squared"));
+ _kernel = create_kernel(compile_context, "accumulate_squared");
// Set static kernel arguments
unsigned int idx = 2 * num_arguments_per_2D_tensor(); //Skip the input and output parameters
}
} // namespace
-CLActivationLayerKernel::CLActivationLayerKernel(CLCoreRuntimeContext *ctx)
- : _input(nullptr), _output(nullptr), _run_in_place(false), _ctx(ctx)
+CLActivationLayerKernel::CLActivationLayerKernel()
+ : _input(nullptr), _output(nullptr), _run_in_place(false)
{
}
void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), input, output, act_info);
+}
+
+void CLActivationLayerKernel::configure(CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input);
}
// Create kernel
- _kernel = create_opencl_kernel(_ctx, kernel_name, build_opts);
+ _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
+
// Make sure _kernel is initialized before calling the parent's configure
_input = input;
_output = output;
}
void CLArgMinMaxLayerKernel::configure(const ICLTensor *input, const ICLTensor *prev_output, ICLTensor *output, unsigned int axis, ReductionOperation op)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), input, prev_output, output, axis, op);
+}
+
+void CLArgMinMaxLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *prev_output, ICLTensor *output, unsigned int axis, ReductionOperation op)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), (prev_output != nullptr) ? prev_output->info() : nullptr, output->info(), axis, op));
default:
ARM_COMPUTE_ERROR("Not supported");
}
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("arg_min_max_" + kernel_axis_name, build_opts.options()));
+ _kernel = create_kernel(compile_context, "arg_min_max_" + kernel_axis_name, build_opts.options());
// Configure kernel window
ICLKernel::configure_internal(std::get<1>(win_config), lws_hint);
}
void CLBatchConcatenateLayerKernel::configure(const ICLTensor *input, unsigned int batch_offset, ICLTensor *output)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), input, batch_offset, output);
+}
+
+void CLBatchConcatenateLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, unsigned int batch_offset, ICLTensor *output)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), batch_offset, output->info()));
}
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("concatenate", build_opts.options()));
+ _kernel = create_kernel(compile_context, "concatenate", build_opts.options());
// Configure kernel window
auto win_config = validate_and_configure_window(input->info(), batch_offset, output->info());
void CLBatchNormalizationLayerKernel::configure(ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta, const ICLTensor *gamma,
float epsilon, ActivationLayerInfo act_info)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), input, output, mean, var, beta, gamma, epsilon, act_info);
+}
+
+void CLBatchNormalizationLayerKernel::configure(CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta,
+ const ICLTensor *gamma,
+ float epsilon, ActivationLayerInfo act_info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, mean, var);
build_opts.add_option_if(gamma == nullptr, "-DUSE_DEFAULT_GAMMA");
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("batchnormalization_layer_" + lower_string(string_from_data_layout(input->info()->data_layout())), build_opts.options()));
+ _kernel = create_kernel(compile_context, "batchnormalization_layer_" + lower_string(string_from_data_layout(input->info()->data_layout())), build_opts.options());
// Set kernel static arguments
unsigned int include_output = (!_run_in_place) ? 1 : 0;
}
void CLBatchToSpaceLayerKernel::configure(const ICLTensor *input, const ICLTensor *block_shape, ICLTensor *output)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), input, block_shape, output);
+}
+
+void CLBatchToSpaceLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *block_shape, ICLTensor *output)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), block_shape->info(), output->info()));
build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
build_opts.add_option("-DBATCH_SIZE=" + support::cpp11::to_string(input->info()->dimension(3)));
build_opts.add_option("-DWIDTH_IN=" + support::cpp11::to_string(input->info()->dimension(idx_width)));
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("batch_to_space_" + lower_string(string_from_data_layout(input->info()->data_layout())), build_opts.options()));
+ _kernel = create_kernel(compile_context, "batch_to_space_" + lower_string(string_from_data_layout(input->info()->data_layout())), build_opts.options());
// Configure kernel window
Window win = calculate_max_window(*input->info(), Steps());
}
void CLBatchToSpaceLayerKernel::configure(const ICLTensor *input, const int32_t block_shape_x, const int32_t block_shape_y, ICLTensor *output)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), input, block_shape_x, block_shape_y, output);
+}
+
+void CLBatchToSpaceLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, const int32_t block_shape_x, const int32_t block_shape_y, ICLTensor *output)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
build_opts.add_option("-DBLOCK_SHAPE_X=" + support::cpp11::to_string(block_shape_x));
build_opts.add_option("-DBLOCK_SHAPE_Y=" + support::cpp11::to_string(block_shape_y));
build_opts.add_option("-DWIDTH_IN=" + support::cpp11::to_string(input->info()->dimension(idx_width)));
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("batch_to_space_static_" + lower_string(string_from_data_layout(input->info()->data_layout())), build_opts.options()));
+ _kernel = create_kernel(compile_context, "batch_to_space_static_" + lower_string(string_from_data_layout(input->info()->data_layout())), build_opts.options());
// Configure kernel window
Window win = calculate_max_window(*input->info(), Steps());
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
{
}
void CLBitwiseAndKernel::configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output);
+}
+
+void CLBitwiseAndKernel::configure(CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8);
_output = output;
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("bitwise_and"));
+ _kernel = create_kernel(compile_context, "bitwise_and");
// Configure kernel window
constexpr unsigned int num_elems_processed_per_iteration = 16;
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
using namespace arm_compute;
void CLBitwiseNotKernel::configure(const ICLTensor *input, ICLTensor *output)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), input, output);
+}
+
+void CLBitwiseNotKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
_output = output;
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("bitwise_not"));
+ _kernel = create_kernel(compile_context, "bitwise_not");
// Configure kernel window
constexpr unsigned int num_elems_processed_per_iteration = 16;
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
}
void CLBitwiseOrKernel::configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output);
+}
+
+void CLBitwiseOrKernel::configure(CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8);
_output = output;
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("bitwise_or"));
+ _kernel = create_kernel(compile_context, "bitwise_or");
// Configure kernel window
constexpr unsigned int num_elems_processed_per_iteration = 16;
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
}
void CLBitwiseXorKernel::configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output);
+}
+
+void CLBitwiseXorKernel::configure(CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8);
_output = output;
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("bitwise_xor"));
+ _kernel = create_kernel(compile_context, "bitwise_xor");
// Configure kernel window
constexpr unsigned int num_elems_processed_per_iteration = 16;
}
void CLBoundingBoxTransformKernel::configure(const ICLTensor *boxes, ICLTensor *pred_boxes, const ICLTensor *deltas, const BoundingBoxTransformInfo &info)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), boxes, pred_boxes, deltas, info);
+}
+
+void CLBoundingBoxTransformKernel::configure(CLCompileContext &compile_context, const ICLTensor *boxes, ICLTensor *pred_boxes, const ICLTensor *deltas, const BoundingBoxTransformInfo &info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(boxes, pred_boxes, deltas);
auto_init_if_empty(*pred_boxes->info(), deltas->info()->clone()->set_data_type(boxes->info()->data_type()).set_quantization_info(boxes->info()->quantization_info()));
// Create kernel
const std::string kernel_name = (is_quantized) ? "bounding_box_transform_quantized" : "bounding_box_transform";
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
// Since the number of columns is a multiple of 4 by definition, we don't need to pad the tensor
const unsigned int num_elems_processed_per_iteration = 4;
/*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
}
void CLBox3x3Kernel::configure(const ICLTensor *input, ICLTensor *output, bool border_undefined)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), input, output, border_undefined);
+}
+
+void CLBox3x3Kernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
};
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("convolution3x3_static", build_opts));
+ _kernel = create_kernel(compile_context, "convolution3x3_static", build_opts);
// Configure kernel window
constexpr unsigned int num_elems_processed_per_iteration = 8;
}
void CLGradientKernel::configure(const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase, int32_t norm_type)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), gx, gy, magnitude, phase, norm_type);
+}
+
+void CLGradientKernel::configure(CLCompileContext &compile_context, const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase, int32_t norm_type)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(gx, 1, DataType::S16, DataType::S32);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(gy, 1, DataType::S16, DataType::S32);
// Create kernel
const std::string kernel_name = (norm_type == 1) ? std::string("combine_gradients_L1") : std::string("combine_gradients_L2");
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, built_opts));
+ _kernel = create_kernel(compile_context, kernel_name, built_opts);
// Configure kernel window
constexpr unsigned int num_elems_processed_per_iteration = 4;
}
void CLEdgeNonMaxSuppressionKernel::configure(const ICLTensor *magnitude, const ICLTensor *phase, ICLTensor *output, int32_t lower_thr, bool border_undefined)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), magnitude, phase, output, lower_thr, border_undefined);
+}
+
+void CLEdgeNonMaxSuppressionKernel::configure(CLCompileContext &compile_context, const ICLTensor *magnitude, const ICLTensor *phase, ICLTensor *output, int32_t lower_thr, bool border_undefined)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(magnitude, 1, DataType::U16, DataType::U32);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(phase, 1, DataType::U8);
// Create kernel
const std::string kernel_name = std::string("suppress_non_maximum");
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, built_opts));
+ _kernel = create_kernel(compile_context, kernel_name, built_opts);
// Set minimum threshold argument
unsigned int idx = 3 * num_arguments_per_2D_tensor(); //Skip the input and output parameters
void CLEdgeTraceKernel::configure(const ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr,
ICLTensor *visited, ICLTensor *recorded, ICLTensor *l1_stack, ICLTensor *l1_stack_counter)
+{
+    // Overload without an explicit compile context: take the one exposed by
+    // CLKernelLibrary::get() and delegate to the overload that accepts it.
+    auto &default_ctx = CLKernelLibrary::get().get_compile_context();
+    configure(default_ctx, input, output, upper_thr, lower_thr, visited, recorded, l1_stack, l1_stack_counter);
+}
+
+void CLEdgeTraceKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr,
+ ICLTensor *visited, ICLTensor *recorded, ICLTensor *l1_stack, ICLTensor *l1_stack_counter)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U16, DataType::U32);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
// Create kernel
const std::string kernel_name = std::string("hysteresis");
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, built_opts));
+ _kernel = create_kernel(compile_context, kernel_name, built_opts);
// Set constant kernel args
unsigned int width = _input->info()->dimension(0);
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
}
void CLChannelCombineKernel::configure(const ICLTensor *plane0, const ICLTensor *plane1, const ICLTensor *plane2, const ICLTensor *plane3, ICLTensor *output)
+{
+    // Tensor overload without an explicit compile context: take the one exposed
+    // by CLKernelLibrary::get() and delegate to the overload that accepts it.
+    auto &default_ctx = CLKernelLibrary::get().get_compile_context();
+    configure(default_ctx, plane0, plane1, plane2, plane3, output);
+}
+
+void CLChannelCombineKernel::configure(CLCompileContext &compile_context, const ICLTensor *plane0, const ICLTensor *plane1, const ICLTensor *plane2, const ICLTensor *plane3, ICLTensor *output)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(plane0, plane1, plane2, output);
ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(plane0);
// Create kernel
std::string kernel_name = "channel_combine_" + string_from_format(output_format);
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name));
+ _kernel = create_kernel(compile_context, kernel_name);
// Configure window
Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration));
}
void CLChannelCombineKernel::configure(const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output)
+{
+    // Multi-image overload without an explicit compile context: take the one
+    // exposed by CLKernelLibrary::get() and delegate to the overload that accepts it.
+    auto &default_ctx = CLKernelLibrary::get().get_compile_context();
+    configure(default_ctx, plane0, plane1, plane2, output);
+}
+
+void CLChannelCombineKernel::configure(CLCompileContext &compile_context, const ICLImage *plane0, const ICLImage *plane1, const ICLImage *plane2, ICLMultiImage *output)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(plane0, plane1, plane2, output);
ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(plane0);
}
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts);
// Configure window
Window win = calculate_max_window(*plane0->info(), Steps(num_elems_processed_per_iteration));
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
}
void CLChannelExtractKernel::configure(const ICLTensor *input, Channel channel, ICLTensor *output)
+{
+    // Tensor overload without an explicit compile context: take the one exposed
+    // by CLKernelLibrary::get() and delegate to the overload that accepts it.
+    auto &default_ctx = CLKernelLibrary::get().get_compile_context();
+    configure(default_ctx, input, channel, output);
+}
+
+void CLChannelExtractKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, Channel channel, ICLTensor *output)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_ERROR_ON(input == output);
// Create kernel
std::string kernel_name = "channel_extract_" + string_from_format(format);
std::set<std::string> build_opts = { ("-DCHANNEL_" + string_from_channel(channel)) };
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts);
// Configure window
Window win = calculate_max_window(*input->info(), Steps(_num_elems_processed_per_iteration));
}
void CLChannelExtractKernel::configure(const ICLMultiImage *input, Channel channel, ICLImage *output)
+{
+    // Multi-image overload without an explicit compile context: take the one
+    // exposed by CLKernelLibrary::get() and delegate to the overload that accepts it.
+    auto &default_ctx = CLKernelLibrary::get().get_compile_context();
+    configure(default_ctx, input, channel, output);
+}
+
+void CLChannelExtractKernel::configure(CLCompileContext &compile_context, const ICLMultiImage *input, Channel channel, ICLImage *output)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(output);
kernel_name = "channel_extract_" + string_from_format(format);
build_opts.insert(("-DCHANNEL_" + string_from_channel(channel)));
}
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts);
// Configure window
Window win = calculate_max_window(*input_plane->info(), Steps(_num_elems_processed_per_iteration));
}
void CLChannelShuffleLayerKernel::configure(const ICLTensor *input, ICLTensor *output, unsigned int num_groups)
+{
+    // Overload without an explicit compile context: take the one exposed by
+    // CLKernelLibrary::get() and delegate to the overload that accepts it.
+    auto &default_ctx = CLKernelLibrary::get().get_compile_context();
+    configure(default_ctx, input, output, num_groups);
+}
+
+void CLChannelShuffleLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, unsigned int num_groups)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
// Create kernel
std::string kernel_name = "channel_shuffle_" + lower_string(string_from_data_layout(data_layout));
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
// Configure kernel window
auto win_config = validate_and_configure_window(input->info(), output->info());
}
void CLCol2ImKernel::configure(const ICLTensor *input, ICLTensor *output, const Size2D &convolved_dims, unsigned int num_groups)
+{
+    // Overload without an explicit compile context: take the one exposed by
+    // CLKernelLibrary::get() and delegate to the overload that accepts it.
+    auto &default_ctx = CLKernelLibrary::get().get_compile_context();
+    configure(default_ctx, input, output, convolved_dims, num_groups);
+}
+
+void CLCol2ImKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Size2D &convolved_dims, unsigned int num_groups)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
build_opts.add_option("-DWIDTH_OUTPUT=" + support::cpp11::to_string(_convolved_dims.width));
build_opts.add_option("-DNUM_GROUPS=" + support::cpp11::to_string(num_groups));
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("col2im", build_opts.options()));
+ _kernel = create_kernel(compile_context, "col2im", build_opts.options());
// Configure kernel window
auto win_config = validate_and_configure_window(input->info(), output->info(), _convolved_dims, num_groups);
}
void CLColorConvertKernel::configure(const ICLTensor *input, ICLTensor *output)
+{
+    // Tensor-to-tensor overload without an explicit compile context: take the one
+    // exposed by CLKernelLibrary::get() and delegate to the overload that accepts it.
+    auto &default_ctx = CLKernelLibrary::get().get_compile_context();
+    configure(default_ctx, input, output);
+}
+
+void CLColorConvertKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output)
{
ARM_COMPUTE_ERROR_ON(input == nullptr);
ARM_COMPUTE_ERROR_ON(output == nullptr);
_output = output;
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name.str()));
+ _kernel = create_kernel(compile_context, kernel_name.str());
// Configure kernel window
Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
}
void CLColorConvertKernel::configure(const ICLMultiImage *input, ICLImage *output)
+{
+    // Multi-image-to-image overload without an explicit compile context: take the one
+    // exposed by CLKernelLibrary::get() and delegate to the overload that accepts it.
+    auto &default_ctx = CLKernelLibrary::get().get_compile_context();
+    configure(default_ctx, input, output);
+}
+
+void CLColorConvertKernel::configure(CLCompileContext &compile_context, const ICLMultiImage *input, ICLImage *output)
{
ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(output);
ARM_COMPUTE_ERROR_ON(output == nullptr);
_output = output;
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name.str()));
+ _kernel = create_kernel(compile_context, kernel_name.str());
// Configure kernel window
const bool has_two_planes = (input->info()->format() == Format::NV12) || (input->info()->format() == Format::NV21);
}
void CLColorConvertKernel::configure(const ICLImage *input, ICLMultiImage *output)
+{
+    // Image-to-multi-image overload without an explicit compile context: take the one
+    // exposed by CLKernelLibrary::get() and delegate to the overload that accepts it.
+    auto &default_ctx = CLKernelLibrary::get().get_compile_context();
+    configure(default_ctx, input, output);
+}
+
+void CLColorConvertKernel::configure(CLCompileContext &compile_context, const ICLImage *input, ICLMultiImage *output)
{
ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input);
ARM_COMPUTE_ERROR_ON(output == nullptr);
_multi_output = output;
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name.str()));
+ _kernel = create_kernel(compile_context, kernel_name.str());
// Configure kernel window
Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
}
void CLColorConvertKernel::configure(const ICLMultiImage *input, ICLMultiImage *output)
+{
+    // Multi-image-to-multi-image overload without an explicit compile context: take the
+    // one exposed by CLKernelLibrary::get() and delegate to the overload that accepts it.
+    auto &default_ctx = CLKernelLibrary::get().get_compile_context();
+    configure(default_ctx, input, output);
+}
+
+void CLColorConvertKernel::configure(CLCompileContext &compile_context, const ICLMultiImage *input, ICLMultiImage *output)
{
unsigned int num_elems_processed_per_iteration = 0;
switch(input->info()->format())
float sub_sampling_input = (has_two_input_planars || (input->info()->format() == Format::IYUV)) ? 0.5f : 1;
float sub_sampling_output = (has_two_output_planars || (output->info()->format() == Format::IYUV)) ? 0.5f : 1;
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name.str()));
+ _kernel = create_kernel(compile_context, kernel_name.str());
Window win = calculate_max_window(*input->cl_plane(0)->info(), Steps(num_elems_processed_per_iteration));
win.set_dimension_step(Window::DimY, 2);
}
void CLComparisonKernel::configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ComparisonOperation operation)
+{
+    // Overload without an explicit compile context: take the one exposed by
+    // CLKernelLibrary::get() and delegate to the overload that accepts it.
+    auto &default_ctx = CLKernelLibrary::get().get_compile_context();
+    configure(default_ctx, input1, input2, output, operation);
+}
+
+void CLComparisonKernel::configure(CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, ComparisonOperation operation)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*input1->info(), *input2->info(), *output->info(), operation));
}
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts);
ICLKernel::configure_internal(win_config.second);
void CLConvertFullyConnectedWeightsKernel::configure(const ICLTensor *input, ICLTensor *output, const TensorShape &original_input_shape,
DataLayout data_layout)
+{
+    // Overload without an explicit compile context: take the one exposed by
+    // CLKernelLibrary::get() and delegate to the overload that accepts it.
+    auto &default_ctx = CLKernelLibrary::get().get_compile_context();
+    configure(default_ctx, input, output, original_input_shape, data_layout);
+}
+
+void CLConvertFullyConnectedWeightsKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const TensorShape &original_input_shape,
+ DataLayout data_layout)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
build_opts.add_option("-DFACTOR_2=" + support::cpp11::to_string(factor_2));
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("convert_fc_weights", build_opts.options()));
+ _kernel = create_kernel(compile_context, "convert_fc_weights", build_opts.options());
// Configure kernel window
Window win = calculate_max_window(*input->info(), Steps());
template <unsigned int matrix_size>
void CLConvolutionKernel<matrix_size>::configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined)
+{
+    // Overload without an explicit compile context: take the one exposed by
+    // CLKernelLibrary::get() and delegate to the overload that accepts it.
+    auto &default_ctx = CLKernelLibrary::get().get_compile_context();
+    configure(default_ctx, input, output, conv, scale, border_undefined);
+}
+
+template <unsigned int matrix_size>
+void CLConvolutionKernel<matrix_size>::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t scale, bool border_undefined)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16);
out_type << "-DDATA_TYPE_OUT=" << get_cl_type_from_data_type(output->info()->data_type());
build_opts.add_option(out_type.str());
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name.str(), build_opts.options()));
+ _kernel = create_kernel(compile_context, kernel_name.str(), build_opts.options());
// Configure kernel window
constexpr unsigned int num_elems_processed_per_iteration = 8;
template <unsigned int matrix_size>
void CLSeparableConvolutionHorKernel<matrix_size>::configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, bool border_undefined)
+{
+    // Overload without an explicit compile context: take the one exposed by
+    // CLKernelLibrary::get() and delegate to the overload that accepts it.
+    auto &default_ctx = CLKernelLibrary::get().get_compile_context();
+    configure(default_ctx, input, output, conv, border_undefined);
+}
+
+template <unsigned int matrix_size>
+void CLSeparableConvolutionHorKernel<matrix_size>::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, bool border_undefined)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U16, DataType::S16, DataType::S32);
// Create kernel
const std::string kernel_name = "convolution_separable1x" + support::cpp11::to_string(matrix_size) + "_static";
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts);
// Configure kernel window
constexpr unsigned int num_elems_processed_per_iteration = 8;
template <unsigned int matrix_size>
void CLSeparableConvolutionVertKernel<matrix_size>::configure(const ICLTensor *input, ICLTensor *output,
const int16_t *conv, uint32_t scale, bool border_undefined, DataType data_type)
+{
+    // Overload without an explicit compile context: take the one exposed by
+    // CLKernelLibrary::get() and delegate to the overload that accepts it.
+    auto &default_ctx = CLKernelLibrary::get().get_compile_context();
+    configure(default_ctx, input, output, conv, scale, border_undefined, data_type);
+}
+
+template <unsigned int matrix_size>
+void CLSeparableConvolutionVertKernel<matrix_size>::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output,
+ const int16_t *conv, uint32_t scale, bool border_undefined, DataType data_type)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U16, DataType::S16, DataType::S32);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16);
// Create kernel
const std::string kernel_name = "convolution_separable" + support::cpp11::to_string(matrix_size) + "x1_static";
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts);
// Configure kernel window
constexpr unsigned int num_elems_processed_per_iteration = 8;
}
void CLConvolutionRectangleKernel::configure(const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t width, uint32_t height, uint32_t scale, bool border_undefined)
+{
+    // Overload without an explicit compile context: take the one exposed by
+    // CLKernelLibrary::get() and delegate to the overload that accepts it.
+    auto &default_ctx = CLKernelLibrary::get().get_compile_context();
+    configure(default_ctx, input, output, conv, width, height, scale, border_undefined);
+}
+
+void CLConvolutionRectangleKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const int16_t *conv, uint32_t width, uint32_t height, uint32_t scale,
+ bool border_undefined)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16);
options.insert("-DMATRIX_WIDTH=" + support::cpp11::to_string(width));
options.insert("-DMATRIX_HEIGHT=" + support::cpp11::to_string(height));
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("convolution_rectangle", options));
+ _kernel = create_kernel(compile_context, "convolution_rectangle", options);
// Configure kernel window
constexpr unsigned int num_elems_processed_per_iteration = 8;
}
void CLCopyKernel::configure(const ICLTensor *input, ICLTensor *output, const PaddingList &padding, Window *output_window)
+{
+    // Overload without an explicit compile context: take the one exposed by
+    // CLKernelLibrary::get() and delegate to the overload that accepts it.
+    auto &default_ctx = CLKernelLibrary::get().get_compile_context();
+    configure(default_ctx, input, output, padding, output_window);
+}
+
+void CLCopyKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PaddingList &padding, Window *output_window)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), padding, output_window));
}
// Build kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("copy_tensor", build_opts.options()));
+ _kernel = create_kernel(compile_context, "copy_tensor", build_opts.options());
}
else
{
}
// Build kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("copy_pad_tensor", build_opts.options()));
+ _kernel = create_kernel(compile_context, "copy_pad_tensor", build_opts.options());
// Configure window
win_config = validate_and_configure_window_with_padding(input->info(), output->info(), padding);
}
void CLCropKernel::configure(const ICLTensor *input, ICLTensor *output, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value, Window *output_window)
+{
+    // Overload without an explicit compile context: take the one exposed by
+    // CLKernelLibrary::get() and delegate to the overload that accepts it.
+    auto &default_ctx = CLKernelLibrary::get().get_compile_context();
+    configure(default_ctx, input, output, start, end, batch_index, extrapolation_value, output_window);
+}
+
+void CLCropKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value,
+ Window *output_window)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_ERROR_THROW_ON(validate(input->info(), output->info(), start, end, batch_index, extrapolation_value, output_window));
build_opts.add_option_if(multi_access_x && remainder_x, "-DLAST_ACCESSED_X=" + support::cpp11::to_string(std::max<int>(output_width_x - vec_size_x, 0)));
build_opts.add_option_if(start.x > end.x, "-DWIDTH_FLIPPED=");
build_opts.add_option_if(start.y > end.y, "-DHEIGHT_FLIPPED=");
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("crop_tensor", build_opts.options()));
+ _kernel = create_kernel(compile_context, "crop_tensor", build_opts.options());
}
Status CLCropKernel::validate(const ITensorInfo *input, const ITensorInfo *output, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value, Window *output_window)
void CLDeconvolutionLayerUpsampleKernel::configure(const ICLTensor *input, ICLTensor *output,
const PadStrideInfo &info)
+{
+    // Overload without an explicit compile context: take the one exposed by
+    // CLKernelLibrary::get() and delegate to the overload that accepts it.
+    auto &default_ctx = CLKernelLibrary::get().get_compile_context();
+    configure(default_ctx, input, output, info);
+}
+
+void CLDeconvolutionLayerUpsampleKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output,
+ const PadStrideInfo &info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
// Create kernel
CLBuildOptions build_opts;
build_opts.add_option(("-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size(input->info()->element_size())));
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("deconvolution_upsample", build_opts.options()));
+ _kernel = create_kernel(compile_context, "deconvolution_upsample", build_opts.options());
constexpr unsigned int num_elems_processed_per_iteration = 1;
void CLDeconvolutionReshapeOutputKernel::configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const ITensorInfo *input_info, const ITensorInfo *weights_info,
const PadStrideInfo &deconv_info)
+{
+    // Overload without an explicit compile context: take the one exposed by
+    // CLKernelLibrary::get() and delegate to the overload that accepts it.
+    auto &default_ctx = CLKernelLibrary::get().get_compile_context();
+    configure(default_ctx, input, bias, output, input_info, weights_info, deconv_info);
+}
+
+void CLDeconvolutionReshapeOutputKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const ITensorInfo *input_info,
+ const ITensorInfo *weights_info,
+ const PadStrideInfo &deconv_info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, input_info, weights_info);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), (bias != nullptr ? bias->info() : nullptr), output->info(), input_info, weights_info, deconv_info));
build_opts.add_option_if(data_layout == DataLayout::NCHW, "-DNUM_FILTERS=" + support::cpp11::to_string(filter_b));
build_opts.add_option_if(_add_bias, "-DADD_BIAS");
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("deconvolution_reshape", build_opts.options()));
+ _kernel = create_kernel(compile_context, "deconvolution_reshape", build_opts.options());
ICLKernel::configure_internal(win_config.second);
// Set config_id for enabling LWS tuning
}
void CLDepthConcatenateLayerKernel::configure(const ICLTensor *input, unsigned int depth_offset, ICLTensor *output)
+{
+    // Overload without an explicit compile context: take the one exposed by
+    // CLKernelLibrary::get() and delegate to the overload that accepts it.
+    auto &default_ctx = CLKernelLibrary::get().get_compile_context();
+    configure(default_ctx, input, depth_offset, output);
+}
+
+void CLDepthConcatenateLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, unsigned int depth_offset, ICLTensor *output)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), depth_offset, output->info()));
}
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("concatenate", build_opts.options()));
+ _kernel = create_kernel(compile_context, "concatenate", build_opts.options());
// Configure kernel window
auto win_config = validate_and_configure_window(input->info(), depth_offset, output->info());
} // namespace
void CLDepthConvertLayerKernel::configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift)
+{
+    // Overload without an explicit compile context: take the one exposed by
+    // CLKernelLibrary::get() and delegate to the overload that accepts it.
+    auto &default_ctx = CLKernelLibrary::get().get_compile_context();
+    configure(default_ctx, input, output, policy, shift);
+}
+
+void CLDepthConvertLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
// Create kernel
const std::string kernel_name = (input_size >= output_size) ? "convert_depth_down" : "convert_depth_up";
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
// Set shift arg
unsigned int idx = 2 * num_arguments_per_3D_tensor(); // Skip the input and output parameters
_kernel.setArg(idx++, shift);
// Configure kernel
- ICLSimple3DKernel::configure(input, output, num_elems_processed_per_iteration);
+ ICLSimple2DKernel::configure(input, output, num_elems_processed_per_iteration);
// Collapse window
const Window &full_window = window();
}
void CLDepthToSpaceLayerKernel::configure(const ICLTensor *input, ICLTensor *output, int32_t block_shape)
+{
+    // Overload without an explicit compile context: take the one exposed by
+    // CLKernelLibrary::get() and delegate to the overload that accepts it.
+    auto &default_ctx = CLKernelLibrary::get().get_compile_context();
+    configure(default_ctx, input, output, block_shape);
+}
+
+void CLDepthToSpaceLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t block_shape)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
build_opts.add_option("-DCHANNEL_SIZE=" + support::cpp11::to_string(input->info()->dimension(idx_channel)));
build_opts.add_option("-DBLOCK_SHAPE=" + support::cpp11::to_string(block_shape));
build_opts.add_option("-DWIDTH_IN=" + support::cpp11::to_string(input->info()->dimension(idx_width)));
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("depth_to_space_" + lower_string(string_from_data_layout(input->info()->data_layout())), build_opts.options()));
+ _kernel = create_kernel(compile_context, "depth_to_space_" + lower_string(string_from_data_layout(input->info()->data_layout())), build_opts.options());
// Configure kernel window
Window win = calculate_max_window(*input->info(), Steps());
void CLDepthwiseConvolutionLayer3x3NCHWKernel::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output,
const PadStrideInfo &conv_info, unsigned int depth_multiplier, ActivationLayerInfo act_info, const Size2D &dilation,
const ICLTensor *output_multipliers, const ICLTensor *output_shifts)
+{
+    // Overload without an explicit compile context: take the one exposed by
+    // CLKernelLibrary::get() and delegate to the overload that accepts it.
+    auto &default_ctx = CLKernelLibrary::get().get_compile_context();
+    configure(default_ctx, input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation, output_multipliers, output_shifts);
+}
+
+void CLDepthwiseConvolutionLayer3x3NCHWKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output,
+ const PadStrideInfo &conv_info, unsigned int depth_multiplier, ActivationLayerInfo act_info, const Size2D &dilation,
+ const ICLTensor *output_multipliers, const ICLTensor *output_shifts)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), weights->info(), (biases != nullptr) ? biases->info() : nullptr, output->info(),
build_opts.add_option_if(input->info()->data_type() == DataType::F16, "-DIS_F16");
build_opts.add_option_if(input->info()->data_type() == DataType::F32, "-DIS_F32");
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
// Set config_id for enabling LWS tuning
_config_id = kernel_name;
void CLDepthwiseConvolutionLayer3x3NHWCKernel::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output,
const PadStrideInfo &conv_info, unsigned int depth_multiplier, ActivationLayerInfo act_info, const Size2D &dilation,
const ICLTensor *output_multipliers, const ICLTensor *output_shifts)
+{
+    // Overload without an explicit compile context: take the one exposed by
+    // CLKernelLibrary::get() and delegate to the overload that accepts it.
+    auto &default_ctx = CLKernelLibrary::get().get_compile_context();
+    configure(default_ctx, input, weights, biases, output, conv_info, depth_multiplier, act_info, dilation, output_multipliers, output_shifts);
+}
+
+void CLDepthwiseConvolutionLayer3x3NHWCKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output,
+ const PadStrideInfo &conv_info, unsigned int depth_multiplier, ActivationLayerInfo act_info, const Size2D &dilation,
+ const ICLTensor *output_multipliers, const ICLTensor *output_shifts)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), weights->info(), (biases != nullptr) ? biases->info() : nullptr, output->info(),
build_opts.add_option_if(input->info()->data_type() == DataType::F32, "-DIS_F32");
ICLKernel::configure_internal(win_config.second);
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
// Set config_id for enabling LWS tuning
_config_id = kernel_name;
void CLDepthwiseConvolutionLayerNativeKernel::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const DWCWeightsKernelInfo &dwc_weights_info,
const DWCKernelInfo &dwc_info, const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation,
const ICLTensor *output_multipliers, const ICLTensor *output_shifts)
+{
+    // Overload without an explicit compile context: take the one exposed by
+    // CLKernelLibrary::get() and delegate to the overload that accepts it.
+    auto &default_ctx = CLKernelLibrary::get().get_compile_context();
+    configure(default_ctx, input, weights, biases, output, dwc_weights_info, dwc_info, conv_info, depth_multiplier, dilation, output_multipliers, output_shifts);
+}
+
+void CLDepthwiseConvolutionLayerNativeKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output,
+ const DWCWeightsKernelInfo &dwc_weights_info,
+ const DWCKernelInfo &dwc_info, const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation,
+ const ICLTensor *output_multipliers, const ICLTensor *output_shifts)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), weights->info(), (biases != nullptr) ? biases->info() : nullptr, output->info(),
}
ICLKernel::configure_internal(win_config.second);
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
// Set config_id for enabling LWS tuning
_config_id = kernel_name;
}
void CLDepthwiseConvolutionLayerReshapeWeightsKernel::configure(const ICLTensor *input, ICLTensor *output, const DepthwiseConvolutionReshapeInfo &info)
+{
+    // Overload without an explicit compile context: take the one exposed by
+    // CLKernelLibrary::get() and delegate to the overload that accepts it.
+    auto &default_ctx = CLKernelLibrary::get().get_compile_context();
+    configure(default_ctx, input, output, info);
+}
+
+void CLDepthwiseConvolutionLayerReshapeWeightsKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const DepthwiseConvolutionReshapeInfo &info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), info));
build_opts.add_option_if(info.transpose, "-DTRANSPOSE");
build_opts.add_option("-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size(input->info()->element_size()));
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("depthwise_convolution_reshape_weights", build_opts.options()));
+ _kernel = create_kernel(compile_context, "depthwise_convolution_reshape_weights", build_opts.options());
}
Status CLDepthwiseConvolutionLayerReshapeWeightsKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const DepthwiseConvolutionReshapeInfo &info)
}
void CLDequantizationLayerKernel::configure(const ICLTensor *input, ICLTensor *output)
+{
+    // Overload without an explicit compile context: take the one exposed by
+    // CLKernelLibrary::get() and delegate to the overload that accepts it.
+    auto &default_ctx = CLKernelLibrary::get().get_compile_context();
+    configure(default_ctx, input, output);
+}
+
+void CLDequantizationLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info()));
build_opts.add_option_if(multi_access_x, "-DLAST_ACCESSED_X=" + support::cpp11::to_string(std::max<int>(output_width_x - vec_size_x, 0)));
// Create kernel name
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
}
Status CLDequantizationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output)
}
void CLDerivativeKernel::configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined)
+{
+    // Overload without an explicit compile context: take the one exposed by
+    // CLKernelLibrary::get() and delegate to the overload that accepts it.
+    auto &default_ctx = CLKernelLibrary::get().get_compile_context();
+    configure(default_ctx, input, output_x, output_y, border_undefined);
+}
+
+void CLDerivativeKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
ARM_COMPUTE_ERROR_ON((output_x == nullptr) && (output_y == nullptr));
// Create kernel
const std::string kernel_name = std::string("derivative");
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts);
// Configure kernel window
constexpr unsigned int num_elems_processed_per_iteration = 16;
/*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
}
void CLDilateKernel::configure(const ICLTensor *input, ICLTensor *output, bool border_undefined)
+{
+    // Overload without an explicit compile context: take the one exposed by
+    // CLKernelLibrary::get() and delegate to the overload that accepts it.
+    auto &default_ctx = CLKernelLibrary::get().get_compile_context();
+    configure(default_ctx, input, output, border_undefined);
+}
+
+void CLDilateKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("dilate"));
+ _kernel = create_kernel(compile_context, "dilate");
_input = input;
_output = output;
}
void CLDirectConvolutionLayerKernel::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info)
+{ // Forwards to the CLCompileContext overload, supplying CLKernelLibrary::get().get_compile_context().
+ configure(CLKernelLibrary::get().get_compile_context(), input, weights, biases, output, conv_info);
+}
+
+void CLDirectConvolutionLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output,
+ const PadStrideInfo &conv_info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
build_options.add_option(std::string("-DWEIGHTS_DEPTH=" + support::cpp11::to_string(_weights->info()->dimension(channel_idx))));
kernel_name << "_f32_bifrost";
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name.str(), build_options.options()));
+ _kernel = create_kernel(compile_context, kernel_name.str(), build_options.options());
}
else
{
build_options.add_option("-DKERNEL_SIZE=" + support::cpp11::to_string(kernel_size));
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("direct_convolution_quantized", build_options.options()));
+ _kernel = create_kernel(compile_context, "direct_convolution_quantized", build_options.options());
// Set static kernel arguments
unsigned int idx = 3 * num_arguments_per_3D_tensor() + ((_biases != nullptr) ? num_arguments_per_1D_tensor() : 0) + 1;
else
{
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name.str(), build_options.options()));
+ _kernel = create_kernel(compile_context, kernel_name.str(), build_options.options());
}
}
} // namespace
void CLElementWiseUnaryLayerKernel::configure(const ICLTensor *input, ICLTensor *output, const ElementWiseUnary &op)
+{ // Forwards to the CLCompileContext overload, supplying CLKernelLibrary::get().get_compile_context().
+ configure(CLKernelLibrary::get().get_compile_context(), input, output, op);
+}
+
+void CLElementWiseUnaryLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ElementWiseUnary &op)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*input->info(), *output->info()));
}
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
}
Status CLElementWiseUnaryLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const ElementWiseUnary &op)
}
void CLElementwiseOperationKernel::configure_common(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output)
+{ // Forwards to the CLCompileContext overload of configure_common, supplying CLKernelLibrary::get().get_compile_context().
+ configure_common(CLKernelLibrary::get().get_compile_context(), input1, input2, output);
+}
+
+void CLElementwiseOperationKernel::configure_common(CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*input1->info(), *input2->info(), *output->info()));
}
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
ICLKernel::configure_internal(win_config.second);
void CLSaturatedArithmeticOperationKernel::configure(ArithmeticOperation op, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const ConvertPolicy &policy,
const ActivationLayerInfo &act_info)
+{ // Forwards to the CLCompileContext overload, supplying CLKernelLibrary::get().get_compile_context().
+ configure(CLKernelLibrary::get().get_compile_context(), op, input1, input2, output, policy, act_info);
+}
+
+void CLSaturatedArithmeticOperationKernel::configure(CLCompileContext &compile_context, ArithmeticOperation op, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output,
+ const ConvertPolicy &policy,
+ const ActivationLayerInfo &act_info)
{
_policy = policy;
_op = op;
_act_info = act_info;
- configure_common(input1, input2, output);
+ configure_common(compile_context, input1, input2, output); // _policy, _op and _act_info are stored first; the caller's compile context is then passed on to configure_common().
}
Status CLSaturatedArithmeticOperationKernel::validate(ArithmeticOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ConvertPolicy &policy,
/** Arithmetic operations*/
void CLArithmeticOperationKernel::configure(ArithmeticOperation op, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info)
+{ // Forwards to the CLCompileContext overload, supplying CLKernelLibrary::get().get_compile_context().
+ configure(CLKernelLibrary::get().get_compile_context(), op, input1, input2, output, act_info);
+}
+
+void CLArithmeticOperationKernel::configure(CLCompileContext &compile_context, ArithmeticOperation op, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output,
+ const ActivationLayerInfo &act_info)
{
_op = op;
_act_info = act_info;
- configure_common(input1, input2, output);
+ configure_common(compile_context, input1, input2, output); // _op and _act_info are stored first; the caller's compile context is then passed on to configure_common().
}
Status CLArithmeticOperationKernel::validate(ArithmeticOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info)
/*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
}
void CLErodeKernel::configure(const ICLTensor *input, ICLTensor *output, bool border_undefined)
+{ // Forwards to the CLCompileContext overload, supplying CLKernelLibrary::get().get_compile_context().
+ configure(CLKernelLibrary::get().get_compile_context(), input, output, border_undefined);
+}
+
+void CLErodeKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("erode"));
+ _kernel = create_kernel(compile_context, "erode");
_input = input;
_output = output;
}
void CLFFTDigitReverseKernel::configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *idx, const FFTDigitReverseKernelInfo &config)
+{ // Forwards to the CLCompileContext overload, supplying CLKernelLibrary::get().get_compile_context().
+ configure(CLKernelLibrary::get().get_compile_context(), input, output, idx, config);
+}
+
+void CLFFTDigitReverseKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *idx, const FFTDigitReverseKernelInfo &config)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, idx);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), idx->info(), config));
build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(input->info()->num_channels()));
build_opts.add_option_if(config.conjugate, "-DCONJ");
std::string kernel_name = "fft_digit_reverse_axis_" + support::cpp11::to_string(config.axis);
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
// Configure kernel window
auto win_config = validate_and_configure_window(input->info(), output->info(), idx->info(), config);
}
void CLFFTRadixStageKernel::configure(ICLTensor *input, ICLTensor *output, const FFTRadixStageKernelInfo &config)
+{ // Forwards to the CLCompileContext overload, supplying CLKernelLibrary::get().get_compile_context().
+ configure(CLKernelLibrary::get().get_compile_context(), input, output, config);
+}
+
+void CLFFTRadixStageKernel::configure(CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const FFTRadixStageKernelInfo &config)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), (output != nullptr) ? output->info() : nullptr, config));
kernel_name += "_radix_" + support::cpp11::to_string(config.radix);
kernel_name += (config.is_first_stage) ? "_first_stage" : "";
kernel_name += "_axis_" + support::cpp11::to_string(config.axis);
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
// Set static arguments if not the first stage
if(!config.is_first_stage)
}
void CLFFTScaleKernel::configure(ICLTensor *input, ICLTensor *output, const FFTScaleKernelInfo &config)
+{ // Forwards to the CLCompileContext overload, supplying CLKernelLibrary::get().get_compile_context().
+ configure(CLKernelLibrary::get().get_compile_context(), input, output, config);
+}
+
+void CLFFTScaleKernel::configure(CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const FFTScaleKernelInfo &config)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), (output != nullptr) ? output->info() : nullptr));
build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(output != nullptr ? output->info()->num_channels() : input->info()->num_channels()));
build_opts.add_option_if(config.conjugate, "-DCONJ");
std::string kernel_name = "fft_scale_conj";
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
// Set static arguments
unsigned int idx = (1 + (_run_in_place ? 0 : 1)) * num_arguments_per_3D_tensor(); // Skip the input and output parameters
}
void CLFastCornersKernel::configure(const ICLImage *input, ICLImage *output, float threshold, bool non_max_suppression, BorderMode border_mode)
+{ // Forwards to the CLCompileContext overload, supplying CLKernelLibrary::get().get_compile_context().
+ configure(CLKernelLibrary::get().get_compile_context(), input, output, threshold, non_max_suppression, border_mode);
+}
+
+void CLFastCornersKernel::configure(CLCompileContext &compile_context, const ICLImage *input, ICLImage *output, float threshold, bool non_max_suppression, BorderMode border_mode)
{
ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input);
ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(output);
// Create kernel
const std::string kernel_name = std::string("fast_corners");
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts);
// Set static kernel arguments
unsigned int idx = 2 * num_arguments_per_2D_tensor(); // Skip the input and output parameters
}
void CLCopyToArrayKernel::configure(const ICLImage *input, bool update_number, ICLKeyPointArray *corners, cl::Buffer *num_buffers)
+{ // Forwards to the CLCompileContext overload, supplying CLKernelLibrary::get().get_compile_context().
+ configure(CLKernelLibrary::get().get_compile_context(), input, update_number, corners, num_buffers);
+}
+
+void CLCopyToArrayKernel::configure(CLCompileContext &compile_context, const ICLImage *input, bool update_number, ICLKeyPointArray *corners, cl::Buffer *num_buffers)
{
ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
// Create kernel
const std::string kernel_name = std::string("copy_to_keypoint");
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts);
//Get how many pixels skipped in the x dimension in the previous stages
unsigned int offset = _input->info()->valid_region().anchor.x();
}
void CLFillBorderKernel::configure(ICLTensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value)
+{ // Forwards to the CLCompileContext overload, supplying CLKernelLibrary::get().get_compile_context().
+ configure(CLKernelLibrary::get().get_compile_context(), tensor, border_size, border_mode, constant_border_value);
+}
+
+void CLFillBorderKernel::configure(CLCompileContext &compile_context, ICLTensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value)
{
ARM_COMPUTE_ERROR_ON(tensor == nullptr);
ARM_COMPUTE_ERROR_ON(tensor->info()->num_channels() != 1);
build_opts.add_option("-DBORDER_SIZE_RIGHT=" + support::cpp11::to_string(border_size.right));
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
_tensor = tensor;
// Create static kernel arguments
}
void CLFlattenLayerKernel::configure(const ICLTensor *input, ICLTensor *output)
+{ // Forwards to the CLCompileContext overload, supplying CLKernelLibrary::get().get_compile_context().
+ configure(CLKernelLibrary::get().get_compile_context(), input, output);
+}
+
+void CLFlattenLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info()));
build_opts.add_option_if(output->info()->num_dimensions() > 2, "-DDST_DIM1=" + support::cpp11::to_string(output->info()->dimension(1)));
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("flatten", build_opts.options()));
+ _kernel = create_kernel(compile_context, "flatten", build_opts.options());
// Set config_id for enabling LWS tuning
_config_id = "flatten";
ICLTensor *fused_weights, ICLTensor *fused_bias,
const ICLTensor *input_bias, const ICLTensor *bn_beta, const ICLTensor *bn_gamma,
float epsilon, FuseBatchNormalizationType fbn_type)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), input_weights, bn_mean, bn_var, fused_weights, fused_bias, input_bias, bn_beta, bn_gamma, epsilon, fbn_type);
+}
+
+void CLFuseBatchNormalizationKernel::configure(CLCompileContext &compile_context, const ICLTensor *input_weights, const ICLTensor *bn_mean, const ICLTensor *bn_var,
+ ICLTensor *fused_weights, ICLTensor *fused_bias,
+ const ICLTensor *input_bias, const ICLTensor *bn_beta, const ICLTensor *bn_gamma,
+ float epsilon, FuseBatchNormalizationType fbn_type)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input_weights, bn_mean, bn_var);
build_opts.add_option_if(bn_gamma != nullptr, "-DGAMMA");
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("fuse_batchnormalization_layer", build_opts.options()));
+ _kernel = create_kernel(compile_context, "fuse_batchnormalization_layer", build_opts.options());
}
Status CLFuseBatchNormalizationKernel::validate(const ITensorInfo *input_weights, const ITensorInfo *bn_mean, const ITensorInfo *bn_var,
}
void CLGEMMLowpMatrixMultiplyKernel::configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMReshapeInfo &gemm_info)
+{ // Forwards to the CLCompileContext overload, supplying CLKernelLibrary::get().get_compile_context().
+ configure(CLKernelLibrary::get().get_compile_context(), input0, input1, output, gemm_info);
+}
+
+void CLGEMMLowpMatrixMultiplyKernel::configure(CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMReshapeInfo &gemm_info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input0, input1, output);
kernel_name = "gemmlowp_mm_midgard";
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
// Set config_id for enabling LWS tuning
_config_id = kernel_name;
void CLGEMMLowpMatrixMultiplyNativeKernel::configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
const GEMMReshapeInfo &gemm_info)
+{ // Forwards to the CLCompileContext overload, supplying CLKernelLibrary::get().get_compile_context().
+ configure(CLKernelLibrary::get().get_compile_context(), input0, input1, output, lhs_info, rhs_info, gemm_info);
+}
+
+void CLGEMMLowpMatrixMultiplyNativeKernel::configure(CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info,
+ const GEMMRHSMatrixInfo &rhs_info,
+ const GEMMReshapeInfo &gemm_info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input0, input1, output);
std::string kernel_name("gemmlowp_mm_native");
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
// Set config_id for enabling LWS tuning
_config_id = kernel_name;
void CLGEMMLowpMatrixMultiplyReshapedKernel::configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info,
const GEMMReshapeInfo &gemm_info)
+{ // Forwards to the CLCompileContext overload, supplying CLKernelLibrary::get().get_compile_context().
+ configure(CLKernelLibrary::get().get_compile_context(), input0, input1, output, lhs_info, rhs_info, gemm_info);
+}
+
+void CLGEMMLowpMatrixMultiplyReshapedKernel::configure(CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info,
+ const GEMMRHSMatrixInfo &rhs_info,
+ const GEMMReshapeInfo &gemm_info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input0, input1, output);
kernel_name += rhs_info.transpose ? "rhs_t" : "rhs_nt";
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
// Set config_id for enabling LWS tuning
_config_id = kernel_name;
void CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel::configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMKernelInfo &gemm_info,
const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, const ICLTensor *bias,
const ICLTensor *output_multipliers, const ICLTensor *output_shifts)
+{ // Forwards to the CLCompileContext overload, supplying CLKernelLibrary::get().get_compile_context().
+ configure(CLKernelLibrary::get().get_compile_context(), input0, input1, output, gemm_info, vector_sum_col, vector_sum_row, bias, output_multipliers, output_shifts);
+}
+
+void CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel::configure(CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, const GEMMKernelInfo &gemm_info,
+ const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, const ICLTensor *bias,
+ const ICLTensor *output_multipliers, const ICLTensor *output_shifts)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input0, input1, output);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input0->info(),
}
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
// Set config_id for enabling LWS tuning
_config_id = kernel_name;
void CLGEMMLowpOffsetContributionKernel::configure(ICLTensor *mm_result, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, const ICLTensor *bias, int32_t k, int32_t a_offset,
int32_t b_offset)
+{ // Forwards to the CLCompileContext overload, supplying CLKernelLibrary::get().get_compile_context().
+ configure(CLKernelLibrary::get().get_compile_context(), mm_result, vector_sum_col, vector_sum_row, bias, k, a_offset, b_offset);
+}
+
+void CLGEMMLowpOffsetContributionKernel::configure(CLCompileContext &compile_context, ICLTensor *mm_result, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, const ICLTensor *bias,
+ int32_t k, int32_t a_offset,
+ int32_t b_offset)
{
// Perform validate step
ARM_COMPUTE_ERROR_ON_NULLPTR(mm_result);
std::string kernel_name("gemmlowp_offset_contribution");
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
// Configure kernel window
auto win_config = validate_and_configure_window(mm_result->info(),
void CLGEMMLowpOffsetContributionOutputStageKernel::configure(const ICLTensor *mm_result, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row, const ICLTensor *bias, ICLTensor *output,
int32_t k, int32_t a_offset, int32_t b_offset, const GEMMLowpOutputStageInfo &output_stage,
const ICLTensor *output_multipliers, const ICLTensor *output_shifts)
+{ // Forwards to the CLCompileContext overload, supplying CLKernelLibrary::get().get_compile_context().
+ configure(CLKernelLibrary::get().get_compile_context(), mm_result, vector_sum_col, vector_sum_row, bias, output, k, a_offset, b_offset, output_stage, output_multipliers, output_shifts);
+}
+
+void CLGEMMLowpOffsetContributionOutputStageKernel::configure(CLCompileContext &compile_context, const ICLTensor *mm_result, const ICLTensor *vector_sum_col, const ICLTensor *vector_sum_row,
+ const ICLTensor *bias, ICLTensor *output,
+ int32_t k, int32_t a_offset, int32_t b_offset, const GEMMLowpOutputStageInfo &output_stage,
+ const ICLTensor *output_multipliers, const ICLTensor *output_shifts)
{
// Perform validate step
ARM_COMPUTE_ERROR_ON_NULLPTR(mm_result, output, output_multipliers, output_shifts);
kernel_name += "_" + string_from_gemmlowp_output_stage(output_stage.type);
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
// Configure kernel window
auto win_config = validate_and_configure_window(mm_result->info(),
void CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel::configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output,
const GEMMLowpOutputStageInfo *info)
+{ // Forwards to the CLCompileContext overload, supplying CLKernelLibrary::get().get_compile_context().
+ configure(CLKernelLibrary::get().get_compile_context(), input, bias, output, info);
+}
+
+void CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output,
+ const GEMMLowpOutputStageInfo *info)
{
// Perform validate step
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
build_opts.add_option_if(bias != nullptr, "-DADD_BIAS");
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("gemmlowp_output_stage_quantize_down_float", build_opts.options()));
+ _kernel = create_kernel(compile_context, "gemmlowp_output_stage_quantize_down_float", build_opts.options());
// Configure kernel window
auto win_config = validate_and_configure_window(input->info(), (bias != nullptr) ? bias->info() : nullptr, output->info(), info->output_data_type);
}
void CLGEMMLowpQuantizeDownInt32ScaleKernel::configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo *output_stage)
+{ // Forwards to the CLCompileContext overload, supplying CLKernelLibrary::get().get_compile_context().
+ configure(CLKernelLibrary::get().get_compile_context(), input, bias, output, output_stage);
+}
+
+void CLGEMMLowpQuantizeDownInt32ScaleKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const GEMMLowpOutputStageInfo *output_stage)
{
// Perform validate step
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
build_opts.add_option_if(bias != nullptr, "-DADD_BIAS");
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("gemmlowp_output_stage_quantize_down", build_opts.options()));
+ _kernel = create_kernel(compile_context, "gemmlowp_output_stage_quantize_down", build_opts.options());
// Configure kernel window
auto win_config = validate_and_configure_window(input->info(), (bias != nullptr) ? bias->info() : nullptr, output->info(), output_stage->output_data_type);
void CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel::configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output,
int result_fixedpoint_multiplier, int result_shift,
int min, int max)
+{ // Forwards to the CLCompileContext overload, supplying CLKernelLibrary::get().get_compile_context().
+ configure(CLKernelLibrary::get().get_compile_context(), input, bias, output, result_fixedpoint_multiplier, result_shift, min, max);
+}
+
+void CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output,
+ int result_fixedpoint_multiplier, int result_shift,
+ int min, int max)
{
// Perform validate step
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
build_opts.add_option_if(bias != nullptr, "-DADD_BIAS");
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("gemmlowp_output_stage_quantize_down_fixedpoint_qsymm16", build_opts.options()));
+ _kernel = create_kernel(compile_context, "gemmlowp_output_stage_quantize_down_fixedpoint_qsymm16", build_opts.options());
// Configure kernel window
auto win_config = validate_and_configure_window(input->info(), (bias != nullptr) ? bias->info() : nullptr, output->info());
void CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel::configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output,
int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift,
int min, int max)
+{ // Forwards to the CLCompileContext overload, supplying CLKernelLibrary::get().get_compile_context().
+ configure(CLKernelLibrary::get().get_compile_context(), input, bias, output, result_fixedpoint_multiplier, result_shift, result_offset_after_shift, min, max);
+}
+
+void CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output,
+ int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift,
+ int min, int max)
{
// Perform validate step
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
build_opts.add_option_if(bias != nullptr, "-DADD_BIAS");
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("gemmlowp_output_stage_quantize_down_fixedpoint", build_opts.options()));
+ _kernel = create_kernel(compile_context, "gemmlowp_output_stage_quantize_down_fixedpoint", build_opts.options());
ICLKernel::configure_internal(win_config.second);
}
void CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel::configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output,
int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift,
int min, int max)
+{ // Forwards to the CLCompileContext overload, supplying CLKernelLibrary::get().get_compile_context().
+ configure(CLKernelLibrary::get().get_compile_context(), input, bias, output, result_fixedpoint_multiplier, result_shift, result_offset_after_shift, min, max);
+}
+
+void CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output,
+ int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift,
+ int min, int max)
{
// Perform validate step
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
build_opts.add_option_if(bias != nullptr, "-DADD_BIAS");
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("gemmlowp_output_stage_quantize_down_fixedpoint", build_opts.options()));
+ _kernel = create_kernel(compile_context, "gemmlowp_output_stage_quantize_down_fixedpoint", build_opts.options());
ICLKernel::configure_internal(win_config.second);
}
}
void CLGEMMLowpMatrixAReductionKernel::configure(const ICLTensor *mtx_a, ICLTensor *vector_sum_row, const GEMMLowpReductionKernelInfo &info)
+{ // Forwards to the CLCompileContext overload, supplying CLKernelLibrary::get().get_compile_context().
+ configure(CLKernelLibrary::get().get_compile_context(), mtx_a, vector_sum_row, info);
+}
+
+void CLGEMMLowpMatrixAReductionKernel::configure(CLCompileContext &compile_context, const ICLTensor *mtx_a, ICLTensor *vector_sum_row, const GEMMLowpReductionKernelInfo &info)
{
// Perform validate step
ARM_COMPUTE_ERROR_ON_NULLPTR(mtx_a, vector_sum_row);
std::string kernel_name = "gemmlowp_matrix_a_reduction" + std::string(is_dot8_supported ? "_dot8" : "");
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
// Configure kernel window
// This kernel does not need padding
}
void CLGEMMLowpMatrixBReductionKernel::configure(const ICLTensor *mtx_b, ICLTensor *vector_sum_col, const GEMMLowpReductionKernelInfo &info)
+{ // Forwards to the CLCompileContext overload, supplying CLKernelLibrary::get().get_compile_context().
+ configure(CLKernelLibrary::get().get_compile_context(), mtx_b, vector_sum_col, info);
+}
+
+void CLGEMMLowpMatrixBReductionKernel::configure(CLCompileContext &compile_context, const ICLTensor *mtx_b, ICLTensor *vector_sum_col, const GEMMLowpReductionKernelInfo &info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(mtx_b, vector_sum_col);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments_matrix_b_reduction(mtx_b->info(), vector_sum_col->info()));
build_opts.add_option_if(info.mul_by_scalar, "-DSCALAR=" + support::cpp11::to_string(info.scalar));
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("gemmlowp_matrix_b_reduction", build_opts.options()));
+ _kernel = create_kernel(compile_context, "gemmlowp_matrix_b_reduction", build_opts.options());
// Configure kernel window
auto win_config = validate_and_configure_window_matrix_b_reduction(_input->info(), _output->info());
}
void CLGEMMMatrixAccumulateBiasesKernel::configure(ICLTensor *accum, const ICLTensor *biases)
+{ // Forwards to the CLCompileContext overload, supplying CLKernelLibrary::get().get_compile_context().
+ configure(CLKernelLibrary::get().get_compile_context(), accum, biases);
+}
+
+void CLGEMMMatrixAccumulateBiasesKernel::configure(CLCompileContext &compile_context, ICLTensor *accum, const ICLTensor *biases)
{
// Perform validate step
ARM_COMPUTE_ERROR_ON_NULLPTR(accum, biases);
build_opts.add_option("-DVECTOR_SIZE=" + support::cpp11::to_string(vector_size));
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("gemm_accumulate_biases", build_opts.options()));
+ _kernel = create_kernel(compile_context, "gemm_accumulate_biases", build_opts.options());
}
Status CLGEMMMatrixAccumulateBiasesKernel::validate(const ITensorInfo *accum, const ITensorInfo *biases, GPUTarget gpu_target)
void CLGEMMMatrixMultiplyKernel::configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta,
bool is_interleaved_transposed, const GEMMReshapeInfo &reshape_info, bool fp_mixed_precision, const ActivationLayerInfo &activation_info)
+{ // Forwards to the CLCompileContext overload, supplying CLKernelLibrary::get().get_compile_context().
+ configure(CLKernelLibrary::get().get_compile_context(), input0, input1, input2, output, alpha, beta, is_interleaved_transposed, reshape_info, fp_mixed_precision, activation_info);
+}
+
+void CLGEMMMatrixMultiplyKernel::configure(CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta,
+ bool is_interleaved_transposed, const GEMMReshapeInfo &reshape_info, bool fp_mixed_precision, const ActivationLayerInfo &activation_info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input0, input1, output);
}
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
// Set config_id for enabling LWS tuning
_config_id = "gemm_";
void CLGEMMMatrixMultiplyNativeKernel::configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta,
const GEMMLHSMatrixInfo &lhs_info,
const GEMMRHSMatrixInfo &rhs_info, const GEMMKernelInfo &gemm_info)
+{ // Forwards to the CLCompileContext overload, supplying CLKernelLibrary::get().get_compile_context().
+ configure(CLKernelLibrary::get().get_compile_context(), input0, input1, input2, output, alpha, beta, lhs_info, rhs_info, gemm_info);
+}
+
+void CLGEMMMatrixMultiplyNativeKernel::configure(CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha,
+ float beta,
+ const GEMMLHSMatrixInfo &lhs_info,
+ const GEMMRHSMatrixInfo &rhs_info, const GEMMKernelInfo &gemm_info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input0, input1, output);
std::string kernel_name("gemm_mm_native");
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
// Set config_id for enabling LWS tuning
_config_id = kernel_name;
void CLGEMMMatrixMultiplyReshapedKernel::configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta,
const GEMMLHSMatrixInfo &lhs_info,
const GEMMRHSMatrixInfo &rhs_info, const GEMMKernelInfo &gemm_info)
+{ // Forwards to the CLCompileContext overload, supplying CLKernelLibrary::get().get_compile_context().
+ configure(CLKernelLibrary::get().get_compile_context(), input0, input1, input2, output, alpha, beta, lhs_info, rhs_info, gemm_info);
+}
+
+void CLGEMMMatrixMultiplyReshapedKernel::configure(CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha,
+ float beta,
+ const GEMMLHSMatrixInfo &lhs_info,
+ const GEMMRHSMatrixInfo &rhs_info, const GEMMKernelInfo &gemm_info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input0, input1, output);
kernel_name += rhs_info.transpose ? "rhs_t" : "rhs_nt";
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
// Set config_id for enabling LWS tuning
_config_id = kernel_name;
void CLGEMMMatrixMultiplyReshapedOnlyRHSKernel::configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta,
const GEMMLHSMatrixInfo &lhs_info,
const GEMMRHSMatrixInfo &rhs_info, const GEMMKernelInfo &gemm_info)
+{ // Forwards to the CLCompileContext overload, supplying CLKernelLibrary::get().get_compile_context().
+ configure(CLKernelLibrary::get().get_compile_context(), input0, input1, input2, output, alpha, beta, lhs_info, rhs_info, gemm_info);
+}
+
+void CLGEMMMatrixMultiplyReshapedOnlyRHSKernel::configure(CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha,
+ float beta,
+ const GEMMLHSMatrixInfo &lhs_info,
+ const GEMMRHSMatrixInfo &rhs_info, const GEMMKernelInfo &gemm_info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input0, input1, output);
kernel_name += rhs_info.transpose ? "t" : "nt";
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
// Set config_id for enabling LWS tuning
_config_id = kernel_name;
}
void CLGEMMMatrixVectorMultiplyKernel::configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output)
+{ // Delegates to the CLCompileContext overload with the context obtained from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), input0, input1, output);
+}
+
+void CLGEMMMatrixVectorMultiplyKernel::configure(CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input0, input1, output);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input0->info(), input1->info(), output->info()));
build_opts.add_option("-DSRC_HEIGHT=" + support::cpp11::to_string(input0->info()->dimension(1)));
std::string kernel_name = is_quantized ? std::string("gemm_mv_quantized") : std::string("gemm_mv");
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
// Add static arguments
if(is_quantized)
}
void CLGEMMReshapeLHSMatrixKernel::configure(const ICLTensor *input, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, bool reinterpret_input_as_3d)
+{ // Delegates to the CLCompileContext overload with the context obtained from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), input, output, lhs_info, reinterpret_input_as_3d);
+}
+
+void CLGEMMReshapeLHSMatrixKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const GEMMLHSMatrixInfo &lhs_info, bool reinterpret_input_as_3d)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
kernel_name += lhs_info.transpose ? "t" : "nt";
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
// Configure kernel window
auto win_config = validate_and_configure_window(input->info(), output->info(), lhs_info, reinterpret_input_as_3d);
}
void CLGEMMReshapeRHSMatrixKernel::configure(const ICLTensor *input, ICLTensor *output, const GEMMRHSMatrixInfo &rhs_info)
+{ // Delegates to the CLCompileContext overload with the context obtained from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), input, output, rhs_info);
+}
+
+void CLGEMMReshapeRHSMatrixKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const GEMMRHSMatrixInfo &rhs_info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
kernel_name += rhs_info.transpose ? "t" : "nt";
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
// Configure kernel window
auto win_config = validate_and_configure_window(input->info(), output->info(), rhs_info);
}
void CLGatherKernel::configure(const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, int axis)
+{ // Delegates to the CLCompileContext overload with the context obtained from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), input, indices, output, axis);
+}
+
+void CLGatherKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *indices, ICLTensor *output, int axis)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, indices);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), indices->info(), output->info(), axis));
build_opts.add_option("-DAXIS=" + support::cpp11::to_string(_axis));
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("gather", build_opts.options()));
+ _kernel = create_kernel(compile_context, "gather", build_opts.options());
ICLKernel::configure_internal(win_config.second);
}
/*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
}
void CLGaussian3x3Kernel::configure(const ICLTensor *input, ICLTensor *output, bool border_undefined)
+{ // Delegates to the CLCompileContext overload with the context obtained from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), input, output, border_undefined);
+}
+
+void CLGaussian3x3Kernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
};
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("convolution3x3_static", build_opts));
+ _kernel = create_kernel(compile_context, "convolution3x3_static", build_opts);
// Configure kernel window
constexpr unsigned int num_elems_processed_per_iteration = 8;
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
using namespace arm_compute;
void CLGaussian5x5HorKernel::configure(const ICLTensor *input, ICLTensor *output, bool border_undefined)
+{ // Delegates to the CLCompileContext overload with the context obtained from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), input, output, border_undefined);
+}
+
+void CLGaussian5x5HorKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined)
{
const std::array<int16_t, 5> matrix = { 1, 4, 6, 4, 1 };
// Set arguments
- CLSeparableConvolution5x5HorKernel::configure(input, output, matrix.data(), border_undefined);
+ CLSeparableConvolution5x5HorKernel::configure(compile_context, input, output, matrix.data(), border_undefined); // Same call as the removed line, now passing compile_context through.
}
void CLGaussian5x5VertKernel::configure(const ICLTensor *input, ICLTensor *output, bool border_undefined)
+{ // Delegates to the CLCompileContext overload with the context obtained from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), input, output, border_undefined);
+}
+
+void CLGaussian5x5VertKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined)
{
const uint32_t scale = 256;
const std::array<int16_t, 5> matrix = { 1, 4, 6, 4, 1 };
// Set arguments
- CLSeparableConvolution5x5VertKernel::configure(input, output, matrix.data(), scale, border_undefined);
+ CLSeparableConvolution5x5VertKernel::configure(compile_context, input, output, matrix.data(), scale, border_undefined); // Same call as the removed line, now passing compile_context through.
}
}
void CLGaussianPyramidHorKernel::configure(const ICLTensor *input, ICLTensor *output)
+{ // Delegates to the CLCompileContext overload with the context obtained from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), input, output);
+}
+
+void CLGaussianPyramidHorKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U16);
// Create kernel
const std::string kernel_name = std::string("gaussian1x5_sub_x");
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name));
+ _kernel = create_kernel(compile_context, kernel_name);
// Configure kernel window
constexpr unsigned int num_elems_processed_per_iteration = 16;
}
void CLGaussianPyramidVertKernel::configure(const ICLTensor *input, ICLTensor *output)
+{ // Delegates to the CLCompileContext overload with the context obtained from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), input, output);
+}
+
+void CLGaussianPyramidVertKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U16);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
// Create kernel
const std::string kernel_name = std::string("gaussian5x1_sub_y");
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("gaussian5x1_sub_y"));
+ _kernel = create_kernel(compile_context, kernel_name); // Reuse kernel_name (same string) instead of repeating the literal, as the Hor kernel does.
// Configure kernel window
constexpr unsigned int num_elems_processed_per_iteration = 8;
}
void CLComputeAllAnchorsKernel::configure(const ICLTensor *anchors, ICLTensor *all_anchors, const ComputeAnchorsInfo &info)
+{ // Delegates to the CLCompileContext overload with the context obtained from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), anchors, all_anchors, info);
+}
+
+void CLComputeAllAnchorsKernel::configure(CLCompileContext &compile_context, const ICLTensor *anchors, ICLTensor *all_anchors, const ComputeAnchorsInfo &info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(anchors, all_anchors);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(anchors->info(), all_anchors->info(), info));
// Create kernel
const std::string kernel_name = (is_quantized) ? "generate_proposals_compute_all_anchors_quantized" : "generate_proposals_compute_all_anchors";
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
// The tensor all_anchors can be interpreted as an array of structs (each structs has values_per_roi fields).
// This means we don't need to pad on the X dimension, as we know in advance how many fields
}
void CLHOGOrientationBinningKernel::configure(const ICLTensor *input_magnitude, const ICLTensor *input_phase, ICLTensor *output, const HOGInfo *hog_info)
+{ // Delegates to the CLCompileContext overload with the context obtained from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), input_magnitude, input_phase, output, hog_info);
+}
+
+void CLHOGOrientationBinningKernel::configure(CLCompileContext &compile_context, const ICLTensor *input_magnitude, const ICLTensor *input_phase, ICLTensor *output, const HOGInfo *hog_info)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input_magnitude, 1, DataType::S16);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input_phase, 1, DataType::U8);
// Create kernel
const std::string kernel_name = std::string("hog_orientation_binning");
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts);
constexpr unsigned int num_elems_processed_per_iteration = 1;
constexpr unsigned int num_elems_read_per_iteration = 1;
}
void CLHOGBlockNormalizationKernel::configure(const ICLTensor *input, ICLTensor *output, const HOGInfo *hog_info)
+{ // Delegates to the CLCompileContext overload with the context obtained from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), input, output, hog_info);
+}
+
+void CLHOGBlockNormalizationKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const HOGInfo *hog_info)
{
ARM_COMPUTE_ERROR_ON(hog_info == nullptr);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, hog_info->num_bins(), DataType::F32);
build_opts.insert(args_str.str());
const std::string kernel_name = std::string("hog_block_normalization");
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts);
constexpr unsigned int num_elems_processed_per_iteration = 1;
constexpr unsigned int num_elems_read_per_iteration = 1;
void CLHOGDetectorKernel::configure(const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, cl::Buffer *num_detection_windows, const Size2D &detection_window_stride,
float threshold, uint16_t idx_class)
+{ // Delegates to the CLCompileContext overload with the context obtained from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), input, hog, detection_windows, num_detection_windows, detection_window_stride, threshold, idx_class);
+}
+
+void CLHOGDetectorKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLHOG *hog, ICLDetectionWindowArray *detection_windows, cl::Buffer *num_detection_windows,
+ const Size2D &detection_window_stride,
+ float threshold, uint16_t idx_class)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_NOT_IN(input, DataType::F32);
ARM_COMPUTE_ERROR_ON(hog == nullptr);
// Create kernel
const std::string kernel_name = std::string("hog_detector");
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts);
// Set static kernel arguments
unsigned int idx = num_arguments_per_2D_tensor(); // Skip the input parameters
void CLHarrisScoreKernel::configure(const ICLImage *input1, const ICLImage *input2, ICLImage *output,
int32_t block_size, float norm_factor, float strength_thresh, float sensitivity,
bool border_undefined)
+{ // Delegates to the CLCompileContext overload with the context obtained from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output, block_size, norm_factor, strength_thresh, sensitivity, border_undefined);
+}
+
+void CLHarrisScoreKernel::configure(CLCompileContext &compile_context, const ICLImage *input1, const ICLImage *input2, ICLImage *output,
+ int32_t block_size, float norm_factor, float strength_thresh, float sensitivity,
+ bool border_undefined)
{
ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input1);
ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input2);
std::set<std::string> build_opts = { ("-DDATA_TYPE=" + get_cl_type_from_data_type(input1->info()->data_type())) };
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(harris_score_kernel_name.str(), build_opts));
+ _kernel = create_kernel(compile_context, harris_score_kernel_name.str(), build_opts);
// Set static kernel arguments
unsigned int idx = 3 * num_arguments_per_2D_tensor(); //Skip the input and output parameters
}
void CLHeightConcatenateLayerKernel::configure(const ICLTensor *input, unsigned int height_offset, ICLTensor *output)
+{ // Delegates to the CLCompileContext overload with the context obtained from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), input, height_offset, output);
+}
+
+void CLHeightConcatenateLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, unsigned int height_offset, ICLTensor *output)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), height_offset, output->info()));
}
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("concatenate_height", build_opts.options()));
+ _kernel = create_kernel(compile_context, "concatenate_height", build_opts.options());
// Configure kernel window
ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config));
}
void CLHistogramKernel::configure(const ICLImage *input, ICLDistribution1D *output)
+{ // Delegates to the CLCompileContext overload with the context obtained from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), input, output);
+}
+
+void CLHistogramKernel::configure(CLCompileContext &compile_context, const ICLImage *input, ICLDistribution1D *output)
{
ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input);
ARM_COMPUTE_ERROR_ON(nullptr == output);
// Create kernel
bool is_fixed_size = (256 == num_bins) && (1 == window_size) && (0 == offset) && (256 == offrange);
const std::string kernel_name = is_fixed_size ? "hist_local_kernel_fixed" : "hist_local_kernel";
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name));
+ _kernel = create_kernel(compile_context, kernel_name);
// Set static kernel arguments
unsigned int idx = num_arguments_per_2D_tensor(); //Skip the input and output parameters
}
void CLHistogramBorderKernel::configure(const ICLImage *input, ICLDistribution1D *output)
+{ // Delegates to the CLCompileContext overload with the context obtained from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), input, output);
+}
+
+void CLHistogramBorderKernel::configure(CLCompileContext &compile_context, const ICLImage *input, ICLDistribution1D *output)
{
ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input);
ARM_COMPUTE_ERROR_ON(nullptr == output);
// Create kernel
bool is_fixed_size = (256 == num_bins) && (1 == window_size) && (0 == offset) && (256 == offrange);
const std::string kernel_name = is_fixed_size ? "hist_border_kernel_fixed" : "hist_border_kernel";
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name));
+ _kernel = create_kernel(compile_context, kernel_name);
// Set static kernel arguments
unsigned int idx = num_arguments_per_2D_tensor(); //Skip the input and output parameters
void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation,
unsigned int num_groups)
+{ // Delegates to the CLCompileContext overload with the context obtained from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), input, output, kernel_dims, conv_info, has_bias, dilation, num_groups);
+}
+
+void CLIm2ColKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias,
+ const Size2D &dilation,
+ unsigned int num_groups)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), kernel_dims, conv_info, has_bias, dilation, num_groups));
Im2ColConfiguration im2col_config = configure_opencl_kernel(input->info(), kernel_dims, conv_info, has_bias, dilation, num_groups);
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(im2col_config.kernel_name, im2col_config.build_options));
+ _kernel = create_kernel(compile_context, im2col_config.kernel_name, im2col_config.build_options);
_input = input;
_output = output;
}
void CLInstanceNormalizationLayerKernel::configure(ICLTensor *input, ICLTensor *output, const InstanceNormalizationLayerKernelInfo &info)
+{ // Delegates to the CLCompileContext overload with the context obtained from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), input, output, info);
+}
+
+void CLInstanceNormalizationLayerKernel::configure(CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const InstanceNormalizationLayerKernelInfo &info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input);
build_opts.add_option_if(_input->info()->data_layout() == DataLayout::NHWC, "-DNHWC");
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("instance_normalization", build_opts.options()));
+ _kernel = create_kernel(compile_context, "instance_normalization", build_opts.options());
// Configure kernel window
auto win_config = validate_and_configure_window(_input->info(), _output->info());
using namespace arm_compute;
void CLIntegralImageHorKernel::configure(const ICLTensor *input, ICLTensor *output)
+{ // Delegates to the CLCompileContext overload with the context obtained from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), input, output);
+}
+
+void CLIntegralImageHorKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U32);
// Create kernel
const std::string kernel_name = std::string("integral_horizontal");
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name));
+ _kernel = create_kernel(compile_context, kernel_name);
// Configure kernel window
const unsigned int num_elems_processed_per_iteration = input->info()->dimension(0);
}
void CLIntegralImageVertKernel::configure(ICLTensor *in_out)
+{ // Delegates to the CLCompileContext overload with the context obtained from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), in_out);
+}
+
+void CLIntegralImageVertKernel::configure(CLCompileContext &compile_context, ICLTensor *in_out)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(in_out, 1, DataType::U32);
// Create kernel
const std::string kernel_name = std::string("integral_vertical");
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name));
+ _kernel = create_kernel(compile_context, kernel_name);
// Configure kernel window
constexpr unsigned int num_elems_processed_per_iteration_x = 8;
}
void CLL2NormalizeLayerKernel::configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, int axis, float epsilon)
+{ // Delegates to the CLCompileContext overload with the context obtained from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), input, sum, output, axis, epsilon);
+}
+
+void CLL2NormalizeLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, int axis, float epsilon)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, sum, output);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), sum->info(), output->info(), axis, epsilon));
default:
ARM_COMPUTE_ERROR("Axis not supported");
}
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("l2_normalize_" + kernel_name, build_opts));
+ _kernel = create_kernel(compile_context, "l2_normalize_" + kernel_name, build_opts);
// Set epsilon argument
if(input->info()->data_type() == DataType::F32)
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
void CLLKTrackerInitKernel::configure(const ICLKeyPointArray *old_points, const ICLKeyPointArray *new_points_estimates,
ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal,
bool use_initial_estimate, size_t level, size_t num_levels, float pyramid_scale)
+{ // Delegates to the CLCompileContext overload with the context obtained from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), old_points, new_points_estimates, old_points_internal, new_points_internal, use_initial_estimate, level, num_levels, pyramid_scale);
+}
+
+void CLLKTrackerInitKernel::configure(CLCompileContext &compile_context, const ICLKeyPointArray *old_points, const ICLKeyPointArray *new_points_estimates,
+ ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal,
+ bool use_initial_estimate, size_t level, size_t num_levels, float pyramid_scale)
{
ARM_COMPUTE_ERROR_ON(old_points == nullptr);
{
kernel_name += (use_initial_estimate) ? std::string("_max_initial_estimate") : std::string("_max");
}
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name));
+ _kernel = create_kernel(compile_context, kernel_name);
// Set static kernel arguments
unsigned int idx = 0;
}
void CLLKTrackerFinalizeKernel::configure(ICLLKInternalKeypointArray *new_points_internal, ICLKeyPointArray *new_points)
+{ // Delegates to the CLCompileContext overload with the context obtained from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), new_points_internal, new_points);
+}
+
+void CLLKTrackerFinalizeKernel::configure(CLCompileContext &compile_context, ICLLKInternalKeypointArray *new_points_internal, ICLKeyPointArray *new_points)
{
ARM_COMPUTE_ERROR_ON(new_points_internal == nullptr);
ARM_COMPUTE_ERROR_ON(new_points == nullptr);
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("finalize"));
+ _kernel = create_kernel(compile_context, "finalize");
// Set static kernel arguments
unsigned int idx = 0;
ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal,
ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival,
size_t window_dimension, size_t level)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), old_input, old_scharr_gx, old_scharr_gy, old_points_internal, new_points_internal, coeff_table, old_ival, window_dimension, level);
+}
+
+void CLLKTrackerStage0Kernel::configure(CLCompileContext &compile_context, const ICLTensor *old_input, const ICLTensor *old_scharr_gx, const ICLTensor *old_scharr_gy,
+ ICLLKInternalKeypointArray *old_points_internal, ICLLKInternalKeypointArray *new_points_internal,
+ ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival,
+ size_t window_dimension, size_t level)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(old_input, 1, DataType::U8);
};
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("lktracker_stage0"));
+ _kernel = create_kernel(compile_context, "lktracker_stage0");
// Set arguments
unsigned int idx = 3 * num_arguments_per_2D_tensor();
void CLLKTrackerStage1Kernel::configure(const ICLTensor *new_input, ICLLKInternalKeypointArray *new_points_internal, ICLCoefficientTableArray *coeff_table, ICLOldValArray *old_ival,
Termination termination, float epsilon, size_t num_iterations, size_t window_dimension, size_t level)
+{ // Delegates to the CLCompileContext overload with the context obtained from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), new_input, new_points_internal, coeff_table, old_ival, termination, epsilon, num_iterations, window_dimension, level);
+}
+
+void CLLKTrackerStage1Kernel::configure(CLCompileContext &compile_context, const ICLTensor *new_input, ICLLKInternalKeypointArray *new_points_internal, ICLCoefficientTableArray *coeff_table,
+ ICLOldValArray *old_ival,
+ Termination termination, float epsilon, size_t num_iterations, size_t window_dimension, size_t level)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(new_input, 1, DataType::U8);
const int term_epsilon = (termination == Termination::TERM_CRITERIA_EPSILON || termination == Termination::TERM_CRITERIA_BOTH) ? 1 : 0;
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("lktracker_stage1"));
+ _kernel = create_kernel(compile_context, "lktracker_stage1");
// Set static kernel arguments
unsigned int idx = num_arguments_per_2D_tensor();
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
} // namespace
void CLLocallyConnectedMatrixMultiplyKernel::configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output)
+{ // Delegates to the CLCompileContext overload with the context obtained from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), input0, input1, output);
+}
+
+void CLLocallyConnectedMatrixMultiplyKernel::configure(CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input0, input1, output);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input0->info(), input1->info(), output->info()));
// Create kernel
std::string data_type_name = lower_string(string_from_data_type(input0->info()->data_type()));
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(("gemm_lc_vm_" + data_type_name), build_opts));
+ _kernel = create_kernel(compile_context, ("gemm_lc_vm_" + data_type_name), build_opts);
// Configure kernel window
auto win_config = validate_and_configure_window(input0->info(), input1->info(), output->info());
void CLMagnitudePhaseKernel::configure(const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase,
MagnitudeType mag_type, PhaseType phase_type)
+{ // Delegates to the CLCompileContext overload with the context obtained from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), gx, gy, magnitude, phase, mag_type, phase_type);
+}
+
+void CLMagnitudePhaseKernel::configure(CLCompileContext &compile_context, const ICLTensor *gx, const ICLTensor *gy, ICLTensor *magnitude, ICLTensor *phase,
+ MagnitudeType mag_type, PhaseType phase_type)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(gx, 1, DataType::S16, DataType::S32);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(gy, 1, DataType::S16, DataType::S32);
// Create kernel
const std::string kernel_name = std::string("magnitude_phase");
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts);
// Configure kernel window
constexpr unsigned int num_elems_processed_per_iteration = 16;
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
}
void CLMeanStdDevKernel::configure(const ICLImage *input, float *mean, cl::Buffer *global_sum, float *stddev, cl::Buffer *global_sum_squared)
+{ // Delegates to the CLCompileContext overload with the context obtained from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), input, mean, global_sum, stddev, global_sum_squared);
+}
+
+void CLMeanStdDevKernel::configure(CLCompileContext &compile_context, const ICLImage *input, float *mean, cl::Buffer *global_sum, float *stddev, cl::Buffer *global_sum_squared)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, mean, global_sum);
ARM_COMPUTE_ERROR_ON(stddev && nullptr == global_sum_squared);
build_opts.insert("-DSTDDEV");
}
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("mean_stddev_accumulate", build_opts));
+ _kernel = create_kernel(compile_context, "mean_stddev_accumulate", build_opts);
// Set fixed arguments
unsigned int idx = num_arguments_per_2D_tensor(); //Skip the input parameters
}
void CLMeanStdDevNormalizationKernel::configure(ICLTensor *input, ICLTensor *output, float epsilon)
+{ // Delegates to the CLCompileContext overload with the context obtained from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), input, output, epsilon);
+}
+
+void CLMeanStdDevNormalizationKernel::configure(CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, float epsilon)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input);
build_opts.add_option_if(_run_in_place, "-DIN_PLACE");
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("mean_stddev_normalization", build_opts.options()));
+ _kernel = create_kernel(compile_context, "mean_stddev_normalization", build_opts.options());
// Configure kernel window
auto win_config = validate_and_configure_window(input->info(), (_run_in_place) ? nullptr : output->info());
}
void CLMedian3x3Kernel::configure(const ICLTensor *input, ICLTensor *output, bool border_undefined)
+{ // Delegates to the CLCompileContext overload with the context obtained from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), input, output, border_undefined);
+}
+
+void CLMedian3x3Kernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
// Create kernel
const std::string kernel_name = std::string("non_linear_filter_box3x3");
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, { "-DMEDIAN" }));
+ _kernel = create_kernel(compile_context, kernel_name, { "-DMEDIAN" });
// Configure kernel window
constexpr unsigned int num_elems_processed_per_iteration = 8;
void CLMemsetKernel::configure(ICLTensor *tensor,
const PixelValue &constant_value,
Window *window)
+{ // Delegates to the CLCompileContext overload with the context obtained from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), tensor, constant_value, window);
+}
+
+void CLMemsetKernel::configure(CLCompileContext &compile_context, ICLTensor *tensor,
+ const PixelValue &constant_value,
+ Window *window)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(tensor);
ARM_COMPUTE_ERROR_THROW_ON(validate(tensor->info(), constant_value, window));
build_opts.add_option("-DCONSTANT_VALUE=" + string_from_pixel_value(constant_value, data_type));
build_opts.add_option_if(multi_access_x, "-DVEC_SIZE=" + support::cpp11::to_string(vec_size_x));
build_opts.add_option_if(multi_access_x && remainder_x, "-DLAST_ACCESSED_X=" + support::cpp11::to_string(std::max<int>(output_width_x - vec_size_x, 0)));
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("memset", build_opts.options()));
+ _kernel = create_kernel(compile_context, "memset", build_opts.options());
}
Status CLMemsetKernel::validate(const ITensorInfo *tensor, const PixelValue &constant_value, Window *window)
}
void CLMinMaxLayerKernel::configure(const ICLTensor *input, ICLTensor *output)
+{ // Delegates to the CLCompileContext overload with the context obtained from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), input, output);
+}
+
+void CLMinMaxLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info()));
build_opts.emplace("-DDEPTH=" + support::cpp11::to_string(input->info()->dimension(2)));
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("minmax_layer", build_opts));
+ _kernel = create_kernel(compile_context, "minmax_layer", build_opts);
auto win_config = validate_and_configure_window(input->info(), output->info());
}
void CLMinMaxKernel::configure(const ICLImage *input, cl::Buffer *min_max)
+{ // Delegates to the CLCompileContext overload with the context obtained from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), input, min_max);
+}
+
+void CLMinMaxKernel::configure(CLCompileContext &compile_context, const ICLImage *input, cl::Buffer *min_max)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S16, DataType::F32);
ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input);
}
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("minmax", build_opts));
+ _kernel = create_kernel(compile_context, "minmax", build_opts);
// Set fixed arguments
unsigned int idx = num_arguments_per_2D_tensor(); //Skip the input and output parameters
}
void CLMinMaxLocationKernel::configure(const ICLImage *input, cl::Buffer *min_max, cl::Buffer *min_max_count, ICLCoordinates2DArray *min_loc, ICLCoordinates2DArray *max_loc)
+{ // No compile context given: delegate to the CLCompileContext overload with the one from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), input, min_max, min_max_count, min_loc, max_loc);
+}
+
+void CLMinMaxLocationKernel::configure(CLCompileContext &compile_context, const ICLImage *input, cl::Buffer *min_max, cl::Buffer *min_max_count, ICLCoordinates2DArray *min_loc,
+ ICLCoordinates2DArray *max_loc)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S16, DataType::F32);
ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input);
}
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("minmaxloc", build_opts));
+ _kernel = create_kernel(compile_context, "minmaxloc", build_opts);
// Set static arguments
unsigned int idx = num_arguments_per_2D_tensor(); //Skip the input and output parameters
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
void CLNonLinearFilterKernel::configure(const ICLTensor *input, ICLTensor *output, NonLinearFilterFunction function,
unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask,
bool border_undefined)
+{ // No compile context given: delegate to the CLCompileContext overload with the one from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), input, output, function, mask_size, pattern, mask, border_undefined);
+}
+
+void CLNonLinearFilterKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, NonLinearFilterFunction function,
+ unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask,
+ bool border_undefined)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
ss << "non_linear_filter_" << pattern_name << mask_size << "x" << mask_size;
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(ss.str(), build_opts));
+ _kernel = create_kernel(compile_context, ss.str(), build_opts);
// Configure kernel window
constexpr unsigned int num_elems_processed_per_iteration = 8;
/*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
}
void CLNonMaximaSuppression3x3Kernel::configure(const ICLTensor *input, ICLTensor *output, bool border_undefined)
+{ // No compile context given: delegate to the CLCompileContext overload with the one from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), input, output, border_undefined);
+}
+
+void CLNonMaximaSuppression3x3Kernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, bool border_undefined)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::F32);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::F32);
// Create kernel
std::set<std::string> build_opts = { ("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())) };
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("non_max_suppression", build_opts));
+ _kernel = create_kernel(compile_context, "non_max_suppression", build_opts);
// Configure kernel window
constexpr unsigned int num_elems_processed_per_iteration = 8;
}
void CLNormalizationLayerKernel::configure(const ICLTensor *input, ICLTensor *output, NormalizationLayerInfo norm_info)
+{ // No compile context given: delegate to the CLCompileContext overload with the one from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), input, output, norm_info);
+}
+
+void CLNormalizationLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, NormalizationLayerInfo norm_info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
kernel_name = "normalization_layer_in_map_nchw";
}
}
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
// Configure kernel window
auto win_config = validate_and_configure_window(input->info(), output->info(), norm_info);
}
void CLNormalizePlanarYUVLayerKernel::configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *std)
+{ // No compile context given: delegate to the CLCompileContext overload with the one from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), input, output, mean, std);
+}
+
+void CLNormalizePlanarYUVLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *std)
{
// Perform validation step
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, mean, std);
// Create kernel
kernel_name += lower_string(string_from_data_layout(input->info()->data_layout()));
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
// Configure kernel window
auto win_config = validate_and_configure_window(input->info(), output->info(), mean->info(), std->info());
}
void CLPadLayerKernel::configure(const ICLTensor *input, ICLTensor *output, const PaddingList &padding, PixelValue constant_value, PaddingMode mode)
+{ // No compile context given: delegate to the CLCompileContext overload with the one from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), input, output, padding, constant_value, mode);
+}
+
+void CLPadLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PaddingList &padding, PixelValue constant_value, PaddingMode mode)
{
// Perform validation step
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
}
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
}
Status CLPadLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const PaddingList &padding, PixelValue constant_value, PaddingMode mode)
} // namespace
void CLPermuteKernel::configure(const ICLTensor *input, ICLTensor *output, const PermutationVector &perm)
+{ // No compile context given: delegate to the CLCompileContext overload with the one from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), input, output, perm);
+}
+
+void CLPermuteKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PermutationVector &perm)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), perm));
build_opts.add_option("-DP3=" + support::cpp11::to_string((_perm.num_dimensions() >= 3) ? perm[2] : 2));
build_opts.add_option("-DP4=" + support::cpp11::to_string((_perm.num_dimensions() >= 4) ? perm[3] : 3));
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("permute", build_opts.options()));
+ _kernel = create_kernel(compile_context, "permute", build_opts.options());
// Configure kernel window
Window win = calculate_max_window(*input->info(), Steps());
void CLPixelWiseMultiplicationKernel::configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float scale,
ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info)
+{ // No compile context given: delegate to the CLCompileContext overload with the one from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output, scale, overflow_policy, rounding_policy, act_info);
+}
+
+void CLPixelWiseMultiplicationKernel::configure(CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float scale,
+ ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input1->info(), input2->info(), output->info(),
}
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
// Set scale argument
unsigned int idx = 3 * num_arguments_per_3D_tensor(); // Skip the inputs and output parameters
}
void CLComplexPixelWiseMultiplicationKernel::configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info)
+{ // No compile context given: delegate to the CLCompileContext overload with the one from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output, act_info);
+}
+
+void CLComplexPixelWiseMultiplicationKernel::configure(CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const ActivationLayerInfo &act_info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments_complex(input1->info(), input2->info(), output->info(), act_info));
}
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("pixelwise_mul_complex", build_opts.options()));
+ _kernel = create_kernel(compile_context, "pixelwise_mul_complex", build_opts.options());
ICLKernel::configure_internal(win_config.second);
}
}
void CLPoolingLayerKernel::configure(const ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info, ICLTensor *indices)
+{ // No compile context given: delegate to the CLCompileContext overload with the one from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), input, output, pool_info, indices);
+}
+
+void CLPoolingLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info, ICLTensor *indices)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
std::string kernel_name = ((is_pool3x3_stride_le3) ? "pooling_layer_optimized_" : "pooling_layer_")
+ support::cpp11::to_string(pool_size_x);
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
}
else // Run general case
{
std::string kernel_name = is_data_type_quantized_asymmetric(data_type) ? "pooling_layer_MxN_quantized_nchw" : "pooling_layer_MxN_nchw";
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
}
break;
}
build_opts.add_option_if(output->info()->tensor_shape().total_size_upper(3) > 1,
"-DDST_DEPTH=" + support::cpp11::to_string(output->info()->dimension(idx_height)));
std::string kernel_name = is_data_type_quantized_asymmetric(data_type) ? "pooling_layer_MxN_quantized_nhwc" : "pooling_layer_MxN_nhwc";
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
break;
}
default:
}
void CLPriorBoxLayerKernel::configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const PriorBoxLayerInfo &info, cl::Buffer *min, cl::Buffer *max, cl::Buffer *aspect_ratios)
+{ // No compile context given: delegate to the CLCompileContext overload with the one from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output, info, min, max, aspect_ratios);
+}
+
+void CLPriorBoxLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, const PriorBoxLayerInfo &info, cl::Buffer *min,
+ cl::Buffer *max, cl::Buffer *aspect_ratios)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output);
}
unsigned int idx = num_arguments_per_2D_tensor();
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("prior_box_layer_nchw", build_opts.options()));
+ _kernel = create_kernel(compile_context, "prior_box_layer_nchw", build_opts.options());
_kernel.setArg(idx++, *_min);
_kernel.setArg(idx++, *_max);
}
void CLQuantizationLayerKernel::configure(const ICLTensor *input, ICLTensor *output)
+{ // No compile context given: delegate to the CLCompileContext overload with the one from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), input, output);
+}
+
+void CLQuantizationLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info()));
build_opts.add_option("-DMIN_QUANT_VAL=" + support::cpp11::to_string(min_max_quant_values.first));
build_opts.add_option("-DMAX_QUANT_VAL=" + support::cpp11::to_string(min_max_quant_values.second));
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("quantization_layer", build_opts.options()));
+ _kernel = create_kernel(compile_context, "quantization_layer", build_opts.options());
}
Status CLQuantizationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output)
}
void CLROIAlignLayerKernel::configure(const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info)
+{ // No compile context given: delegate to the CLCompileContext overload with the one from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), input, rois, output, pool_info);
+}
+
+void CLROIAlignLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, rois);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), rois->info(), output->info(), pool_info));
// Create kernel
const std::string kernel_name = (is_qasymm) ? "roi_align_layer_quantized" : "roi_align_layer";
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
ICLKernel::configure_internal(win_config.second);
}
}
void CLROIPoolingLayerKernel::configure(const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info)
+{ // No compile context given: delegate to the CLCompileContext overload with the one from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), input, rois, output, pool_info);
+}
+
+void CLROIPoolingLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, rois, output);
// Create kernel
std::string kernel_name = "roi_pooling_layer";
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts);
// Set static kernel arguments
unsigned int idx = 2 * num_arguments_per_3D_tensor() + num_arguments_per_1D_array();
}
void CLRangeKernel::configure(ICLTensor *output, const float start, const float end, const float step)
+{ // No compile context given: delegate to the CLCompileContext overload with the one from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), output, start, end, step);
+}
+
+void CLRangeKernel::configure(CLCompileContext &compile_context, ICLTensor *output, const float start, const float end, const float step)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(output);
kernel_name += "_quantized";
}
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
ICLKernel::configure_internal(win_config.second);
// Set config_id for enabling LWS tuning
}
void CLReductionOperationKernel::configure(const ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op, unsigned int width)
+{ // No compile context given: delegate to the CLCompileContext overload with the one from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), input, output, axis, op, width);
+}
+
+void CLReductionOperationKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, unsigned int axis, ReductionOperation op, unsigned int width)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
default:
ARM_COMPUTE_ERROR("Not supported");
}
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("reduction_operation_" + kernel_axis_name, build_opts.options()));
+ _kernel = create_kernel(compile_context, "reduction_operation_" + kernel_axis_name, build_opts.options());
// Configure kernel window
auto win_config = validate_and_configure_window(_input->info(), _output->info(), axis, op);
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
}
void CLRemapKernel::configure(const ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, InterpolationPolicy policy, bool border_undefined)
+{ // No compile context given: delegate to the CLCompileContext overload with the one from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), input, map_x, map_y, output, policy, border_undefined);
+}
+
+void CLRemapKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *map_x, const ICLTensor *map_y, ICLTensor *output, InterpolationPolicy policy,
+ bool border_undefined)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
std::string interpolation_name = string_from_interpolation_policy(policy);
std::transform(interpolation_name.begin(), interpolation_name.end(), interpolation_name.begin(), ::tolower);
std::string kernel_name = "remap_" + interpolation_name;
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts);
// Configure window
constexpr unsigned int num_elems_processed_per_iteration = 4;
}
void CLReorgLayerKernel::configure(const ICLTensor *input, ICLTensor *output, int32_t stride)
+{ // No compile context given: delegate to the CLCompileContext overload with the one from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), input, output, stride);
+}
+
+void CLReorgLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t stride)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), stride));
build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
build_opts.add_option("-DSRC_DEPTH=" + support::cpp11::to_string(input->info()->dimension(idx_channel)));
build_opts.add_option("-DSTRIDE=" + support::cpp11::to_string(stride));
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
// Configure window
// auto inizialize the output tensor if not yet initialized
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
}
void CLReshapeLayerKernel::configure(const ICLTensor *input, ICLTensor *output)
+{ // No compile context given: delegate to the CLCompileContext overload with the one from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), input, output);
+}
+
+void CLReshapeLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info()));
// Create kernel
std::set<std::string> build_opts = { "-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size(input->info()->element_size()) };
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("reshape_layer", build_opts));
+ _kernel = create_kernel(compile_context, "reshape_layer", build_opts);
// Add static arguments
const cl_int2 input_shape =
}
void CLReverseKernel::configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *axis)
+{ // No compile context given: delegate to the CLCompileContext overload with the one from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), input, output, axis);
+}
+
+void CLReverseKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const ICLTensor *axis)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, axis);
build_opts.add_option("-DDATA_TYPE=" + get_cl_unsigned_type_from_element_size(input->info()->element_size()));
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("reverse", build_opts.options()));
+ _kernel = create_kernel(compile_context, "reverse", build_opts.options());
// Set static kernel arguments
unsigned int idx = 2 * num_arguments_per_4D_tensor() + num_arguments_per_1D_tensor();
}
void CLScaleKernel::configure(const ICLTensor *input, ICLTensor *output, InterpolationPolicy policy, BorderMode border_mode, SamplingPolicy sampling_policy, bool align_corners)
+{ // No compile context given: delegate to the CLCompileContext overload with the one from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), input, output, policy, border_mode, sampling_policy, align_corners);
+}
+
+void CLScaleKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, InterpolationPolicy policy, BorderMode border_mode, SamplingPolicy sampling_policy,
+ bool align_corners)
{
_align_corners = policy == InterpolationPolicy::BILINEAR
&& sampling_policy == SamplingPolicy::TOP_LEFT
std::string kernel_name = "scale_" + interpolation_name;
kernel_name += call_quantized_kernel ? "_quantized_" : "_";
kernel_name += lower_string(string_from_data_layout(_data_layout));
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
unsigned int idx = is_nhwc ? 2 * num_arguments_per_4D_tensor() : 2 * num_arguments_per_2D_tensor(); //Skip the input and output parameters
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
}
void CLScharr3x3Kernel::configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined)
+{ // No compile context given: delegate to the CLCompileContext overload with the one from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), input, output_x, output_y, border_undefined);
+}
+
+void CLScharr3x3Kernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
ARM_COMPUTE_ERROR_ON((output_x == nullptr) && (output_y == nullptr));
}
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("scharr3x3", build_opts));
+ _kernel = create_kernel(compile_context, "scharr3x3", build_opts);
// Configure kernel window
constexpr unsigned int num_elems_processed_per_iteration = 8;
{
}
void CLSelectKernel::configure(const ICLTensor *c, const ICLTensor *x, const ICLTensor *y, ICLTensor *output)
+{ // No compile context given: delegate to the CLCompileContext overload with the one from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), c, x, y, output);
+}
+
+void CLSelectKernel::configure(CLCompileContext &compile_context, const ICLTensor *c, const ICLTensor *x, const ICLTensor *y, ICLTensor *output)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(c, x, y, output);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(c->info(), x->info(), y->info(), output->info()));
kernel_name += "_different_rank";
kernel_name += is_input_rank_greater_than_two ? "_n" : "_2";
}
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
// Configure kernel window
auto win_config = validate_and_configure_window(c->info(), x->info(), y->info(), output->info());
}
void CLSobel3x3Kernel::configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined)
+{ // No compile context given: delegate to the CLCompileContext overload with the one from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), input, output_x, output_y, border_undefined);
+}
+
+void CLSobel3x3Kernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
ARM_COMPUTE_ERROR_ON((output_x == nullptr) && (output_y == nullptr));
// Create kernel
const std::string kernel_name = std::string("sobel3x3");
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts);
// Configure kernel window
constexpr unsigned int num_elems_processed_per_iteration = 8;
}
void CLSobel5x5HorKernel::configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined)
+{ // No compile context given: delegate to the CLCompileContext overload with the one from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), input, output_x, output_y, border_undefined);
+}
+
+void CLSobel5x5HorKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
ARM_COMPUTE_ERROR_ON((output_x == nullptr) && (output_y == nullptr));
// Create kernel
const std::string kernel_name = std::string("sobel_separable1x5");
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts);
// Configure kernel window
constexpr unsigned int num_elems_processed_per_iteration = 8;
}
void CLSobel5x5VertKernel::configure(const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined)
+{ // No compile context given: delegate to the CLCompileContext overload with the one from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), input_x, input_y, output_x, output_y, border_undefined);
+}
+
+void CLSobel5x5VertKernel::configure(CLCompileContext &compile_context, const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined)
{
ARM_COMPUTE_ERROR_ON((output_x == nullptr) && (output_y == nullptr));
// Create kernel
const std::string kernel_name = std::string("sobel_separable5x1");
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts);
const ICLTensor *input = _run_sobel_x ? _input_x : _input_y;
}
void CLSobel7x7HorKernel::configure(const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined)
+{ // No compile context given: delegate to the CLCompileContext overload with the one from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), input, output_x, output_y, border_undefined);
+}
+
+void CLSobel7x7HorKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
ARM_COMPUTE_ERROR_ON((output_x == nullptr) && (output_y == nullptr));
}
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts);
// Configure kernel window
constexpr unsigned int num_elems_processed_per_iteration = 8;
}
void CLSobel7x7VertKernel::configure(const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined)
+{ // No compile context given: delegate to the CLCompileContext overload with the one from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), input_x, input_y, output_x, output_y, border_undefined);
+}
+
+void CLSobel7x7VertKernel::configure(CLCompileContext &compile_context, const ICLTensor *input_x, const ICLTensor *input_y, ICLTensor *output_x, ICLTensor *output_y, bool border_undefined)
{
ARM_COMPUTE_ERROR_ON((output_x == nullptr) && (output_y == nullptr));
// Create kernel
const std::string kernel_name = std::string("sobel_separable7x1");
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts);
const ICLTensor *input = _run_sobel_x ? _input_x : _input_y;
}
void CLLogits1DMaxShiftExpSumKernel::configure(const ICLTensor *input, ICLTensor *max, ICLTensor *output, ICLTensor *sum, const SoftmaxKernelInfo &info)
+{ // No compile context given: delegate to the CLCompileContext overload with the one from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), input, max, output, sum, info);
+}
+
+void CLLogits1DMaxShiftExpSumKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *max, ICLTensor *output, ICLTensor *sum, const SoftmaxKernelInfo &info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, max, sum, output);
}
// Create kernel.
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
// Set static arguments. Both the kernels use the same arguments
unsigned int idx = 4 * num_arguments_per_3D_tensor(); //Skip the input and output parameters
}
void CLLogits1DNormKernel::configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, const SoftmaxKernelInfo &info)
+{ // No compile context given: delegate to the CLCompileContext overload with the one from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), input, sum, output, info);
+}
+
+void CLLogits1DNormKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, const SoftmaxKernelInfo &info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, sum, output);
// Create kernel
std::string kernel_name = is_quantized_asymmetric ? "softmax_layer_norm_quantized" : "softmax_layer_norm";
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
// Configure window
auto win_config = validate_and_configure_window_1DNorm(input->info(), output->info(), sum->info(), info);
}
void CLSpaceToBatchLayerKernel::configure(const ICLTensor *input, const ICLTensor *block_shape, const ICLTensor *paddings, ICLTensor *output)
+{ // No compile context given: delegate to the CLCompileContext overload with the one from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), input, block_shape, paddings, output); // NOTE(review): the target overload null-checks only input/output before calling block_shape->info() and paddings->info() - confirm these two can never be null.
+}
+
+void CLSpaceToBatchLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *block_shape, const ICLTensor *paddings, ICLTensor *output)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), block_shape->info(), paddings->info(), output->info()));
build_opts.add_option("-DWIDTH_IN=" + support::cpp11::to_string(input->info()->dimension(idx_width)));
build_opts.add_option("-DHEIGHT_IN=" + support::cpp11::to_string(input->info()->dimension(idx_height)));
build_opts.add_option("-DBATCH_IN=" + support::cpp11::to_string(input->info()->dimension(idx_batch)));
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("space_to_batch_" + lower_string(string_from_data_layout(input->info()->data_layout())), build_opts.options()));
+ _kernel = create_kernel(compile_context, "space_to_batch_" + lower_string(string_from_data_layout(input->info()->data_layout())), build_opts.options());
// Configure kernel window
Window win = calculate_max_window(*output->info(), Steps());
void CLSpaceToBatchLayerKernel::configure(const ICLTensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right,
ICLTensor *output)
+{ // No compile context given: delegate to the CLCompileContext overload with the one from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), input, block_shape_x, block_shape_y, padding_left, padding_right, output);
+}
+
+void CLSpaceToBatchLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left,
+ const Size2D &padding_right,
+ ICLTensor *output)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
build_opts.add_option("-DPAD_RIGHT_X=" + support::cpp11::to_string(padding_right.x()));
build_opts.add_option("-DPAD_LEFT_Y=" + support::cpp11::to_string(padding_left.y()));
build_opts.add_option("-DPAD_RIGHT_Y=" + support::cpp11::to_string(padding_right.y()));
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("space_to_batch_static_" + lower_string(string_from_data_layout(input->info()->data_layout())), build_opts.options()));
+ _kernel = create_kernel(compile_context, "space_to_batch_static_" + lower_string(string_from_data_layout(input->info()->data_layout())), build_opts.options());
// Configure kernel window
Window win = calculate_max_window(*output->info(), Steps());
}
void CLSpaceToDepthLayerKernel::configure(const ICLTensor *input, ICLTensor *output, int32_t block_shape)
+{ // No compile context given: delegate to the CLCompileContext overload with the one from CLKernelLibrary::get().
+ configure(CLKernelLibrary::get().get_compile_context(), input, output, block_shape);
+}
+
// Overload that receives the compile context explicitly (excerpt: parts of the body are not shown).
// In the visible lines CHANNEL_SIZE and WIDTH_IN are taken from the output's dimensions and BLOCK_SHAPE
// from block_shape; the kernel "space_to_depth_" + lower_string(<input data-layout string>) is built with
// create_kernel(compile_context, ...) instead of CLKernelLibrary::get().create_kernel(...).
+void CLSpaceToDepthLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, int32_t block_shape)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
build_opts.add_option("-DCHANNEL_SIZE=" + support::cpp11::to_string(output->info()->dimension(idx_channel)));
build_opts.add_option("-DBLOCK_SHAPE=" + support::cpp11::to_string(block_shape));
build_opts.add_option("-DWIDTH_IN=" + support::cpp11::to_string(output->info()->dimension(idx_width)));
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("space_to_depth_" + lower_string(string_from_data_layout(input->info()->data_layout())), build_opts.options()));
+ _kernel = create_kernel(compile_context, "space_to_depth_" + lower_string(string_from_data_layout(input->info()->data_layout())), build_opts.options());
// Configure kernel window
Window win = calculate_max_window(*output->info(), Steps());
}
// Convenience overload: obtains the compile context from CLKernelLibrary::get().get_compile_context()
// and forwards input, axis, idx_input, num_tensors and output unchanged to the CLCompileContext overload.
void CLStackLayerKernel::configure(const ICLTensor *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, ICLTensor *output)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), input, axis, idx_input, num_tensors, output);
+}
+
// Overload that receives the compile context explicitly (excerpt: parts of the body are not shown).
// The visible lines reject null tensors, throw if validate_arguments() reports an error, and build the
// "stack_layer" kernel with create_kernel(compile_context, ...) instead of CLKernelLibrary::get().create_kernel(...).
+void CLStackLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, ICLTensor *output)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), axis, idx_input, num_tensors, output->info()));
build_opts.add_option("-DDST_DIM3=" + support::cpp11::to_string(output->info()->dimension(3)));
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("stack_layer", build_opts.options()));
+ _kernel = create_kernel(compile_context, "stack_layer", build_opts.options());
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
ICLKernel::configure_internal(win_config.second);
// Convenience overload: obtains the compile context from CLKernelLibrary::get().get_compile_context()
// and forwards the tensors, start/end/stride coordinates and the three masks unchanged to the
// CLCompileContext overload.
void CLStridedSliceKernel::configure(const ICLTensor *input, ICLTensor *output,
const Coordinates &starts, const Coordinates &ends, const BiStrides &strides,
int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), input, output, starts, ends, strides, begin_mask, end_mask, shrink_axis_mask);
+}
+
// Overload that receives the compile context explicitly (excerpt: parts of the body are not shown).
// The visible lines reject null tensors, throw if validate_arguments() reports an error, build the
// "strided_slice" kernel with create_kernel(compile_context, ...) instead of
// CLKernelLibrary::get().create_kernel(...), and start the LWS-tuning config id with "strided_slice".
+void CLStridedSliceKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output,
+ const Coordinates &starts, const Coordinates &ends, const BiStrides &strides,
+ int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), starts, ends, strides, begin_mask, end_mask, shrink_axis_mask));
"-DDST_DEPTH=1");
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("strided_slice", build_opts.options()));
+ _kernel = create_kernel(compile_context, "strided_slice", build_opts.options());
// Set config_id for enabling LWS tuning
_config_id = "strided_slice";
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
using namespace arm_compute;
// Convenience overload: obtains the compile context from CLKernelLibrary::get().get_compile_context()
// and forwards input, lut and output unchanged to the CLCompileContext overload.
void CLTableLookupKernel::configure(const ICLTensor *input, const ICLLut *lut, ICLTensor *output)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), input, lut, output);
+}
+
// Overload that receives the compile context explicitly (excerpt: parts of the body are not shown).
// The visible lines require single-channel U8 or S16 input and output, pick "tablelookup_S16" when
// lut->type() is DataType::S16 and "tablelookup_U8" otherwise, and build that kernel with
// create_kernel(compile_context, kernel_name) -- no build options are passed -- instead of
// CLKernelLibrary::get().create_kernel(kernel_name).
+void CLTableLookupKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLLut *lut, ICLTensor *output)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S16);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16);
// Create kernel
std::string kernel_name = (DataType::S16 == lut->type()) ? "tablelookup_S16" : "tablelookup_U8";
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name));
+ _kernel = create_kernel(compile_context, kernel_name);
// Set lut argument
unsigned int idx = 2 * num_arguments_per_2D_tensor(); //Skip the input and output parameters
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
// Convenience overload: obtains the compile context from CLKernelLibrary::get().get_compile_context()
// and forwards input, output, threshold, false_value, true_value, type and upper unchanged to the
// CLCompileContext overload.
void CLThresholdKernel::configure(const ICLTensor *input, ICLTensor *output, uint8_t threshold,
uint8_t false_value, uint8_t true_value, ThresholdType type, uint8_t upper)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), input, output, threshold, false_value, true_value, type, upper);
+}
+
// Overload that receives the compile context explicitly (excerpt: parts of the body, including the
// selection of kernel_name, are not shown). The visible lines require single-channel U8 input and
// output and build the kernel with create_kernel(compile_context, kernel_name) -- no build options
// are passed -- instead of CLKernelLibrary::get().create_kernel(kernel_name).
+void CLThresholdKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, uint8_t threshold,
+ uint8_t false_value, uint8_t true_value, ThresholdType type, uint8_t upper)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
}
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name));
+ _kernel = create_kernel(compile_context, kernel_name);
// Set arguments
unsigned int idx = 2 * num_arguments_per_2D_tensor(); //Skip the input and output parameters
}
// Convenience overload: obtains the compile context from CLKernelLibrary::get().get_compile_context()
// and forwards input, output and multiples unchanged to the CLCompileContext overload.
void CLTileKernel::configure(const ICLTensor *input, ICLTensor *output, const Multiples &multiples)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), input, output, multiples);
+}
+
// Overload that receives the compile context explicitly (excerpt: parts of the body are not shown).
// In the visible lines DST_DEPTH comes from output dimension 2, OFFSET and VEC_SIZE are added only
// when multi_access_x is set, and the "tile" kernel is built with create_kernel(compile_context, ...)
// instead of CLKernelLibrary::get().create_kernel(...).
+void CLTileKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Multiples &multiples)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
build_opts.add_option("-DDST_DEPTH=" + support::cpp11::to_string(output->info()->dimension(2)));
build_opts.add_option_if(multi_access_x, "-DOFFSET=" + support::cpp11::to_string(offset));
build_opts.add_option_if(multi_access_x, "-DVEC_SIZE=" + support::cpp11::to_string(vec_size_x));
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("tile", build_opts.options()));
+ _kernel = create_kernel(compile_context, "tile", build_opts.options());
// Configure window without padding
Window win = calculate_max_window(*output->info());
}
// Convenience overload: obtains the compile context from CLKernelLibrary::get().get_compile_context()
// and forwards input and output unchanged to the CLCompileContext overload.
void CLTransposeKernel::configure(const ICLTensor *input, ICLTensor *output)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), input, output);
+}
+
// Overload that receives the compile context explicitly (excerpt: parts of the body are not shown).
// In the visible lines DATA_TYPE_IN_BYTES is set from the input's element_size(), and the "transpose"
// kernel is built with create_kernel(compile_context, ...) instead of
// CLKernelLibrary::get().create_kernel(...). Note that build_opts is passed directly here (no .options()).
+void CLTransposeKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
data_type_in_bytes << input->info()->element_size();
build_opts.emplace("-DDATA_TYPE_IN_BYTES=" + data_type_in_bytes.str());
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("transpose", build_opts));
+ _kernel = create_kernel(compile_context, "transpose", build_opts);
// Configure kernel window
auto win_config = validate_and_configure_window(input->info(), output->info());
}
// Convenience overload: obtains the compile context from CLKernelLibrary::get().get_compile_context()
// and forwards input, output, info and upsampling_policy unchanged to the CLCompileContext overload.
void CLUpsampleLayerKernel::configure(const ICLTensor *input, ICLTensor *output, const Size2D &info, const InterpolationPolicy upsampling_policy)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), input, output, info, upsampling_policy);
+}
+
// Overload that receives the compile context explicitly (excerpt: parts of the body are not shown).
// upsampling_policy is marked unused in the visible lines. The VEC_SIZE_OUT / LAST_ACCESSED_X_* options
// are added only when multi_access_x is set, with the LAST_ACCESSED values clamped at 0 via std::max<int>.
// The kernel "upsample_layer_" + lower_string(<input data-layout string>) is built with
// create_kernel(compile_context, ...) instead of CLKernelLibrary::get().create_kernel(...).
+void CLUpsampleLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const Size2D &info, const InterpolationPolicy upsampling_policy)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_UNUSED(upsampling_policy);
build_opts.add_option_if(multi_access_x, "-DVEC_SIZE_OUT=" + support::cpp11::to_string(num_elems_processed_per_iteration_x));
build_opts.add_option_if(multi_access_x, "-DLAST_ACCESSED_X_IN=" + support::cpp11::to_string(std::max<int>(_input->info()->dimension(0) - _num_elems_processed_per_iteration_input_x, 0)));
build_opts.add_option_if(multi_access_x, "-DLAST_ACCESSED_X_OUT=" + support::cpp11::to_string(std::max<int>(output_width_x - num_elems_processed_per_iteration_x, 0)));
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("upsample_layer_" + lower_string(string_from_data_layout(input->info()->data_layout())), build_opts.options()));
+ _kernel = create_kernel(compile_context, "upsample_layer_" + lower_string(string_from_data_layout(input->info()->data_layout())), build_opts.options());
ICLKernel::configure_internal(win);
}
}
// Convenience overload: obtains the compile context from CLKernelLibrary::get().get_compile_context()
// and forwards input, output, matrix and policy unchanged to the CLCompileContext overload.
void CLWarpAffineKernel::configure(const ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), input, output, matrix, policy);
+}
+
// Overload that receives the compile context explicitly (excerpt: parts of the body are not shown).
// The visible lines require single-channel U8 input and output, lower-case the interpolation policy's
// string form with ::tolower, and build the kernel "warp_affine_<interpolation>" with
// create_kernel(compile_context, ...) instead of CLKernelLibrary::get().create_kernel(...).
+void CLWarpAffineKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
std::string interpolation_name = string_from_interpolation_policy(policy);
std::transform(interpolation_name.begin(), interpolation_name.end(), interpolation_name.begin(), ::tolower);
const std::string kernel_name = "warp_affine_" + interpolation_name;
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, options));
+ _kernel = create_kernel(compile_context, kernel_name, options);
// Set static kernel arguments
unsigned int idx = 2 * num_arguments_per_2D_tensor(); //Skip the input and output parameters
/*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
}
// Convenience overload: obtains the compile context from CLKernelLibrary::get().get_compile_context()
// and forwards input, output, matrix and policy unchanged to the CLCompileContext overload.
void CLWarpPerspectiveKernel::configure(const ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), input, output, matrix, policy);
+}
+
// Overload that receives the compile context explicitly (excerpt: parts of the body are not shown).
// The visible lines require single-channel U8 input and output, lower-case the interpolation policy's
// string form with ::tolower, and build the kernel "warp_perspective_<interpolation>" with
// create_kernel(compile_context, ...) instead of CLKernelLibrary::get().create_kernel(...).
+void CLWarpPerspectiveKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const std::array<float, 9> &matrix, InterpolationPolicy policy)
{
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
std::string interpolation_name = string_from_interpolation_policy(policy);
std::transform(interpolation_name.begin(), interpolation_name.end(), interpolation_name.begin(), ::tolower);
std::string kernel_name = "warp_perspective_" + interpolation_name;
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, options));
+ _kernel = create_kernel(compile_context, kernel_name, options);
// Set static kernel arguments
unsigned int idx = 2 * num_arguments_per_2D_tensor(); //Skip the input and output parameters
}
// Convenience overload: obtains the compile context from CLKernelLibrary::get().get_compile_context()
// and forwards input, biases, output and num_groups unchanged to the CLCompileContext overload.
void CLWeightsReshapeKernel::configure(const ICLTensor *input, const ICLTensor *biases, ICLTensor *output, unsigned int num_groups)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), input, biases, output, num_groups);
+}
+
// Overload that receives the compile context explicitly (excerpt: parts of the body are not shown).
// In the visible lines only input and output are null-checked; HAS_BIAS is defined when biases is
// non-null, and the "reshape_to_columns" kernel is built with create_kernel(compile_context, ...)
// instead of CLKernelLibrary::get().create_kernel(...).
+void CLWeightsReshapeKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *biases, ICLTensor *output, unsigned int num_groups)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
build_opts.add_option_if(biases != nullptr, "-DHAS_BIAS");
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("reshape_to_columns", build_opts.options()));
+ _kernel = create_kernel(compile_context, "reshape_to_columns", build_opts.options());
// Configure window
Window win = calculate_max_window(*input->info(), Steps());
}
// Convenience overload: obtains the compile context from CLKernelLibrary::get().get_compile_context()
// and forwards input1, input2 and output unchanged to the CLCompileContext overload.
void CLWidthConcatenate2TensorsKernel::configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), input1, input2, output);
+}
+
// Overload that receives the compile context explicitly (excerpt: parts of the body are not shown).
// The visible lines reject null tensors, throw if validate_arguments() reports an error, and build the
// "concatenate_width_x2" kernel with create_kernel(compile_context, ...) instead of
// CLKernelLibrary::get().create_kernel(...).
+void CLWidthConcatenate2TensorsKernel::configure(CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input1->info(), input2->info(), output->info()));
}
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("concatenate_width_x2", build_opts.options()));
+ _kernel = create_kernel(compile_context, "concatenate_width_x2", build_opts.options());
// Configure kernel window
auto win_config = validate_and_configure_window(input1->info(), input2->info(), output->info());
}
// Convenience overload: obtains the compile context from CLKernelLibrary::get().get_compile_context()
// and forwards the four inputs and output unchanged to the CLCompileContext overload.
void CLWidthConcatenate4TensorsKernel::configure(const ICLTensor *input1, const ICLTensor *input2, const ICLTensor *input3, const ICLTensor *input4, ICLTensor *output)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), input1, input2, input3, input4, output);
+}
+
// Overload that receives the compile context explicitly (excerpt: parts of the body are not shown).
// The visible lines reject null tensors, throw if validate_arguments() reports an error, and build the
// "concatenate_width_x4" kernel with create_kernel(compile_context, ...) instead of
// CLKernelLibrary::get().create_kernel(...).
+void CLWidthConcatenate4TensorsKernel::configure(CLCompileContext &compile_context, const ICLTensor *input1, const ICLTensor *input2, const ICLTensor *input3, const ICLTensor *input4,
+ ICLTensor *output)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, input3, input4, output);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input1->info(), input2->info(), input3->info(), input4->info(), output->info()));
}
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("concatenate_width_x4", build_opts.options()));
+ _kernel = create_kernel(compile_context, "concatenate_width_x4", build_opts.options());
// Configure kernel window
auto win_config = validate_and_configure_window(input1->info(), input2->info(), input3->info(), input4->info(), output->info());
}
// Convenience overload: obtains the compile context from CLKernelLibrary::get().get_compile_context()
// and forwards input, width_offset and output unchanged to the CLCompileContext overload.
void CLWidthConcatenateLayerKernel::configure(const ICLTensor *input, unsigned int width_offset, ICLTensor *output)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), input, width_offset, output);
+}
+
// Overload that receives the compile context explicitly (excerpt: parts of the body are not shown).
// The visible lines reject null tensors, throw if validate_arguments() reports an error, build the
// "concatenate_width" kernel with create_kernel(compile_context, ...) instead of
// CLKernelLibrary::get().create_kernel(...), and throw if the first element of the window
// configuration result signals an error.
+void CLWidthConcatenateLayerKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, unsigned int width_offset, ICLTensor *output)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), width_offset, output->info()));
}
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("concatenate_width", build_opts.options()));
+ _kernel = create_kernel(compile_context, "concatenate_width", build_opts.options());
// Configure kernel window
auto win_config = validate_and_configure_window(input->info(), width_offset, output->info());
ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config));
}
// Convenience overload: obtains the compile context from CLKernelLibrary::get().get_compile_context()
// and forwards input, output and winograd_info unchanged to the CLCompileContext overload.
void CLWinogradFilterTransformKernel::configure(const ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), input, output, winograd_info);
+}
+
// Overload that receives the compile context explicitly (excerpt: parts of the body are not shown).
// The kernel name is assembled as
// "winograd_filter_transform_<output_tile_size>_<kernel_size>_<lower_string(input data-layout string)>"
// and the kernel is built with create_kernel(compile_context, ...) instead of
// CLKernelLibrary::get().create_kernel(...). The tensors are stored in _input/_output afterwards.
+void CLWinogradFilterTransformKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
// Create kernel
std::string kernel_name = "winograd_filter_transform_" + output_tile_size.to_string() + "_" + kernel_size.to_string() + "_" + lower_string(string_from_data_layout(input->info()->data_layout()));
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
_input = input;
_output = output;
}
// Convenience overload: obtains the compile context from CLKernelLibrary::get().get_compile_context()
// and forwards input, output and winograd_info unchanged to the CLCompileContext overload.
void CLWinogradInputTransformKernel::configure(const ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), input, output, winograd_info);
+}
+
// Overload that receives the compile context explicitly (excerpt: parts of the body, including the
// start of kernel_name, are not shown). The visible lines reject null tensors, throw if
// validate_arguments() reports an error, append _step_z and "_" + lower_string(<_data_layout string>)
// to the kernel name, and build the kernel with create_kernel(compile_context, ...) instead of
// CLKernelLibrary::get().create_kernel(...).
+void CLWinogradInputTransformKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output, const WinogradInfo &winograd_info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), winograd_info));
kernel_name += support::cpp11::to_string(_step_z);
kernel_name += "_" + lower_string(string_from_data_layout(_data_layout));
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
// Create window and update padding
auto win_config = validate_and_configure_window(input->info(), output->info(), winograd_info);
}
// Convenience overload: obtains the compile context from CLKernelLibrary::get().get_compile_context()
// and forwards input, bias, output, winograd_info and act_info unchanged to the CLCompileContext overload.
void CLWinogradOutputTransformKernel::configure(const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const WinogradInfo &winograd_info, const ActivationLayerInfo &act_info)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), input, bias, output, winograd_info, act_info);
+}
+
// Overload that receives the compile context explicitly (excerpt: parts of the body are not shown).
// The kernel name is assembled as
// "winograd_output_transform_<output_tile_size>_<kernel_size>_<lower_string(output data-layout string)>",
// using winograd_info.output_data_layout, and the kernel is built with
// create_kernel(compile_context, ...) instead of CLKernelLibrary::get().create_kernel(...).
// bias may be null: only input and output are null-checked, and the window configuration receives
// nullptr in place of the bias info in that case.
+void CLWinogradOutputTransformKernel::configure(CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *bias, ICLTensor *output, const WinogradInfo &winograd_info,
+ const ActivationLayerInfo &act_info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
// Create kernel
std::string kernel_name = "winograd_output_transform_" + output_tile_size.to_string() + "_" + kernel_size.to_string() + "_" + lower_string(string_from_data_layout(winograd_info.output_data_layout));
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
// Configure kernel window
auto win_config = validate_and_configure_window(input->info(), (bias != nullptr ? bias->info() : nullptr), output->info(), winograd_info.output_tile_size);
}
// Convenience overload: obtains the compile context from CLKernelLibrary::get().get_compile_context()
// and forwards input, output, act_info and num_classes unchanged to the CLCompileContext overload.
void CLYOLOLayerKernel::configure(ICLTensor *input, ICLTensor *output, const ActivationLayerInfo &act_info, int32_t num_classes)
+{
+ configure(CLKernelLibrary::get().get_compile_context(), input, output, act_info, num_classes);
+}
+
// Overload that receives the compile context explicitly (excerpt: parts of the body are not shown).
// Only input is null-checked in the visible lines. The kernel
// "yolo_layer_" + lower_string(<input data-layout string>) is built with
// create_kernel(compile_context, ...) instead of CLKernelLibrary::get().create_kernel(...),
// and input is then stored in _input.
+void CLYOLOLayerKernel::configure(CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, const ActivationLayerInfo &act_info, int32_t num_classes)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input);
// Create kernel
std::string kernel_name = std::string("yolo_layer_") + lower_string(string_from_data_layout(input->info()->data_layout()));
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
+ _kernel = create_kernel(compile_context, kernel_name, build_opts.options());
// Make sure _kernel is initialized before calling the parent's configure
_input = input;
// Creates a CLActivationLayerKernel, configures it with (input, output, act_info) and moves it into _kernel.
// In this change the kernel is default-constructed; the removed lines used to pass it a core runtime
// context taken from _ctx (or nullptr when _ctx was not set).
void CLActivationLayer::configure(ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info)
{
- auto core_ctx = _ctx ? _ctx->core_runtime_context() : /* Legacy */ nullptr;
-
- auto k = arm_compute::support::cpp14::make_unique<CLActivationLayerKernel>(core_ctx);
+ auto k = arm_compute::support::cpp14::make_unique<CLActivationLayerKernel>();
k->configure(input, output, act_info);
_kernel = std::move(k);
}