From: 장지섭/On-Device Lab(SR)/Engineer/삼성전자 Date: Thu, 31 Jan 2019 01:36:48 +0000 (+0900) Subject: Remove unnecessary kernels from ARMComputeEX (#4325) X-Git-Tag: submit/tizen/20190325.013700~284 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=563be3bef9b56806071416cde197574124180a55;p=platform%2Fcore%2Fml%2Fnnfw.git Remove unnecessary kernels from ARMComputeEX (#4325) This commit removes unnecessary kernels from ARMComputeEX Signed-off-by: jiseob.jang --- diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLBatchToSpaceNDKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLBatchToSpaceNDKernel.h deleted file mode 100644 index 1387897..0000000 --- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLBatchToSpaceNDKernel.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef __ARM_COMPUTE_CLBATCH_TO_SPACE_ND_KERNEL_H__ -#define __ARM_COMPUTE_CLBATCH_TO_SPACE_ND_KERNEL_H__ - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to perform BATCH_TO_SPACE_ND operation */ -class CLBatchToSpaceNDKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLBatchToSpaceNDKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLBatchToSpaceNDKernel(const CLBatchToSpaceNDKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLBatchToSpaceNDKernel &operator=(const CLBatchToSpaceNDKernel &) = delete; - /** Allow instances of this class to be moved */ - CLBatchToSpaceNDKernel(CLBatchToSpaceNDKernel &&) = default; - /** Allow instances of this class to be moved */ - CLBatchToSpaceNDKernel &operator=(CLBatchToSpaceNDKernel &&) = default; - /** Default destructor */ - ~CLBatchToSpaceNDKernel() = default; - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. - * @param[in] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32. - */ - void configure(const ICLTensor *input, ICLTensor *output, const int32_t *block_size); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; /**< Source tensor */ - ICLTensor *_output; /**< Destination tensor */ -}; - -} // namespace arm_compute -#endif /* __ARM_COMPUTE_CLSPACE_TO_BATCH_ND_KERNEL_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLLogicalNotKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLLogicalNotKernel.h deleted file mode 100644 index c199d01..0000000 --- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLLogicalNotKernel.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef __ARM_COMPUTE_CLLOGICALNOTKERNEL_H__ -#define __ARM_COMPUTE_CLLOGICALNOTKERNEL_H__ - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to return truth values of two input tensors for LogicalNOT*/ -class CLLogicalNotKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLLogicalNotKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers). */ - CLLogicalNotKernel(const CLLogicalNotKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers). */ - CLLogicalNotKernel &operator=(const CLLogicalNotKernel &) = delete; - /** Allow instances of this class to be moved */ - CLLogicalNotKernel(CLLogicalNotKernel &&) = default; - /** Allow instances of this class to be moved */ - CLLogicalNotKernel &operator=(CLLogicalNotKernel &&) = default; - /** Initialize the kernel's input, output. - * - * @param[in] input Source tensor. - * @param[out] output Output tensor. - */ - void configure(const ICLTensor *input, ICLTensor *output); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - ICLTensor *_output; -}; - -} // namespace arm_compute -#endif /*__ARM_COMPUTE_CLLOGICALNOTKERNEL_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLNormalizationLayerExKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLNormalizationLayerExKernel.h deleted file mode 100644 index 181a622..0000000 --- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLNormalizationLayerExKernel.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef __ARM_COMPUTE_CLNORMALIZATIONLAYEREXKERNEL_H__ -#define __ARM_COMPUTE_CLNORMALIZATIONLAYEREXKERNEL_H__ - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the normalization layer kernel. 
- */ -class CLNormalizationLayerExKernel : public ICLKernel -{ -public: - /** Constructor */ - CLNormalizationLayerExKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLNormalizationLayerExKernel(const CLNormalizationLayerExKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLNormalizationLayerExKernel &operator=(const CLNormalizationLayerExKernel &) = delete; - /** Default Move Constructor. */ - CLNormalizationLayerExKernel(CLNormalizationLayerExKernel &&) = default; - /** Default move assignment operator */ - CLNormalizationLayerExKernel &operator=(CLNormalizationLayerExKernel &&) = default; - /** Set the input and output tensors. - * - * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions - * [width, height, IFM], - * and an optional 4th dimension for batch of inputs. Data types supported: - * F16/F32. - * @param[out] output Destination tensor. Output will have the same number of dimensions as - * input. Data types supported: same as @p input. - * @param[in] norm_info Normalization layer information like the normalization type, - * normalization size and other parameters. - */ - void configure(const ICLTensor *input, ICLTensor *output, NormalizationLayerInfo norm_info); - /** Static function to check if given info will lead to a valid configuration of @ref - * CLNormalizationLayerKernel - * - * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions - * [width, height, IFM], - * and an optional 4th dimension for batch of inputs. Data types supported: - * F16/F32. - * @param[in] output Destination tensor. Output will have the same number of dimensions as - * input. Data types supported: same as @p input. - * @param[in] norm_info Normalization layer information like the normalization type, normalization - * size and other parameters. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, - NormalizationLayerInfo norm_info); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - const ICLTensor *_input; - ICLTensor *_output; - BorderSize _border_size; - bool _is_in_map; -}; -} // namespace arm_compute -#endif /*__ARM_COMPUTE_CLNORMALIZATIONLAYEREXKERNEL_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPixelWiseDivisionKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPixelWiseDivisionKernel.h deleted file mode 100644 index d579f5d..0000000 --- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLPixelWiseDivisionKernel.h +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/** - * @file CLPixelWiseDivisionKernel.h - * @ingroup COM_AI_RUNTIME - * @brief This file defines CLPixelWiseDivisionKernel class - */ - -#ifndef __ARM_COMPUTE_CLPIXELWISEDIVISIONKERNEL_H__ -#define __ARM_COMPUTE_CLPIXELWISEDIVISIONKERNEL_H__ - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** - * @brief Interface for the pixelwise division kernel. - */ -class CLPixelWiseDivisionKernel : public ICLKernel -{ -public: - /** - * @brief Construct a CLPixelWiseDivisionKernel object - */ - CLPixelWiseDivisionKernel(); - - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers). - */ - CLPixelWiseDivisionKernel(const CLPixelWiseDivisionKernel &) = delete; - - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers). - */ - CLPixelWiseDivisionKernel &operator=(const CLPixelWiseDivisionKernel &) = delete; - - /** - * @brief Construct a CLPixelWiseDivisionKernel object by using move constructor - * @param[in] CLPixelWiseDivisionKernel object to move - */ - CLPixelWiseDivisionKernel(CLPixelWiseDivisionKernel &&) = default; - - /** - * @brief Allow instances of this class to be moved - * @param[in] CLPixelWiseDivisionKernel object to move - */ - CLPixelWiseDivisionKernel &operator=(CLPixelWiseDivisionKernel &&) = default; - - /** - * @brief Initialise the kernel's input, output and border mode. - * @param[in] input1 An input tensor. Data types supported: U8/S16/F16/F32. - * @param[in] input2 An input tensor. Data types supported: same as @p input1. - * @param[out] output The output tensor, Data types supported: same as @p input1. Note: - * U8 requires both inputs to be U8. - * @param[in] scale Scale to apply after division. - * Scale must be positive and its value must be either 1/255 or 1/2^n - * where n is between 0 and 15. - * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate - * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest - * even. - * @return N/A - */ - void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float scale, - ConvertPolicy overflow_policy, RoundingPolicy rounding_policy); - - /** - * @brief Static function to check if given info will lead to a valid configuration of @ref - * CLPixelWiseDivisionKernel - * @param[in] input1 An input tensor info. Data types supported: U8/S16/F16/F32. - * @param[in] input2 An input tensor info. Data types supported: same as @p input1. - * @param[in] output The output tensor info, Data types supported: same as @p input1. - * Note: U8 requires both inputs to be U8. - * @param[in] scale Scale to apply after division. - * Scale must be positive and its value must be either 1/255 or 1/2^n - * where n is between 0 and 15. - * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate - * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even. - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, - const ITensorInfo *output, float scale, ConvertPolicy overflow_policy, - RoundingPolicy rounding_policy); - - /** - * @brief Enqueue the OpenCL kernel to process the given window on the passed OpenCL command - * queue. - * @note The queue is *not* flushed by this method, and therefore the kernel will not have - * been executed by the time this method returns. 
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of - * the window returned by window()). - * @param[in,out] queue Command queue on which to enqueue the kernel.@return N/A - * @return N/A - */ - void run(const Window &window, cl::CommandQueue &queue) override; - - /** - * @brief The size of the border for that kernel - * @return The width in number of elements of the border. - */ - BorderSize border_size() const override; - -private: - const ICLTensor *_input1; - const ICLTensor *_input2; - ICLTensor *_output; -}; -} // namespace arm_compute -#endif /*__ARM_COMPUTE_CLPIXELWISEDIVISIONKERNEL_H__ */ diff --git a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLStridedSliceExKernel.h b/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLStridedSliceExKernel.h deleted file mode 100644 index 6368c38..0000000 --- a/libs/ARMComputeEx/arm_compute/core/CL/kernels/CLStridedSliceExKernel.h +++ /dev/null @@ -1,142 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file CLStridedSliceExKernel.h - * @ingroup COM_AI_RUNTIME - * @brief This file defines CLStridedSliceExKernel class - */ - -#ifndef __ARM_COMPUTE_CLSTRIDEDSLICEEXKERNEL_H__ -#define __ARM_COMPUTE_CLSTRIDEDSLICEEXKERNEL_H__ - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** -* @brief Class to define an interface for the kernel to extract a strided slice of a tensor -*/ -class CLStridedSliceExKernel : public ICLKernel -{ -public: - /** - * @brief Construct a CLStridedSliceExKernel object - * */ - CLStridedSliceExKernel(); - - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers) - * */ - CLStridedSliceExKernel(const CLStridedSliceExKernel &) = delete; - - /** - * @brief Prevent instances of this class from being copied (As this class contains pointers) - * */ - CLStridedSliceExKernel &operator=(const CLStridedSliceExKernel &) = delete; - - /** - * @brief Construct a CLStridedSliceExKernel object by using default move constructor - * @param[in] CLStridedSliceExKernel object to move - * */ - CLStridedSliceExKernel(CLStridedSliceExKernel &&) = default; - - /** - * @brief Move assignment operator - * @param[in] CLStridedSliceExKernel object to move - * */ - CLStridedSliceExKernel &operator=(CLStridedSliceExKernel &&) = default; - - /** - * @brief Destruct this object - * */ - ~CLStridedSliceExKernel() = default; - - /** - * @brief Set the input and output of the kernel - * @param[in] input Source tensor. Data type supported: - * U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 - * @param[out] output Destination tensor. Data type supported: Same as @p input - * @param[in] beginData The begin tensor. Data types supported: S32. - * The number of dimensions must be 1. - * The length must be the same as the number of dimensions of input. - * @param[in] endData The end tensor. 
Data types supported: S32. - * The number of dimensions must be 1. - * The length must be the same as the number of dimensions of input. - * @param[in] strideData The stride tensor. Data types supported: S32. - * The number of dimensions must be 1. - * The length must be the same as the number of dimensions of input. - * @param[in] beginMask Mask for begin - * @param[in] endMask Mask for end - * @param[in] shrinkAxisMask Mask for shrink axis. - * @return N/A - */ - void configure(const ICLTensor *input, ICLTensor *output, ICLTensor *beginData, - ICLTensor *endData, ICLTensor *stridesData, int32_t beginMask, int32_t endMask, - int32_t shrinkAxisMask); - - /** - * @brief Static function to check if given info will lead to a valid configuration of @ref - * CLStridedSliceExKernel - * @param[in] input The input tensor info. Data types supported: - * U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32 - * @param[in] output The output tensor info, Data types supported: same as @p input1. - * @param[in] begin The begin tensor info. Data types supported: S32. - * The number of dimensions must be 1. - * The length must be the same as the number of dimensions of input. - * @param[in] end The end tensor info. Data types supported: S32. - * The number of dimensions must be 1. - * The length must be the same as the number of dimensions of input. - * @param[in] stride The stride tensor info. Data types supported: S32. - * The number of dimensions must be 1. - * The length must be the same as the number of dimensions of input. - * @param[in] beginMask Mask for begin - * @param[in] endMask Mask for end - * @param[in] shrinkAxisMask Mask for shrink axis. - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, - const ITensorInfo *begin, const ITensorInfo *end, - const ITensorInfo *stride, int32_t beginMask, int32_t endMask, - int32_t shrinkAxisMask); - - /** - * @brief Enqueue the OpenCL kernel to process the given window on the passed OpenCL command - * queue. - * @note The queue is *not* flushed by this method, and therefore the kernel will not have - * been executed by the time this method returns. - * @param[in] window Region on which to execute the kernel. (Must be a valid region of - * the window returned by window()). 
- * @param[in,out] queue Command queue on which to enqueue the kernel.@return N/A - * @return N/A - */ - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; /** Source tensor */ - ICLTensor *_output; /** Destination tensor */ - ICLTensor *_beginData; /** Start indices of input tensor */ - ICLTensor *_endData; /** Stop indices of input tensor */ - ICLTensor *_stridesData; /** Strides tensor */ - int32_t _beginMask; /** Begin mask */ - int32_t _endMask; /** End mask */ - int32_t _shrinkAxisMask; /** Shrink axis mask */ -}; -} // namespace arm_compute -#endif /*__ARM_COMPUTE_CLSTRIDEDSLICEEXKERNEL_H__ */ diff --git a/libs/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp b/libs/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp index 2ef886e..c5ab86b 100644 --- a/libs/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp +++ b/libs/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp @@ -42,7 +42,6 @@ const std::map CLKernelLibraryEx::_kernel_program_map {"activation_layer_ex", "activation_layer_ex.cl"}, {"arg_op", "arg_operation.cl"}, {"arithmetic_add_qasymm8", "arithmetic_op_quantized.cl"}, - {"batch_to_space_nd", "batch_to_space_nd.cl"}, {"binary_logical_op", "binary_logical_op.cl"}, {"cast", "cast.cl"}, {"cast_qasymm_in", "cast.cl"}, @@ -57,19 +56,15 @@ const std::map CLKernelLibraryEx::_kernel_program_map {"gather_1d", "gather.cl"}, {"gather_1d_out", "gather.cl"}, {"hashtable_lookup", "hashtable_lookup.cl"}, - {"logical_not", "logical_not.cl"}, {"neg_tensor", "neg_tensor.cl"}, {"pad", "pad.cl"}, {"permute_generic", "permute_ex.cl"}, {"pixelwise_mul_qasymm8", "pixelwise_mul_quantized.cl"}, - {"pixelwise_div_float", "pixelwise_div_float.cl"}, - {"pixelwise_div_int", "pixelwise_div_int.cl"}, {"prelu", "prelu.cl"}, {"prelu_qasymm8", "prelu_quantized.cl"}, {"reduce_min_max", "reduce_operation.cl"}, {"reduce_sum_mean", "reduce_operation.cl"}, {"squared_difference", "squared_difference.cl"}, - {"strided_slice_ex", "strided_slice_ex.cl"}, {"topkv2_init", "topkv2.cl"}, {"topkv2_find_first_negative", "topkv2.cl"}, {"topkv2_reorder_negatives", "topkv2.cl"}, @@ -95,10 +90,6 @@ const std::map CLKernelLibraryEx::_program_source_map #include "./cl_kernels/arg_operation.clembed" }, { - "batch_to_space_nd.cl", -#include "./cl_kernels/batch_to_space_nd.clembed" - }, - { "cast.cl", #include "./cl_kernels/cast.clembed" }, @@ -151,18 +142,6 @@ const std::map CLKernelLibraryEx::_program_source_map #include "./cl_kernels/pad.clembed" }, { - "logical_not.cl", -#include "./cl_kernels/logical_not.clembed" - }, - { - "pixelwise_div_float.cl", -#include "./cl_kernels/pixelwise_div_float.clembed" - }, - { - "pixelwise_div_int.cl", -#include "./cl_kernels/pixelwise_div_int.clembed" - }, - { "prelu.cl", #include "./cl_kernels/prelu.clembed" }, @@ -187,10 +166,6 @@ const std::map CLKernelLibraryEx::_program_source_map #include "./cl_kernels/squared_difference.clembed" }, { - "strided_slice_ex.cl", -#include "./cl_kernels/strided_slice_ex.clembed" - }, - { "topkv2.cl", #include "./cl_kernels/topkv2.clembed" }, diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/batch_to_space_nd.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/batch_to_space_nd.cl deleted file mode 100644 index ad6a48a..0000000 --- a/libs/ARMComputeEx/src/core/CL/cl_kernels/batch_to_space_nd.cl +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016, 2017 ARM Limited. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "helpers.h" - -#if defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(BLOCK_SIZE0) && defined(BLOCK_SIZE1) && defined(BATCH_OUT) -/** Perform batch to space rearrangement of tensor - * - * @attention Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float - * @attention Output tensor batch should be given as a preprocessor argument using -DBATCH_OUT=size. e.g. -DBATCH_OUT=16 - * @attention block size should be given as a preprocessor argument using -DBLOCK_SIZE0=size. e.g. -DBLOCK_SIZE0=1 - * - * @param[in] input_ptr Pointer to the source tensor. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 - * @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes) - * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes) - * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor - * - * @param[out] output_ptr Pointer to the destination tensor. 
Supported data types: same as @p inpu -t_ptr - * @param[in] output_stride_x Stride of the destination tensor in X dimension (in bytes) - * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] output_stride_y Stride of the destination tensor in Y dimension (in bytes) - * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in -bytes) - * @param[in] output_stride_z Stride of the destination tensor in Z dimension (in bytes) - * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] output_stride_w Stride of the destination tensor in W dimension (in bytes) - * @param[in] output_step_w output_stride_w * number of elements along W processed per workitem(in bytes) - * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination tensor - */ -__kernel void batch_to_space_nd( - TENSOR4D_DECLARATION(input), - TENSOR4D_DECLARATION(output)) - { - Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input, 0); - Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(output, DEPTH_OUT); - - int out_index[4]={0}; - int in_index[4]={0}; - - out_index[0] = get_global_id(0);//W - out_index[1] = get_global_id(1);//H - out_index[2] = get_global_id(2) % DEPTH_OUT;//C - out_index[3] = get_global_id(2) / DEPTH_OUT;//N - - in_index[0] = out_index[0]/BLOCK_SIZE1; - in_index[1] = out_index[1]/BLOCK_SIZE0; - in_index[2] = out_index[2]; - in_index[3] = out_index[3] + ((out_index[1] % BLOCK_SIZE0) * BLOCK_SIZE0 + out_index[0] % BLOCK_SIZE1) * BATCH_OUT; - - *((__global DATA_TYPE *)out.ptr) = *((__global DATA_TYPE *)tensor4D_offset(&in, in_index[0], in_index[1], in_index[2], in_index[3])); - } -#endif // defined(DATA_TYPE) && defined(DEPTH_IN) && defined(BLOCK_SIZE0) && defined(BLOCK_SIZE1) && defined(BATCH_OUT) diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/logical_not.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/logical_not.cl deleted file mode 100644 index 5137988..0000000 --- a/libs/ARMComputeEx/src/core/CL/cl_kernels/logical_not.cl +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "helpers.h" - -/** returns truth value of the input tensor for LOGICAL NOT. - * - * @attention Data type can be passed using the -DDATA_TYPE_IN compile flag, e.g. -DDATA_TYPE_IN=bool - * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16 - * @note Can only take boolean data types. - * - * @param[in] input_ptr Pointer to the source tensor. 
Supported data types: QASYMM8 - * @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes) - * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes) - * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor - * - * @param[out] output_ptr Pointer to the destination tensor. Supported data types: QASYMM8 - * @param[in] output_stride_x Stride of the destination tensor in X dimension (in bytes) - * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] output_stride_y Stride of the destination tensor in Y dimension (in bytes) - * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] output_stride_z Stride of the destination tensor in Z dimension (in bytes) - * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes) - */ -__kernel void logical_not( - TENSOR3D_DECLARATION(input), - TENSOR3D_DECLARATION(output)) -{ - Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input); - Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output); - - VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) in_data = - VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input.ptr); - - VSTORE(VEC_SIZE) (CONVERT(!in_data, VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)), - 0, (__global DATA_TYPE *)output.ptr); -} diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/pixelwise_div_float.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/pixelwise_div_float.cl deleted file mode 100644 index aa05121..0000000 --- a/libs/ARMComputeEx/src/core/CL/cl_kernels/pixelwise_div_float.cl +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016, 2017 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "helpers.h" - -#ifdef SATURATE -#define CONVERT_OP_FLOAT_STR(x, type, round) (convert_##type##_sat##round(x)) -#else /* SATURATE */ -#define CONVERT_OP_FLOAT_STR(x, type, round) (convert_##type##round(x)) -#endif /* SATURATE */ -#define CONVERT_OP_FLOAT(x, type, round) CONVERT_OP_FLOAT_STR(x, type, round) - -/** Performs a pixelwise division with float scale of either integer or float inputs. - * - * @attention The inputs and output data types need to be passed at compile time using -DDATA_TYPE_IN1, -DDATA_TYPE_IN2 and -DDATA_TYPE_OUT: - * e.g. -DDATA_TYPE_IN1=uchar -DDATA_TYPE_IN2=ushort -DDATA_TYPE_OUT=short - * @attention The data type of the intermediate result of the division should passed as well using -DDATA_TYPE_RES. 
- * e.g. If one of inputs is S16 -DDATA_TYPE_RES=int should be passed else -DDATA_TYPE_RES=short. - * @attention -DDATA_TYPE_FLOAT must be passed if floating point inputs are provided. - * - * @param[in] in1_ptr Pointer to the source image. Supported data types: U8, S16, F16, F32 - * @param[in] in1_stride_x Stride of the source image in X dimension (in bytes) - * @param[in] in1_step_x in1_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] in1_stride_y Stride of the source image in Y dimension (in bytes) - * @param[in] in1_step_y in1_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] in1_stride_z Stride of the source image in Y dimension (in bytes) - * @param[in] in1_step_z in1_stride_z * number of elements along Y processed per workitem(in bytes) - * @param[in] in1_offset_first_element_in_bytes The offset of the first element in the source image - * @param[in] in2_ptr Pointer to the source image. Supported data types: U8, S16, F16, F32 - * @param[in] in2_stride_x Stride of the source image in X dimension (in bytes) - * @param[in] in2_step_x in2_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] in2_stride_y Stride of the source image in Y dimension (in bytes) - * @param[in] in2_step_y in2_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] in2_stride_z Stride of the source image in Y dimension (in bytes) - * @param[in] in2_step_z in2_stride_z * number of elements along Y processed per workitem(in bytes) - * @param[in] in2_offset_first_element_in_bytes The offset of the first element in the source image - * @param[out] out_ptr Pointer to the destination image. Supported data types: U8, S16, F16, F32 - * @param[in] out_stride_x Stride of the destination image in X dimension (in bytes) - * @param[in] out_step_x out_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] out_stride_y Stride of the destination image in Y dimension (in bytes) - * @param[in] out_step_y out_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] out_stride_z Stride of the destination image in Y dimension (in bytes) - * @param[in] out_step_z out_stride_z * number of elements along Y processed per workitem(in bytes) - * @param[in] out_offset_first_element_in_bytes The offset of the first element in the destination image - * @param[in] scale Float scaling factor. 
Supported data types: F32 - */ -__kernel void pixelwise_div_float( - TENSOR3D_DECLARATION(in1), - TENSOR3D_DECLARATION(in2), - TENSOR3D_DECLARATION(out), - const float scale) -{ - // Get pixels pointer - Tensor3D in1 = CONVERT_TO_TENSOR3D_STRUCT(in1); - Tensor3D in2 = CONVERT_TO_TENSOR3D_STRUCT(in2); - Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(out); - - // Load data - VEC_DATA_TYPE(DATA_TYPE_RES, 16) - in1_data = CONVERT(vload16(0, (__global DATA_TYPE_IN1 *)in1.ptr), VEC_DATA_TYPE(DATA_TYPE_RES, 16)); - VEC_DATA_TYPE(DATA_TYPE_RES, 16) - in2_data = CONVERT(vload16(0, (__global DATA_TYPE_IN2 *)in2.ptr), VEC_DATA_TYPE(DATA_TYPE_RES, 16)); - - // Perform division -#ifdef DATA_TYPE_FLOAT - VEC_DATA_TYPE(DATA_TYPE_OUT, 16) - res = CONVERT(in1_data / in2_data * (DATA_TYPE_RES)scale, VEC_DATA_TYPE(DATA_TYPE_OUT, 16)); -#else /* DATA_TYPE_FLOAT */ - VEC_DATA_TYPE(DATA_TYPE_OUT, 16) - res = CONVERT_OP_FLOAT(CONVERT_OP_FLOAT((convert_float16(in1_data / in2_data) * scale), VEC_DATA_TYPE(DATA_TYPE_RES, 16), ROUND), VEC_DATA_TYPE(DATA_TYPE_OUT, 16), ROUND); -#endif /* DATA_TYPE_FLOAT */ - - // Store result - vstore16(res, 0, (__global DATA_TYPE_OUT *)out.ptr); -} diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/pixelwise_div_int.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/pixelwise_div_int.cl deleted file mode 100644 index fdfb780..0000000 --- a/libs/ARMComputeEx/src/core/CL/cl_kernels/pixelwise_div_int.cl +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016, 2017 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "helpers.h" - -#if defined(SATURATE) -#define CONVERT_OP_INT_STR(x, type, size) (convert_##type##size##_sat(x)) -#else // SATURATE -#define CONVERT_OP_INT_STR(x, type, size) (convert_##type##size(x)) -#endif // SATURATE -#define CONVERT_OP_INT(x, type, size) CONVERT_OP_INT_STR(x, type, size) - -#define DIV_OP(x, y, scale, type, size) CONVERT_OP_INT((x) / (y) >> scale, type, size) - -/** Performs a pixelwise division with integer scale of integer inputs. - * - * @attention The inputs and output data types need to be passed at compile time using -DDATA_TYPE_IN1, -DDATA_TYPE_IN2 and -DDATA_TYPE_OUT: - * e.g. -DDATA_TYPE_IN1=uchar -DDATA_TYPE_IN2=ushort -DDATA_TYPE_OUT=short - * @attention The data_type of the intermediate result of the division should passed as well using -DDATA_TYPE_RES. - * e.g. If one of inputs is S16 -DDATA_TYPE_RES=int should be passed else -DDATA_TYPE_RES=short. - * - * @param[in] in1_ptr Pointer to the source image. 
Supported data types: U8/S16 - * @param[in] in1_stride_x Stride of the source image in X dimension (in bytes) - * @param[in] in1_step_x in1_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] in1_stride_y Stride of the source image in Y dimension (in bytes) - * @param[in] in1_step_y in1_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] in1_stride_z Stride of the source image in Y dimension (in bytes) - * @param[in] in1_step_z in1_stride_z * number of elements along Y processed per workitem(in bytes) - * @param[in] in1_offset_first_element_in_bytes The offset of the first element in the source image - * @param[in] in2_ptr Pointer to the source image. Supported data types: same as @p in1_ptr - * @param[in] in2_stride_x Stride of the source image in X dimension (in bytes) - * @param[in] in2_step_x in2_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] in2_stride_y Stride of the source image in Y dimension (in bytes) - * @param[in] in2_step_y in2_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] in2_stride_z Stride of the source image in Y dimension (in bytes) - * @param[in] in2_step_z in2_stride_z * number of elements along Y processed per workitem(in bytes) - * @param[in] in2_offset_first_element_in_bytes The offset of the first element in the source image - * @param[out] out_ptr Pointer to the destination image. Supported data types: same as @p in1_ptr - * @param[in] out_stride_x Stride of the destination image in X dimension (in bytes) - * @param[in] out_step_x out_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] out_stride_y Stride of the destination image in Y dimension (in bytes) - * @param[in] out_step_y out_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] out_stride_z Stride of the destination image in Y dimension (in bytes) - * @param[in] out_step_z out_stride_z * number of elements along Y processed per workitem(in bytes) - * @param[in] out_offset_first_element_in_bytes The offset of the first element in the destination image - * @param[in] scale Integer scaling factor. Supported data types: S32 - */ -__kernel void pixelwise_div_int( - TENSOR3D_DECLARATION(in1), - TENSOR3D_DECLARATION(in2), - TENSOR3D_DECLARATION(out), - const uint scale) -{ - // Get pixels pointer - Tensor3D in1 = CONVERT_TO_TENSOR3D_STRUCT(in1); - Tensor3D in2 = CONVERT_TO_TENSOR3D_STRUCT(in2); - Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(out); - - // Load data - VEC_DATA_TYPE(DATA_TYPE_RES, 16) - in1_data = CONVERT(vload16(0, (__global DATA_TYPE_IN1 *)in1.ptr), VEC_DATA_TYPE(DATA_TYPE_RES, 16)); - VEC_DATA_TYPE(DATA_TYPE_RES, 16) - in2_data = CONVERT(vload16(0, (__global DATA_TYPE_IN2 *)in2.ptr), VEC_DATA_TYPE(DATA_TYPE_RES, 16)); - - // Perform division and store result - vstore16(DIV_OP(in1_data, in2_data, scale, DATA_TYPE_OUT, 16), 0, (__global DATA_TYPE_OUT *)out.ptr); -} diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/strided_slice_ex.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/strided_slice_ex.cl deleted file mode 100644 index b39c55b9..0000000 --- a/libs/ARMComputeEx/src/core/CL/cl_kernels/strided_slice_ex.cl +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "helpers.h" - -#if defined(ELEMENT_DATA_TYPE) && defined(DEPTH_OUT) -/** Extracts a strided slice up to 4-dimensions - * - * @note Datatype should be given as a preprocessor argument using -DELEMENT_DATA_TYPE=type. e.g. -DELEMENT_DATA_TYPE=short - * @attention Output tensor depth should be given as a preprocessor argument using -DDEPTH_OUT=size. e.g. -DDEPTH_OUT=16 - * - * @param[in] input_ptr Pointer to the source image. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 - * @param[in] input_stride_x Stride of the source image in X dimension (in bytes) - * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes) - * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image - * @param[out] output_ptr Pointer to the destination image. Supported data types: same as @p input_ptr - * @param[in] output_stride_x Stride of the destination image in X dimension (in bytes) - * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes) - * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] output_stride_w Stride of the source tensor in W dimension (in bytes) - * @param[in] output_step_w output_stride_w * number of elements along W processed per workitem(in bytes) - * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image - * @param[in] starts The stride of X dimension of input tensor to be sliced. Supported data types: S32 - * @param[in] strides The stride of Y dimension of input tensor to be sliced. 
Supported data types: S32 - */ -__kernel void strided_slice_ex(TENSOR4D_DECLARATION(input), - TENSOR4D_DECLARATION(output), - const int4 starts, - const int4 strides) -{ - Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT(input, 0); - Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(output, DEPTH_OUT); - - int4 indices_in = - { - starts.x + (strides.x * get_global_id(0)), - starts.y + (strides.y * get_global_id(1)), - starts.z + (strides.z * (get_global_id(2) % DEPTH_OUT)), - starts.w + (strides.w * (get_global_id(2) / DEPTH_OUT)), - }; - *((__global ELEMENT_DATA_TYPE *)out.ptr) = *((__global ELEMENT_DATA_TYPE *)tensor4D_offset(&in, indices_in.x, indices_in.y, indices_in.z, indices_in.w)); -} -#endif // defined(ELEMENT_DATA_TYPE) && defined(DEPTH_OUT) diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLBatchToSpaceNDKernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLBatchToSpaceNDKernel.cpp deleted file mode 100644 index 5a19106..0000000 --- a/libs/ARMComputeEx/src/core/CL/kernels/CLBatchToSpaceNDKernel.cpp +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/core/CL/kernels/CLBatchToSpaceNDKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibraryEx.h" -#include "arm_compute/core/CL/ICLTensor.h" - -using namespace arm_compute; - -namespace -{ -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, - const int32_t *block_size) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QASYMM8, - DataType::S16, DataType::S32, DataType::F16, - DataType::F32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::QASYMM8, - DataType::S16, DataType::S32, DataType::F16, - DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(!(block_size[0] >= 1 && block_size[1] >= 1), - "Block size should be greater than or equal to 1."); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->dimension(2) != output->dimension(2), - "Input Depth should be equal to Output Depth"); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG( - output->dimension(3) * block_size[0] * block_size[1] != input->dimension(3), - "Input batch should be equal to (output batch * block size[0] *block size[1])"); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG((output->dimension(0) % block_size[1]) || - (output->dimension(1) % block_size[0]), - "Output height and width should be divisible by block size[0] " - "and block_size[1] respectively"); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG((output->dimension(0) != input->dimension(0) * block_size[1]) || - (output->dimension(1) != input->dimension(1) * block_size[0]), - "Output height and width should be equal to " - "input_height*blocksize[0] and input_width*blocksize[1] " - "respectively"); - - return Status{}; -} - -} // namespace - -CLBatchToSpaceNDKernel::CLBatchToSpaceNDKernel() : _input(nullptr), _output(nullptr) {} - -void 
CLBatchToSpaceNDKernel::configure(const ICLTensor *input, ICLTensor *output, - const int32_t *block_size) -{ - - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), block_size)); - - _input = input; - _output = output; - - // Set kernel build options - std::set build_opts; - build_opts.emplace("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())); - build_opts.emplace("-DBLOCK_SIZE0=" + support::cpp11::to_string(block_size[0])); - build_opts.emplace("-DBLOCK_SIZE1=" + support::cpp11::to_string(block_size[1])); - build_opts.emplace("-DBATCH_OUT=" + support::cpp11::to_string(output->info()->dimension(3))); - build_opts.emplace("-DDEPTH_OUT=" + support::cpp11::to_string(output->info()->dimension(2))); - - // Create kernel - _kernel = static_cast( - CLKernelLibraryEx::get().create_kernel("batch_to_space_nd", build_opts)); - - // Configure kernel window - Window win = calculate_max_window(*output->info(), Steps()); - - Coordinates coord; - coord.set_num_dimensions(output->info()->num_dimensions()); - output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape())); - - ICLKernel::configure_internal(win); -} - -void CLBatchToSpaceNDKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(ICLKernel::window(), window); - - Window slice_in = window.first_slice_window_4D().collapse(ICLKernel::window(), 2, 4); - - // Setup output slice - Window slice_out(slice_in); - slice_out.set(Window::DimX, Window::Dimension(0, 0, 0)); - slice_out.set(Window::DimY, Window::Dimension(0, 0, 0)); - slice_out.set(Window::DimZ, Window::Dimension(0, 0, 0)); - slice_out.set(3, Window::Dimension(0, 0, 0)); - - do - { - unsigned int idx = 0; - add_4D_tensor_argument(idx, _input, slice_out); - add_4D_tensor_argument(idx, _output, slice_in); - enqueue(queue, *this, slice_in); - } while (window.slide_window_slice_4D(slice_out) && window.slide_window_slice_4D(slice_in)); -} diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLLogicalNotKernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLLogicalNotKernel.cpp deleted file mode 100644 index 73d96ed..0000000 --- a/libs/ARMComputeEx/src/core/CL/kernels/CLLogicalNotKernel.cpp +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include "arm_compute/core/CL/kernels/CLLogicalNotKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibraryEx.h" -#include "arm_compute/core/CL/ICLTensor.h" - -using namespace arm_compute; - -namespace -{ -constexpr unsigned int num_elems_processed_per_iteration = 16; - -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output) -{ - const TensorShape &out_shape = input->tensor_shape(); - - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8); - - // Validate in case of configured output - if (output->total_size() > 0) - { - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QASYMM8); - ARM_COMPUTE_RETURN_ERROR_ON_MSG( - detail::have_different_dimensions(out_shape, output->tensor_shape(), 0), - "Wrong shape for output"); - } - return Status{}; -} -} // namespace - -CLLogicalNotKernel::CLLogicalNotKernel() : _input(nullptr), _output(nullptr) {} - -void CLLogicalNotKernel::configure(const ICLTensor *input, ICLTensor *output) -{ - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info())); - - _input = input; - _output = output; - - // Create kernel - std::string kernel_name = "logical_not"; - std::set build_opts; - build_opts.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()))); - build_opts.emplace( - ("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration))); - - _kernel = - static_cast(CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts)); - - const ValidRegion &valid_region = input->info()->valid_region(); - - Window win = calculate_max_window(valid_region, Steps(num_elems_processed_per_iteration)); - - AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); - - output_access.set_valid_region(win, valid_region); - - ICLKernel::configure_internal(win); -} - -void CLLogicalNotKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - const TensorShape &in_shape1 = _input->info()->tensor_shape(); - - bool can_collapse = true; - if (in_shape1.total_size() > 1) - { - can_collapse = (in_shape1.num_dimensions() > Window::DimZ); - } - - bool has_collapsed = false; - Window collapsed = - can_collapse ? window.collapse_if_possible(ICLKernel::window(), Window::DimZ, &has_collapsed) - : window; - - Window slice = collapsed.first_slice_window_3D(); - - do - { - unsigned int idx = 0; - add_3D_tensor_argument(idx, _input, slice); - add_3D_tensor_argument(idx, _output, slice); - - enqueue(queue, *this, slice); - - collapsed.slide_window_slice_3D(slice); - } while (collapsed.slide_window_slice_3D(slice)); -} diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLNormalizationLayerExKernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLNormalizationLayerExKernel.cpp deleted file mode 100644 index e6db3d9..0000000 --- a/libs/ARMComputeEx/src/core/CL/kernels/CLNormalizationLayerExKernel.cpp +++ /dev/null @@ -1,166 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/core/CL/kernels/CLNormalizationLayerExKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLTensor.h" - -using namespace arm_compute; - -namespace -{ -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, - NormalizationLayerInfo /*norm_info*/) -{ - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output); - - // Checks performed when output is configured - if (output->total_size() != 0) - { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output); - } - - return Status{}; -} - -std::pair validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, - NormalizationLayerInfo norm_info) -{ - // Output tensor auto initialization if not yet initialized - auto_init_if_empty(*output, *input->clone()); - - const unsigned int norm_size = norm_info.norm_size(); - bool is_in_map = norm_info.is_in_map(); - - const unsigned int border_width = is_in_map ? std::min(norm_size / 2, 3U) : 0; - const BorderSize border_size = BorderSize(0, border_width); - - const unsigned int num_elems_processed_per_iteration = 4; - const unsigned int num_elems_read_per_iteration = - is_in_map ? (num_elems_processed_per_iteration + 2 * (norm_size / 2)) - : num_elems_processed_per_iteration; - - Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration)); - - // We do not use a Rectangle window for IN_MAP_2D as we clamp the top and bottom accesses inside - // the kernel, avoiding padding - AccessWindowHorizontal input_access(input, -border_size.left, num_elems_read_per_iteration); - AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration); - - bool window_changed = update_window_and_padding(win, input_access, output_access); - - output_access.set_valid_region(win, input->valid_region()); - - Status err = (window_changed) - ? 
ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") - : Status{}; - return std::make_pair(err, win); -} -} // namespace - -CLNormalizationLayerExKernel::CLNormalizationLayerExKernel() - : _input(nullptr), _output(nullptr), _border_size(0), _is_in_map(false) -{ -} - -BorderSize CLNormalizationLayerExKernel::border_size() const { return _border_size; } - -void CLNormalizationLayerExKernel::configure(const ICLTensor *input, ICLTensor *output, - NormalizationLayerInfo norm_info) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - - // Output tensor auto initialization if not yet initialized - auto_init_if_empty(*output->info(), *input->info()->clone()); - - // Perform validation step - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), norm_info)); - - _input = input; - _output = output; - - const unsigned int num_elems_processed_per_iteration = 4; - const bool is_in_map_2D = (norm_info.type() == NormType::IN_MAP_2D); - - // Set build options - CLBuildOptions build_opts; - build_opts.add_option(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()))); - build_opts.add_option( - ("-DCOEFF=" + float_to_string_with_full_precision(norm_info.scale_coeff()))); - build_opts.add_option(("-DBETA=" + float_to_string_with_full_precision(norm_info.beta()))); - build_opts.add_option(("-DKAPPA=" + float_to_string_with_full_precision(norm_info.kappa()))); - build_opts.add_option( - ("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration))); - build_opts.add_option(("-DRADIUS=" + support::cpp11::to_string(norm_info.norm_size()))); - build_opts.add_option(("-DNUM_SLICES=" + support::cpp11::to_string(input->info()->dimension(2)))); - build_opts.add_option_if(is_in_map_2D, "-DIN_MAP_2D"); - - // Create kernel - std::string kernel_name = - _is_in_map ? "normalization_layer_in_map" : "normalization_layer_cross_map"; - _kernel = static_cast( - CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); - - // Configure kernel window - auto win_config = validate_and_configure_window(input->info(), output->info(), norm_info); - ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure_internal(win_config.second); - - // Set config_id for enabling LWS tuning - _config_id = "normalization_layer_"; - _config_id += lower_string(string_from_data_type(input->info()->data_type())); - _config_id += "_"; - _config_id += support::cpp11::to_string( - static_cast::type>(norm_info.type())); - _config_id += "_"; - _config_id += support::cpp11::to_string(norm_info.norm_size()); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(0)); - _config_id += "_"; - _config_id += support::cpp11::to_string(input->info()->dimension(1)); -} - -Status CLNormalizationLayerExKernel::validate(const ITensorInfo *input, const ITensorInfo *output, - NormalizationLayerInfo norm_info) -{ - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, norm_info)); - ARM_COMPUTE_RETURN_ON_ERROR( - validate_and_configure_window(input->clone().get(), output->clone().get(), norm_info).first); - - return Status{}; -} - -void CLNormalizationLayerExKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - - const int collapsed_dimension = _is_in_map ? 
Window::DimZ : 4; - Window window_collapsed = window.collapse_if_possible(ICLKernel::window(), collapsed_dimension); - Window slice = window_collapsed.first_slice_window_3D(); - - do - { - unsigned int idx = 0; - add_3D_tensor_argument(idx, _input, slice); - add_3D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice); - } while (window_collapsed.slide_window_slice_3D(slice)); -} diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLPixelWiseDivisionKernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLPixelWiseDivisionKernel.cpp deleted file mode 100644 index b985aa7..0000000 --- a/libs/ARMComputeEx/src/core/CL/kernels/CLPixelWiseDivisionKernel.cpp +++ /dev/null @@ -1,280 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/core/CL/kernels/CLPixelWiseDivisionKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibraryEx.h" -#include "arm_compute/core/CL/ICLTensor.h" - -using namespace arm_compute; - -namespace -{ -constexpr unsigned int num_elems_processed_per_iteration = 16; - -Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2, - const ITensorInfo *output, float scale, ConvertPolicy overflow_policy, - RoundingPolicy rounding_policy) -{ - ARM_COMPUTE_UNUSED(overflow_policy); - ARM_COMPUTE_UNUSED(rounding_policy); - - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8, DataType::S16, - DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8, DataType::S16, - DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(scale < 0, "Scale cannot be negative."); - - const TensorShape &out_shape = - TensorShape::broadcast_shape(input1->tensor_shape(), input2->tensor_shape()); - - ARM_COMPUTE_RETURN_ERROR_ON_MSG(out_shape.total_size() == 0, - "Inputs are not broadcast compatible"); - - // Validate in case of configured output - if (output->total_size() > 0) - { - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16, - DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_MSG( - output->data_type() == DataType::U8 && - (input1->data_type() != DataType::U8 || input2->data_type() != DataType::U8), - "Output can only be U8 if both inputs are U8"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG( - detail::have_different_dimensions(out_shape, output->tensor_shape(), 0), - "Wrong shape for output"); - } - - return Status{}; -} - -std::pair validate_and_configure_window(ITensorInfo *input1, ITensorInfo *input2, - ITensorInfo *output) -{ - const std::pair broadcast_pair = - ITensorInfo::broadcast_shape_and_valid_region(*input1, *input2); - const TensorShape &out_shape = broadcast_pair.first; - const ValidRegion &valid_region = broadcast_pair.second; - - // Auto initialize output if not initialized - { - set_shape_if_empty(*output, out_shape); - - if 
(input1->data_type() == DataType::S16 || input2->data_type() == DataType::S16) - { - set_format_if_unknown(*output, Format::S16); - } - else if (input1->data_type() == DataType::F32 || input2->data_type() == DataType::F32) - { - set_format_if_unknown(*output, Format::F32); - } - } - - Window win = calculate_max_window(valid_region, Steps(num_elems_processed_per_iteration)); - Window win_input1 = win.broadcast_if_dimension_le_one(*input1); - Window win_input2 = win.broadcast_if_dimension_le_one(*input2); - - AccessWindowHorizontal input1_access(input1, 0, num_elems_processed_per_iteration); - AccessWindowHorizontal input2_access(input2, 0, num_elems_processed_per_iteration); - AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration); - - bool window_changed = update_window_and_padding(win_input1, input1_access) || - update_window_and_padding(win_input2, input2_access) || - update_window_and_padding(win, output_access); - - output_access.set_valid_region(win, valid_region); - - Status err = (window_changed) - ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") - : Status{}; - return std::make_pair(err, win); -} -} // namespace - -CLPixelWiseDivisionKernel::CLPixelWiseDivisionKernel() - : _input1(nullptr), _input2(nullptr), _output(nullptr) -{ -} - -void CLPixelWiseDivisionKernel::configure(const ICLTensor *input1, const ICLTensor *input2, - ICLTensor *output, float scale, - ConvertPolicy overflow_policy, - RoundingPolicy rounding_policy) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input1->info(), input2->info(), output->info(), - scale, overflow_policy, rounding_policy)); - - // Configure kernel window - auto win_config = validate_and_configure_window(input1->info(), input2->info(), output->info()); - ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - - _input1 = input1; - _input2 = input2; - _output = output; - - int scale_int = -1; - // Extract sign, exponent and mantissa - int exponent = 0; - float normalized_mantissa = std::frexp(scale, &exponent); - // Use int scaling if factor is equal to 1/2^n for 0 <= n <= 15 - // frexp returns 0.5 as mantissa which means that the exponent will be in the range of -1 <= e <= - // 14 - // Moreover, it will be negative as we deal with 1/2^n - if ((normalized_mantissa == 0.5f) && (-14 <= exponent) && (exponent <= 1)) - { - // Store the positive exponent. We know that we compute 1/2^n - // Additionally we need to subtract 1 to compensate that frexp used a mantissa of 0.5 - scale_int = std::abs(exponent - 1); - } - - std::string data_type; - std::string compute_type; - // Check if it has float inputs and output - if (is_data_type_float(input1->info()->data_type()) || - is_data_type_float(input2->info()->data_type())) - { - scale_int = -1; - compute_type = (input1->info()->data_type() == DataType::F32 || - input2->info()->data_type() == DataType::F32) - ? "float" - : "half"; - data_type = "DATA_TYPE_FLOAT"; - } - else - { - if (input1->info()->data_type() == DataType::S16 || - input2->info()->data_type() == DataType::S16) - { - compute_type = "int"; - } - else - { - compute_type = "ushort"; - } - data_type = "DATA_TYPE_INT"; - } - - // Construct kernel name - std::string kernel_name = "pixelwise_div"; - kernel_name += (scale_int >= 0) ? "_int" : "_float"; - - // Set kernel build options - std::set build_opts; - build_opts.emplace( - (overflow_policy == ConvertPolicy::WRAP || is_data_type_float(output->info()->data_type())) - ? 
"-DWRAP" - : "-DSATURATE"); - build_opts.emplace((rounding_policy == RoundingPolicy::TO_ZERO) ? "-DROUND=_rtz" - : "-DROUND=_rte"); - build_opts.emplace("-DDATA_TYPE_IN1=" + get_cl_type_from_data_type(input1->info()->data_type())); - build_opts.emplace("-DDATA_TYPE_IN2=" + get_cl_type_from_data_type(input2->info()->data_type())); - build_opts.emplace("-DDATA_TYPE_OUT=" + get_cl_type_from_data_type(output->info()->data_type())); - build_opts.emplace("-DDATA_TYPE_RES=" + compute_type); - build_opts.emplace("-D" + data_type); - - // Create kernel - _kernel = - static_cast(CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts)); - - // Set scale argument - unsigned int idx = 3 * num_arguments_per_3D_tensor(); // Skip the inputs and output parameters - - if (scale_int >= 0) - { - _kernel.setArg(idx++, scale_int); - } - else - { - _kernel.setArg(idx++, scale); - } - - ICLKernel::configure_internal(win_config.second); -} - -Status CLPixelWiseDivisionKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2, - const ITensorInfo *output, float scale, - ConvertPolicy overflow_policy, - RoundingPolicy rounding_policy) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output); - ARM_COMPUTE_RETURN_ON_ERROR( - validate_arguments(input1, input2, output, scale, overflow_policy, rounding_policy)); - ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input1->clone().get(), - input2->clone().get(), - output->clone().get()) - .first); - - return Status{}; -} - -void CLPixelWiseDivisionKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - const TensorShape &in_shape1 = _input1->info()->tensor_shape(); - const TensorShape &in_shape2 = _input2->info()->tensor_shape(); - const TensorShape &out_shape = _output->info()->tensor_shape(); - - bool can_collapse = true; - if (std::min(in_shape1.total_size(), in_shape2.total_size()) > 1) - { - can_collapse = - (std::min(in_shape1.num_dimensions(), in_shape2.num_dimensions()) > Window::DimZ); - for (size_t d = Window::DimZ; can_collapse && (d < out_shape.num_dimensions()); ++d) - { - can_collapse = (in_shape1[d] == in_shape2[d]); - } - } - - bool has_collapsed = false; - Window collapsed = - can_collapse ? window.collapse_if_possible(ICLKernel::window(), Window::DimZ, &has_collapsed) - : window; - - const TensorShape &in_shape1_collapsed = - has_collapsed ? in_shape1.collapsed_from(Window::DimZ) : in_shape1; - const TensorShape &in_shape2_collapsed = - has_collapsed ? 
in_shape2.collapsed_from(Window::DimZ) : in_shape2; - - Window slice = collapsed.first_slice_window_3D(); - Window slice_input1 = slice.broadcast_if_dimension_le_one(in_shape1_collapsed); - Window slice_input2 = slice.broadcast_if_dimension_le_one(in_shape2_collapsed); - - do - { - unsigned int idx = 0; - add_3D_tensor_argument(idx, _input1, slice_input1); - add_3D_tensor_argument(idx, _input2, slice_input2); - add_3D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice); - - collapsed.slide_window_slice_3D(slice_input1); - collapsed.slide_window_slice_3D(slice_input2); - } while (collapsed.slide_window_slice_3D(slice)); -} - -BorderSize CLPixelWiseDivisionKernel::border_size() const -{ - const unsigned int replicateSize = - _output->info()->dimension(0) - - std::min(_input1->info()->dimension(0), _input2->info()->dimension(0)); - const unsigned int border = - std::min(num_elems_processed_per_iteration - 1U, replicateSize); - return BorderSize(0, border, 0, 0); -} diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLStridedSliceExKernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLStridedSliceExKernel.cpp deleted file mode 100644 index 9e6c5af..0000000 --- a/libs/ARMComputeEx/src/core/CL/kernels/CLStridedSliceExKernel.cpp +++ /dev/null @@ -1,253 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include "arm_compute/core/CL/kernels/CLStridedSliceExKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibraryEx.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/TensorInfo.h" - -using namespace arm_compute; - -CLStridedSliceExKernel::CLStridedSliceExKernel() - : _input(nullptr), _output(nullptr), _beginData(nullptr), _endData(nullptr), - _stridesData(nullptr), _beginMask(0), _endMask(0), _shrinkAxisMask(0) -{ -} - -Status CLStridedSliceExKernel::validate(const ITensorInfo *input, const ITensorInfo *output, - const ITensorInfo *begin, const ITensorInfo *end, - const ITensorInfo *strides, int32_t /*beginMask*/, - int32_t /*endMask*/, int32_t /*shrinkAxisMask*/) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, begin, end, strides); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN( - input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16, - DataType::U32, DataType::S32, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(begin, 1, DataType::S32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(end, 1, DataType::S32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(strides, 1, DataType::S32); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - - ARM_COMPUTE_ERROR_ON(begin->num_dimensions() != 1 || begin->dimension(0) > 4); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(begin->tensor_shape(), end->tensor_shape(), - strides->tensor_shape()); - - return Status{}; -} - -// Return the index for the first element along that axis. This index will be a -// positive integer between [0, axisSize - 1] that can be used to index -// directly into the data. -inline int32_t StartForAxis(int32_t beginMask, int32_t begin, int32_t stride, - const TensorShape &inputShape, int32_t axis) -{ - // Begin with the specified index - int32_t start = begin; - - // beginMask override - if (beginMask & 1 << axis) - { - if (stride > 0) - { - // Forward iteration - use the first element. These values will get - // clamped below (Note: We could have set them to 0 and axisSize-1, but - // use lowest() and max() to maintain symmetry with StopForAxis()) - start = std::numeric_limits::lowest(); - } - else - { - // Backward iteration - use the last element. - start = std::numeric_limits::max(); - } - } - - // Handle negative indices - int32_t axisSize = inputShape[axis]; - if (start < 0) - { - start += axisSize; - } - - // Clamping - start = arm_compute::utility::clamp(start, 0, axisSize - 1); - - return start; -} - -// Return the "real" index for the end of iteration along that axis. This is an -// "end" in the traditional C sense, in that it points to one past the last -// element. ie. So if you were iterating through all elements of a 1D array of -// size 4, this function would return 4 as the stop, because it is one past the -// "real" indices of 0, 1, 2 & 3. -inline int32_t StopForAxis(int32_t endMask, int32_t end, int32_t stride, - const TensorShape &inputShape, int32_t axis) -{ - // Begin with the specified index - int32_t stop = end; - - // endMask override - if (endMask & (1 << axis)) - { - if (stride > 0) - { - // Forward iteration - use the last element. These values will get - // clamped below - stop = std::numeric_limits::max(); - } - else - { - // Backward iteration - use the first element. 
- stop = std::numeric_limits::lowest(); - } - } - - // Handle negative indices - int32_t axisSize = inputShape[axis]; - if (stop < 0) - { - stop += axisSize; - } - - // Clamping - // Because the end index points one past the last element, we need slightly - // different clamping ranges depending on the direction. - if (stride > 0) - { - // Forward iteration - stop = arm_compute::utility::clamp(stop, 0, axisSize); - } - else - { - // Backward iteration - stop = arm_compute::utility::clamp(stop, -1, axisSize - 1); - } - - return stop; -} - -inline int32_t getOutDim(int32_t start, int32_t stop, int32_t stride) -{ - int32_t ret = 0; - if (stride > 0) - { - ret = ((stop - start - 1) / stride) + 1; - } - else - { - ret = ((stop - start + 1) / stride) + 1; - } - ARM_COMPUTE_ERROR_ON_MSG(ret < 0, "The dimension must be the natural number"); - return ret; -} - -void CLStridedSliceExKernel::configure(const ICLTensor *input, ICLTensor *output, - ICLTensor *beginData, ICLTensor *endData, - ICLTensor *stridesData, int32_t beginMask, int32_t endMask, - int32_t shrinkAxisMask) -{ - ARM_COMPUTE_ERROR_THROW_ON(validate(input->info(), output->info(), beginData->info(), - endData->info(), stridesData->info(), beginMask, endMask, - shrinkAxisMask)); - - _input = input; - _output = output; - _beginData = beginData; - _endData = endData; - _stridesData = stridesData; - _beginMask = beginMask; - _endMask = endMask; - _shrinkAxisMask = shrinkAxisMask; - - // Set kernel build options - std::set build_opts; - build_opts.emplace("-DELEMENT_DATA_TYPE=" + - get_cl_type_from_data_type(input->info()->data_type())); - build_opts.emplace("-DDEPTH_OUT=" + support::cpp11::to_string(output->info()->dimension(2))); - - // Create kernel - _kernel = static_cast( - CLKernelLibraryEx::get().create_kernel("strided_slice_ex", build_opts)); - - // Configure kernel window - Window win = calculate_max_window(*output->info(), Steps()); - ICLKernel::configure_internal(win); -} - -void CLStridedSliceExKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - - _beginData->map(queue); - _endData->map(queue); - _stridesData->map(queue); - - std::vector starts; - std::vector strides; - - for (uint32_t n = 0; n < _beginData->info()->tensor_shape().total_size(); ++n) - { - const TensorShape shape = _input->info()->tensor_shape(); - starts.emplace_back( - StartForAxis(_beginMask, reinterpret_cast(_beginData->buffer())[n], - reinterpret_cast(_stridesData->buffer())[n], shape, n)); - - strides.emplace_back(reinterpret_cast(_stridesData->buffer())[n]); - } - - for (uint32_t n = _beginData->info()->tensor_shape().total_size(); n < 4; n++) - { - starts.emplace_back(0); - strides.emplace_back(1); - } - // TODO: Apply shrinkAxisMask - - _beginData->unmap(queue); - _stridesData->unmap(queue); - _endData->unmap(queue); - - unsigned int idx = 2 * num_arguments_per_4D_tensor(); // Skip the input and output parameters - const cl_int4 startsArg = {{ - static_cast(starts[0]), static_cast(starts[1]), - static_cast(starts[2]), static_cast(starts[3]), - }}; - _kernel.setArg(idx++, startsArg); - - const cl_int4 stridesArg = {{ - static_cast(strides[0]), static_cast(strides[1]), - static_cast(strides[2]), static_cast(strides[3]), - }}; - _kernel.setArg(idx++, stridesArg); - - Window slice_out = window.first_slice_window_4D().collapse(ICLKernel::window(), 2, 4); - - // Setup output slice - Window slice_in(slice_out); - 
slice_in.set(Window::DimX, Window::Dimension(0, 0, 0)); - slice_in.set(Window::DimY, Window::Dimension(0, 0, 0)); - slice_in.set(Window::DimZ, Window::Dimension(0, 0, 0)); - slice_in.set(3, Window::Dimension(0, 0, 0)); - - do - { - unsigned int idx = 0; - add_4D_tensor_argument(idx, _input, slice_in); - add_4D_tensor_argument(idx, _output, slice_out); - enqueue(queue, *this, slice_out); - } while (window.slide_window_slice_4D(slice_in) && window.slide_window_slice_4D(slice_out)); -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLBatchToSpaceND.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLBatchToSpaceND.cpp deleted file mode 100644 index 26e3798..0000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLBatchToSpaceND.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/CL/functions/CLBatchToSpaceND.h" - -#include "arm_compute/core/CL/kernels/CLBatchToSpaceNDKernel.h" - -using namespace arm_compute; - -void CLBatchToSpaceND::configure(ICLTensor *input, ICLTensor *output, const int32_t *block_size) -{ - auto k = arm_compute::support::cpp14::make_unique(); - k->configure(input, output, block_size); - _kernel = std::move(k); -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLLogicalNot.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLLogicalNot.cpp deleted file mode 100644 index 96caf9e..0000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLLogicalNot.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include "arm_compute/runtime/CL/functions/CLLogicalNot.h" -#include "arm_compute/core/CL/kernels/CLLogicalNotKernel.h" -#include "arm_compute/core/CL/ICLTensor.h" - -using namespace arm_compute; - -void CLLogicalNot::configure(ICLTensor *input, ICLTensor *output) -{ - auto k = arm_compute::support::cpp14::make_unique(); - k->configure(input, output); - _kernel = std::move(k); -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLNormalizationLayerEx.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLNormalizationLayerEx.cpp deleted file mode 100644 index 276c455..0000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLNormalizationLayerEx.cpp +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/CL/functions/CLNormalizationLayerEx.h" - -#include "arm_compute/runtime/CL/CLScheduler.h" - -using namespace arm_compute; - -CLNormalizationLayerEx::CLNormalizationLayerEx() : _norm_kernel(), _border_handler() {} - -void CLNormalizationLayerEx::configure(ICLTensor *input, ICLTensor *output, - const NormalizationLayerInfo &norm_info) -{ - ARM_COMPUTE_ERROR_ON(input == nullptr); - - // Configure normalization kernel - _norm_kernel.configure(input, output, norm_info); - - // Fill the border by 3 elements since we need vload4 in the IN_MAP normalization kernel - _border_handler.configure(input, _norm_kernel.border_size(), BorderMode::CONSTANT, PixelValue(0)); -} - -Status CLNormalizationLayerEx::validate(const ITensorInfo *input, const ITensorInfo *output, - const NormalizationLayerInfo &norm_info) -{ - return CLNormalizationLayerExKernel::validate(input, output, norm_info); -} - -void CLNormalizationLayerEx::run() -{ - // Run border handler - CLScheduler::get().enqueue(_border_handler, false); - - // Run normalization kernel - CLScheduler::get().enqueue(_norm_kernel); -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLPixelWiseDivision.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLPixelWiseDivision.cpp deleted file mode 100644 index dc0baa8..0000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLPixelWiseDivision.cpp +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2016-2018 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include "arm_compute/runtime/CL/functions/CLPixelWiseDivision.h" - -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/kernels/CLPixelWiseDivisionKernel.h" - -using namespace arm_compute; - -void CLPixelWiseDivision::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output, - float scale, ConvertPolicy overflow_policy, - RoundingPolicy rounding_policy) -{ - auto k = arm_compute::support::cpp14::make_unique(); - k->configure(input1, input2, output, scale, overflow_policy, rounding_policy); - _kernel = std::move(k); - - if (output->info()->dimension(0) > 1) - { - ICLTensor *broadcasted_info = (input1->info()->dimension(0) == 1) ? input1 : input2; - - if (broadcasted_info->info()->dimension(0) == 1) - { - _border_handler.configure(broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE); - } - } -} - -Status CLPixelWiseDivision::validate(const ITensorInfo *input1, const ITensorInfo *input2, - const ITensorInfo *output, float scale, - ConvertPolicy overflow_policy, RoundingPolicy rounding_policy) -{ - return CLPixelWiseDivisionKernel::validate(input1, input2, output, scale, overflow_policy, - rounding_policy); -} diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLStridedSliceEx.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLStridedSliceEx.cpp deleted file mode 100644 index be73534..0000000 --- a/libs/ARMComputeEx/src/runtime/CL/functions/CLStridedSliceEx.cpp +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved - * Copyright (c) 2017 ARM Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "arm_compute/runtime/CL/functions/CLStridedSliceEx.h" - -#include "arm_compute/core/CL/kernels/CLStridedSliceExKernel.h" - -using namespace arm_compute; - -void CLStridedSliceEx::configure(const ICLTensor *input, ICLTensor *output, ICLTensor *beginData, - ICLTensor *endData, ICLTensor *stridesData, int32_t beginMask, - int32_t endMask, int32_t shrinkAxisMask) -{ - auto k = arm_compute::support::cpp14::make_unique(); - k->configure(input, output, beginData, endData, stridesData, beginMask, endMask, shrinkAxisMask); - _kernel = std::move(k); -}