+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __ARM_COMPUTE_CLBATCH_TO_SPACE_ND_KERNEL_H__
-#define __ARM_COMPUTE_CLBATCH_TO_SPACE_ND_KERNEL_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to perform BATCH_TO_SPACE_ND operation */
-class CLBatchToSpaceNDKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLBatchToSpaceNDKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLBatchToSpaceNDKernel(const CLBatchToSpaceNDKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLBatchToSpaceNDKernel &operator=(const CLBatchToSpaceNDKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLBatchToSpaceNDKernel(CLBatchToSpaceNDKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLBatchToSpaceNDKernel &operator=(CLBatchToSpaceNDKernel &&) = default;
- /** Default destructor */
- ~CLBatchToSpaceNDKernel() = default;
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
- * @param[in] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
- */
- void configure(const ICLTensor *input, ICLTensor *output, const int32_t *block_size);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input; /**< Source tensor */
- ICLTensor *_output; /**< Destination tensor */
-};
-
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_CLSPACE_TO_BATCH_ND_KERNEL_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __ARM_COMPUTE_CLLOGICALNOTKERNEL_H__
-#define __ARM_COMPUTE_CLLOGICALNOTKERNEL_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to return truth values of two input tensors for LogicalNOT*/
-class CLLogicalNotKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLLogicalNotKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers). */
- CLLogicalNotKernel(const CLLogicalNotKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers). */
- CLLogicalNotKernel &operator=(const CLLogicalNotKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLLogicalNotKernel(CLLogicalNotKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLLogicalNotKernel &operator=(CLLogicalNotKernel &&) = default;
- /** Initialize the kernel's input, output.
- *
- * @param[in] input Source tensor.
- * @param[out] output Output tensor.
- */
- void configure(const ICLTensor *input, ICLTensor *output);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
-};
-
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_CLLOGICALNOTKERNEL_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __ARM_COMPUTE_CLNORMALIZATIONLAYEREXKERNEL_H__
-#define __ARM_COMPUTE_CLNORMALIZATIONLAYEREXKERNEL_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the normalization layer kernel.
- */
-class CLNormalizationLayerExKernel : public ICLKernel
-{
-public:
- /** Constructor */
- CLNormalizationLayerExKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLNormalizationLayerExKernel(const CLNormalizationLayerExKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLNormalizationLayerExKernel &operator=(const CLNormalizationLayerExKernel &) = delete;
- /** Default Move Constructor. */
- CLNormalizationLayerExKernel(CLNormalizationLayerExKernel &&) = default;
- /** Default move assignment operator */
- CLNormalizationLayerExKernel &operator=(CLNormalizationLayerExKernel &&) = default;
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions
- * [width, height, IFM],
- * and an optional 4th dimension for batch of inputs. Data types supported:
- * F16/F32.
- * @param[out] output Destination tensor. Output will have the same number of dimensions as
- * input. Data types supported: same as @p input.
- * @param[in] norm_info Normalization layer information like the normalization type,
- * normalization size and other parameters.
- */
- void configure(const ICLTensor *input, ICLTensor *output, NormalizationLayerInfo norm_info);
- /** Static function to check if given info will lead to a valid configuration of @ref
- * CLNormalizationLayerKernel
- *
- * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions
- * [width, height, IFM],
- * and an optional 4th dimension for batch of inputs. Data types supported:
- * F16/F32.
- * @param[in] output Destination tensor. Output will have the same number of dimensions as
- * input. Data types supported: same as @p input.
- * @param[in] norm_info Normalization layer information like the normalization type, normalization
- * size and other parameters.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output,
- NormalizationLayerInfo norm_info);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
- BorderSize border_size() const override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
- BorderSize _border_size;
- bool _is_in_map;
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_CLNORMALIZATIONLAYEREXKERNEL_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file CLPixelWiseDivisionKernel.h
- * @ingroup COM_AI_RUNTIME
- * @brief This file defines CLPixelWiseDivisionKernel class
- */
-
-#ifndef __ARM_COMPUTE_CLPIXELWISEDIVISIONKERNEL_H__
-#define __ARM_COMPUTE_CLPIXELWISEDIVISIONKERNEL_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/**
- * @brief Interface for the pixelwise division kernel.
- */
-class CLPixelWiseDivisionKernel : public ICLKernel
-{
-public:
- /**
- * @brief Construct a CLPixelWiseDivisionKernel object
- */
- CLPixelWiseDivisionKernel();
-
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers).
- */
- CLPixelWiseDivisionKernel(const CLPixelWiseDivisionKernel &) = delete;
-
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers).
- */
- CLPixelWiseDivisionKernel &operator=(const CLPixelWiseDivisionKernel &) = delete;
-
- /**
- * @brief Construct a CLPixelWiseDivisionKernel object by using move constructor
- * @param[in] CLPixelWiseDivisionKernel object to move
- */
- CLPixelWiseDivisionKernel(CLPixelWiseDivisionKernel &&) = default;
-
- /**
- * @brief Allow instances of this class to be moved
- * @param[in] CLPixelWiseDivisionKernel object to move
- */
- CLPixelWiseDivisionKernel &operator=(CLPixelWiseDivisionKernel &&) = default;
-
- /**
- * @brief Initialise the kernel's input, output and border mode.
- * @param[in] input1 An input tensor. Data types supported: U8/S16/F16/F32.
- * @param[in] input2 An input tensor. Data types supported: same as @p input1.
- * @param[out] output The output tensor, Data types supported: same as @p input1. Note:
- * U8 requires both inputs to be U8.
- * @param[in] scale Scale to apply after division.
- * Scale must be positive and its value must be either 1/255 or 1/2^n
- * where n is between 0 and 15.
- * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate
- * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest
- * even.
- * @return N/A
- */
- void configure(const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float scale,
- ConvertPolicy overflow_policy, RoundingPolicy rounding_policy);
-
- /**
- * @brief Static function to check if given info will lead to a valid configuration of @ref
- * CLPixelWiseDivisionKernel
- * @param[in] input1 An input tensor info. Data types supported: U8/S16/F16/F32.
- * @param[in] input2 An input tensor info. Data types supported: same as @p input1.
- * @param[in] output The output tensor info, Data types supported: same as @p input1.
- * Note: U8 requires both inputs to be U8.
- * @param[in] scale Scale to apply after division.
- * Scale must be positive and its value must be either 1/255 or 1/2^n
- * where n is between 0 and 15.
- * @param[in] overflow_policy Overflow policy. Supported overflow policies: Wrap, Saturate
- * @param[in] rounding_policy Rounding policy. Supported rounding modes: to zero, to nearest even.
- * @return a status
- */
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2,
- const ITensorInfo *output, float scale, ConvertPolicy overflow_policy,
- RoundingPolicy rounding_policy);
-
- /**
- * @brief Enqueue the OpenCL kernel to process the given window on the passed OpenCL command
- * queue.
- * @note The queue is *not* flushed by this method, and therefore the kernel will not have
- * been executed by the time this method returns.
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of
- * the window returned by window()).
- * @param[in,out] queue Command queue on which to enqueue the kernel.@return N/A
- * @return N/A
- */
- void run(const Window &window, cl::CommandQueue &queue) override;
-
- /**
- * @brief The size of the border for that kernel
- * @return The width in number of elements of the border.
- */
- BorderSize border_size() const override;
-
-private:
- const ICLTensor *_input1;
- const ICLTensor *_input2;
- ICLTensor *_output;
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_CLPIXELWISEDIVISIONKERNEL_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file CLStridedSliceExKernel.h
- * @ingroup COM_AI_RUNTIME
- * @brief This file defines CLStridedSliceExKernel class
- */
-
-#ifndef __ARM_COMPUTE_CLSTRIDEDSLICEEXKERNEL_H__
-#define __ARM_COMPUTE_CLSTRIDEDSLICEEXKERNEL_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/**
-* @brief Class to define an interface for the kernel to extract a strided slice of a tensor
-*/
-class CLStridedSliceExKernel : public ICLKernel
-{
-public:
- /**
- * @brief Construct a CLStridedSliceExKernel object
- * */
- CLStridedSliceExKernel();
-
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers)
- * */
- CLStridedSliceExKernel(const CLStridedSliceExKernel &) = delete;
-
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers)
- * */
- CLStridedSliceExKernel &operator=(const CLStridedSliceExKernel &) = delete;
-
- /**
- * @brief Construct a CLStridedSliceExKernel object by using default move constructor
- * @param[in] CLStridedSliceExKernel object to move
- * */
- CLStridedSliceExKernel(CLStridedSliceExKernel &&) = default;
-
- /**
- * @brief Move assignment operator
- * @param[in] CLStridedSliceExKernel object to move
- * */
- CLStridedSliceExKernel &operator=(CLStridedSliceExKernel &&) = default;
-
- /**
- * @brief Destruct this object
- * */
- ~CLStridedSliceExKernel() = default;
-
- /**
- * @brief Set the input and output of the kernel
- * @param[in] input Source tensor. Data type supported:
- * U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
- * @param[out] output Destination tensor. Data type supported: Same as @p input
- * @param[in] beginData The begin tensor. Data types supported: S32.
- * The number of dimensions must be 1.
- * The length must be the same as the number of dimensions of input.
- * @param[in] endData The end tensor. Data types supported: S32.
- * The number of dimensions must be 1.
- * The length must be the same as the number of dimensions of input.
- * @param[in] strideData The stride tensor. Data types supported: S32.
- * The number of dimensions must be 1.
- * The length must be the same as the number of dimensions of input.
- * @param[in] beginMask Mask for begin
- * @param[in] endMask Mask for end
- * @param[in] shrinkAxisMask Mask for shrink axis.
- * @return N/A
- */
- void configure(const ICLTensor *input, ICLTensor *output, ICLTensor *beginData,
- ICLTensor *endData, ICLTensor *stridesData, int32_t beginMask, int32_t endMask,
- int32_t shrinkAxisMask);
-
- /**
- * @brief Static function to check if given info will lead to a valid configuration of @ref
- * CLStridedSliceExKernel
- * @param[in] input The input tensor info. Data types supported:
- * U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
- * @param[in] output The output tensor info, Data types supported: same as @p input1.
- * @param[in] begin The begin tensor info. Data types supported: S32.
- * The number of dimensions must be 1.
- * The length must be the same as the number of dimensions of input.
- * @param[in] end The end tensor info. Data types supported: S32.
- * The number of dimensions must be 1.
- * The length must be the same as the number of dimensions of input.
- * @param[in] stride The stride tensor info. Data types supported: S32.
- * The number of dimensions must be 1.
- * The length must be the same as the number of dimensions of input.
- * @param[in] beginMask Mask for begin
- * @param[in] endMask Mask for end
- * @param[in] shrinkAxisMask Mask for shrink axis.
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output,
- const ITensorInfo *begin, const ITensorInfo *end,
- const ITensorInfo *stride, int32_t beginMask, int32_t endMask,
- int32_t shrinkAxisMask);
-
- /**
- * @brief Enqueue the OpenCL kernel to process the given window on the passed OpenCL command
- * queue.
- * @note The queue is *not* flushed by this method, and therefore the kernel will not have
- * been executed by the time this method returns.
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of
- * the window returned by window()).
- * @param[in,out] queue Command queue on which to enqueue the kernel.@return N/A
- * @return N/A
- */
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input; /** Source tensor */
- ICLTensor *_output; /** Destination tensor */
- ICLTensor *_beginData; /** Start indices of input tensor */
- ICLTensor *_endData; /** Stop indices of input tensor */
- ICLTensor *_stridesData; /** Strides tensor */
- int32_t _beginMask; /** Begin mask */
- int32_t _endMask; /** End mask */
- int32_t _shrinkAxisMask; /** Shrink axis mask */
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_CLSTRIDEDSLICEEXKERNEL_H__ */
{"activation_layer_ex", "activation_layer_ex.cl"},
{"arg_op", "arg_operation.cl"},
{"arithmetic_add_qasymm8", "arithmetic_op_quantized.cl"},
- {"batch_to_space_nd", "batch_to_space_nd.cl"},
{"binary_logical_op", "binary_logical_op.cl"},
{"cast", "cast.cl"},
{"cast_qasymm_in", "cast.cl"},
{"gather_1d", "gather.cl"},
{"gather_1d_out", "gather.cl"},
{"hashtable_lookup", "hashtable_lookup.cl"},
- {"logical_not", "logical_not.cl"},
{"neg_tensor", "neg_tensor.cl"},
{"pad", "pad.cl"},
{"permute_generic", "permute_ex.cl"},
{"pixelwise_mul_qasymm8", "pixelwise_mul_quantized.cl"},
- {"pixelwise_div_float", "pixelwise_div_float.cl"},
- {"pixelwise_div_int", "pixelwise_div_int.cl"},
{"prelu", "prelu.cl"},
{"prelu_qasymm8", "prelu_quantized.cl"},
{"reduce_min_max", "reduce_operation.cl"},
{"reduce_sum_mean", "reduce_operation.cl"},
{"squared_difference", "squared_difference.cl"},
- {"strided_slice_ex", "strided_slice_ex.cl"},
{"topkv2_init", "topkv2.cl"},
{"topkv2_find_first_negative", "topkv2.cl"},
{"topkv2_reorder_negatives", "topkv2.cl"},
#include "./cl_kernels/arg_operation.clembed"
},
{
- "batch_to_space_nd.cl",
-#include "./cl_kernels/batch_to_space_nd.clembed"
- },
- {
"cast.cl",
#include "./cl_kernels/cast.clembed"
},
#include "./cl_kernels/pad.clembed"
},
{
- "logical_not.cl",
-#include "./cl_kernels/logical_not.clembed"
- },
- {
- "pixelwise_div_float.cl",
-#include "./cl_kernels/pixelwise_div_float.clembed"
- },
- {
- "pixelwise_div_int.cl",
-#include "./cl_kernels/pixelwise_div_int.clembed"
- },
- {
"prelu.cl",
#include "./cl_kernels/prelu.clembed"
},
#include "./cl_kernels/squared_difference.clembed"
},
{
- "strided_slice_ex.cl",
-#include "./cl_kernels/strided_slice_ex.clembed"
- },
- {
"topkv2.cl",
#include "./cl_kernels/topkv2.clembed"
},
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016, 2017 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "helpers.h"
-
-#if defined(DATA_TYPE) && defined(DEPTH_OUT) && defined(BLOCK_SIZE0) && defined(BLOCK_SIZE1) && defined(BATCH_OUT)
-/** Perform batch to space rearrangement of tensor
- *
- * @attention Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float
- * @attention Output tensor batch should be given as a preprocessor argument using -DBATCH_OUT=size. e.g. -DBATCH_OUT=16
- * @attention block size should be given as a preprocessor argument using -DBLOCK_SIZE0=size. e.g. -DBLOCK_SIZE0=1
- *
- * @param[in] input_ptr Pointer to the source tensor. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
- * @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)
- * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes)
- * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor
- *
- * @param[out] output_ptr Pointer to the destination tensor. Supported data types: same as @p inpu
-t_ptr
- * @param[in] output_stride_x Stride of the destination tensor in X dimension (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination tensor in Y dimension (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in
-bytes)
- * @param[in] output_stride_z Stride of the destination tensor in Z dimension (in bytes)
- * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] output_stride_w Stride of the destination tensor in W dimension (in bytes)
- * @param[in] output_step_w output_stride_w * number of elements along W processed per workitem(in bytes)
- * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination tensor
- */
-__kernel void batch_to_space_nd(
- TENSOR4D_DECLARATION(input),
- TENSOR4D_DECLARATION(output))
- {
- Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input, 0);
- Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(output, DEPTH_OUT);
-
- int out_index[4]={0};
- int in_index[4]={0};
-
- out_index[0] = get_global_id(0);//W
- out_index[1] = get_global_id(1);//H
- out_index[2] = get_global_id(2) % DEPTH_OUT;//C
- out_index[3] = get_global_id(2) / DEPTH_OUT;//N
-
- in_index[0] = out_index[0]/BLOCK_SIZE1;
- in_index[1] = out_index[1]/BLOCK_SIZE0;
- in_index[2] = out_index[2];
- in_index[3] = out_index[3] + ((out_index[1] % BLOCK_SIZE0) * BLOCK_SIZE0 + out_index[0] % BLOCK_SIZE1) * BATCH_OUT;
-
- *((__global DATA_TYPE *)out.ptr) = *((__global DATA_TYPE *)tensor4D_offset(&in, in_index[0], in_index[1], in_index[2], in_index[3]));
- }
-#endif // defined(DATA_TYPE) && defined(DEPTH_IN) && defined(BLOCK_SIZE0) && defined(BLOCK_SIZE1) && defined(BATCH_OUT)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "helpers.h"
-
-/** returns truth value of the input tensor for LOGICAL NOT.
- *
- * @attention Data type can be passed using the -DDATA_TYPE_IN compile flag, e.g. -DDATA_TYPE_IN=bool
- * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16
- * @note Can only take boolean data types.
- *
- * @param[in] input_ptr Pointer to the source tensor. Supported data types: QASYMM8
- * @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)
- * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes)
- * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor
- *
- * @param[out] output_ptr Pointer to the destination tensor. Supported data types: QASYMM8
- * @param[in] output_stride_x Stride of the destination tensor in X dimension (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination tensor in Y dimension (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] output_stride_z Stride of the destination tensor in Z dimension (in bytes)
- * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
- */
-__kernel void logical_not(
- TENSOR3D_DECLARATION(input),
- TENSOR3D_DECLARATION(output))
-{
- Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
- Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
-
- VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) in_data =
- VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input.ptr);
-
- VSTORE(VEC_SIZE) (CONVERT(!in_data, VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)),
- 0, (__global DATA_TYPE *)output.ptr);
-}
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016, 2017 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "helpers.h"
-
-#ifdef SATURATE
-#define CONVERT_OP_FLOAT_STR(x, type, round) (convert_##type##_sat##round(x))
-#else /* SATURATE */
-#define CONVERT_OP_FLOAT_STR(x, type, round) (convert_##type##round(x))
-#endif /* SATURATE */
-#define CONVERT_OP_FLOAT(x, type, round) CONVERT_OP_FLOAT_STR(x, type, round)
-
-/** Performs a pixelwise division with float scale of either integer or float inputs.
- *
- * @attention The inputs and output data types need to be passed at compile time using -DDATA_TYPE_IN1, -DDATA_TYPE_IN2 and -DDATA_TYPE_OUT:
- * e.g. -DDATA_TYPE_IN1=uchar -DDATA_TYPE_IN2=ushort -DDATA_TYPE_OUT=short
- * @attention The data type of the intermediate result of the division should passed as well using -DDATA_TYPE_RES.
- * e.g. If one of inputs is S16 -DDATA_TYPE_RES=int should be passed else -DDATA_TYPE_RES=short.
- * @attention -DDATA_TYPE_FLOAT must be passed if floating point inputs are provided.
- *
- * @param[in] in1_ptr Pointer to the source image. Supported data types: U8, S16, F16, F32
- * @param[in] in1_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] in1_step_x in1_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] in1_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] in1_step_y in1_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] in1_stride_z Stride of the source image in Y dimension (in bytes)
- * @param[in] in1_step_z in1_stride_z * number of elements along Y processed per workitem(in bytes)
- * @param[in] in1_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[in] in2_ptr Pointer to the source image. Supported data types: U8, S16, F16, F32
- * @param[in] in2_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] in2_step_x in2_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] in2_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] in2_step_y in2_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] in2_stride_z Stride of the source image in Y dimension (in bytes)
- * @param[in] in2_step_z in2_stride_z * number of elements along Y processed per workitem(in bytes)
- * @param[in] in2_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] out_ptr Pointer to the destination image. Supported data types: U8, S16, F16, F32
- * @param[in] out_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] out_step_x out_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] out_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] out_step_y out_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] out_stride_z Stride of the destination image in Y dimension (in bytes)
- * @param[in] out_step_z out_stride_z * number of elements along Y processed per workitem(in bytes)
- * @param[in] out_offset_first_element_in_bytes The offset of the first element in the destination image
- * @param[in] scale Float scaling factor. Supported data types: F32
- */
-__kernel void pixelwise_div_float(
- TENSOR3D_DECLARATION(in1),
- TENSOR3D_DECLARATION(in2),
- TENSOR3D_DECLARATION(out),
- const float scale)
-{
- // Get pixels pointer
- Tensor3D in1 = CONVERT_TO_TENSOR3D_STRUCT(in1);
- Tensor3D in2 = CONVERT_TO_TENSOR3D_STRUCT(in2);
- Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(out);
-
- // Load data
- VEC_DATA_TYPE(DATA_TYPE_RES, 16)
- in1_data = CONVERT(vload16(0, (__global DATA_TYPE_IN1 *)in1.ptr), VEC_DATA_TYPE(DATA_TYPE_RES, 16));
- VEC_DATA_TYPE(DATA_TYPE_RES, 16)
- in2_data = CONVERT(vload16(0, (__global DATA_TYPE_IN2 *)in2.ptr), VEC_DATA_TYPE(DATA_TYPE_RES, 16));
-
- // Perform division
-#ifdef DATA_TYPE_FLOAT
- VEC_DATA_TYPE(DATA_TYPE_OUT, 16)
- res = CONVERT(in1_data / in2_data * (DATA_TYPE_RES)scale, VEC_DATA_TYPE(DATA_TYPE_OUT, 16));
-#else /* DATA_TYPE_FLOAT */
- VEC_DATA_TYPE(DATA_TYPE_OUT, 16)
- res = CONVERT_OP_FLOAT(CONVERT_OP_FLOAT((convert_float16(in1_data / in2_data) * scale), VEC_DATA_TYPE(DATA_TYPE_RES, 16), ROUND), VEC_DATA_TYPE(DATA_TYPE_OUT, 16), ROUND);
-#endif /* DATA_TYPE_FLOAT */
-
- // Store result
- vstore16(res, 0, (__global DATA_TYPE_OUT *)out.ptr);
-}
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016, 2017 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "helpers.h"
-
-#if defined(SATURATE)
-#define CONVERT_OP_INT_STR(x, type, size) (convert_##type##size##_sat(x))
-#else // SATURATE
-#define CONVERT_OP_INT_STR(x, type, size) (convert_##type##size(x))
-#endif // SATURATE
-#define CONVERT_OP_INT(x, type, size) CONVERT_OP_INT_STR(x, type, size)
-
-#define DIV_OP(x, y, scale, type, size) CONVERT_OP_INT((x) / (y) >> scale, type, size)
-
-/** Performs a pixelwise division with integer scale of integer inputs.
- *
- * @attention The inputs and output data types need to be passed at compile time using -DDATA_TYPE_IN1, -DDATA_TYPE_IN2 and -DDATA_TYPE_OUT:
- * e.g. -DDATA_TYPE_IN1=uchar -DDATA_TYPE_IN2=ushort -DDATA_TYPE_OUT=short
- * @attention The data_type of the intermediate result of the division should passed as well using -DDATA_TYPE_RES.
- * e.g. If one of inputs is S16 -DDATA_TYPE_RES=int should be passed else -DDATA_TYPE_RES=short.
- *
- * @param[in] in1_ptr Pointer to the source image. Supported data types: U8/S16
- * @param[in] in1_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] in1_step_x in1_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] in1_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] in1_step_y in1_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] in1_stride_z Stride of the source image in Y dimension (in bytes)
- * @param[in] in1_step_z in1_stride_z * number of elements along Y processed per workitem(in bytes)
- * @param[in] in1_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[in] in2_ptr Pointer to the source image. Supported data types: same as @p in1_ptr
- * @param[in] in2_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] in2_step_x in2_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] in2_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] in2_step_y in2_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] in2_stride_z Stride of the source image in Y dimension (in bytes)
- * @param[in] in2_step_z in2_stride_z * number of elements along Y processed per workitem(in bytes)
- * @param[in] in2_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] out_ptr Pointer to the destination image. Supported data types: same as @p in1_ptr
- * @param[in] out_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] out_step_x out_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] out_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] out_step_y out_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] out_stride_z Stride of the destination image in Y dimension (in bytes)
- * @param[in] out_step_z out_stride_z * number of elements along Y processed per workitem(in bytes)
- * @param[in] out_offset_first_element_in_bytes The offset of the first element in the destination image
- * @param[in] scale Integer scaling factor. Supported data types: S32
- */
-__kernel void pixelwise_div_int(
- TENSOR3D_DECLARATION(in1),
- TENSOR3D_DECLARATION(in2),
- TENSOR3D_DECLARATION(out),
- const uint scale)
-{
- // Get pixels pointer
- Tensor3D in1 = CONVERT_TO_TENSOR3D_STRUCT(in1);
- Tensor3D in2 = CONVERT_TO_TENSOR3D_STRUCT(in2);
- Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(out);
-
- // Load data
- VEC_DATA_TYPE(DATA_TYPE_RES, 16)
- in1_data = CONVERT(vload16(0, (__global DATA_TYPE_IN1 *)in1.ptr), VEC_DATA_TYPE(DATA_TYPE_RES, 16));
- VEC_DATA_TYPE(DATA_TYPE_RES, 16)
- in2_data = CONVERT(vload16(0, (__global DATA_TYPE_IN2 *)in2.ptr), VEC_DATA_TYPE(DATA_TYPE_RES, 16));
-
- // Perform division and store result
- vstore16(DIV_OP(in1_data, in2_data, scale, DATA_TYPE_OUT, 16), 0, (__global DATA_TYPE_OUT *)out.ptr);
-}
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "helpers.h"
-
-#if defined(ELEMENT_DATA_TYPE) && defined(DEPTH_OUT)
-/** Extracts a strided slice up to 4-dimensions
- *
- * @note Datatype should be given as a preprocessor argument using -DELEMENT_DATA_TYPE=type. e.g. -DELEMENT_DATA_TYPE=short
- * @attention Output tensor depth should be given as a preprocessor argument using -DDEPTH_OUT=size. e.g. -DDEPTH_OUT=16
- *
- * @param[in] input_ptr Pointer to the source image. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
- * @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] output_ptr Pointer to the destination image. Supported data types: same as @p input_ptr
- * @param[in] output_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] output_stride_w Stride of the source tensor in W dimension (in bytes)
- * @param[in] output_step_w output_stride_w * number of elements along W processed per workitem(in bytes)
- * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image
- * @param[in] starts The stride of X dimension of input tensor to be sliced. Supported data types: S32
- * @param[in] strides The stride of Y dimension of input tensor to be sliced. Supported data types: S32
- */
-__kernel void strided_slice_ex(TENSOR4D_DECLARATION(input),
- TENSOR4D_DECLARATION(output),
- const int4 starts,
- const int4 strides)
-{
- Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT(input, 0);
- Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(output, DEPTH_OUT);
-
- int4 indices_in =
- {
- starts.x + (strides.x * get_global_id(0)),
- starts.y + (strides.y * get_global_id(1)),
- starts.z + (strides.z * (get_global_id(2) % DEPTH_OUT)),
- starts.w + (strides.w * (get_global_id(2) / DEPTH_OUT)),
- };
- *((__global ELEMENT_DATA_TYPE *)out.ptr) = *((__global ELEMENT_DATA_TYPE *)tensor4D_offset(&in, indices_in.x, indices_in.y, indices_in.z, indices_in.w));
-}
-#endif // defined(ELEMENT_DATA_TYPE) && defined(DEPTH_OUT)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/core/CL/kernels/CLBatchToSpaceNDKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-
-using namespace arm_compute;
-
-namespace
-{
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output,
- const int32_t *block_size)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QASYMM8,
- DataType::S16, DataType::S32, DataType::F16,
- DataType::F32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::QASYMM8,
- DataType::S16, DataType::S32, DataType::F16,
- DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(!(block_size[0] >= 1 && block_size[1] >= 1),
- "Block size should be greater than or equal to 1.");
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->dimension(2) != output->dimension(2),
- "Input Depth should be equal to Output Depth");
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(
- output->dimension(3) * block_size[0] * block_size[1] != input->dimension(3),
- "Input batch should be equal to (output batch * block size[0] *block size[1])");
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG((output->dimension(0) % block_size[1]) ||
- (output->dimension(1) % block_size[0]),
- "Output height and width should be divisible by block size[0] "
- "and block_size[1] respectively");
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG((output->dimension(0) != input->dimension(0) * block_size[1]) ||
- (output->dimension(1) != input->dimension(1) * block_size[0]),
- "Output height and width should be equal to "
- "input_height*blocksize[0] and input_width*blocksize[1] "
- "respectively");
-
- return Status{};
-}
-
-} // namespace
-
-CLBatchToSpaceNDKernel::CLBatchToSpaceNDKernel() : _input(nullptr), _output(nullptr) {}
-
-void CLBatchToSpaceNDKernel::configure(const ICLTensor *input, ICLTensor *output,
- const int32_t *block_size)
-{
-
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), block_size));
-
- _input = input;
- _output = output;
-
- // Set kernel build options
- std::set<std::string> build_opts;
- build_opts.emplace("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
- build_opts.emplace("-DBLOCK_SIZE0=" + support::cpp11::to_string(block_size[0]));
- build_opts.emplace("-DBLOCK_SIZE1=" + support::cpp11::to_string(block_size[1]));
- build_opts.emplace("-DBATCH_OUT=" + support::cpp11::to_string(output->info()->dimension(3)));
- build_opts.emplace("-DDEPTH_OUT=" + support::cpp11::to_string(output->info()->dimension(2)));
-
- // Create kernel
- _kernel = static_cast<cl::Kernel>(
- CLKernelLibraryEx::get().create_kernel("batch_to_space_nd", build_opts));
-
- // Configure kernel window
- Window win = calculate_max_window(*output->info(), Steps());
-
- Coordinates coord;
- coord.set_num_dimensions(output->info()->num_dimensions());
- output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape()));
-
- ICLKernel::configure_internal(win);
-}
-
-void CLBatchToSpaceNDKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(ICLKernel::window(), window);
-
- Window slice_in = window.first_slice_window_4D().collapse(ICLKernel::window(), 2, 4);
-
- // Setup output slice
- Window slice_out(slice_in);
- slice_out.set(Window::DimX, Window::Dimension(0, 0, 0));
- slice_out.set(Window::DimY, Window::Dimension(0, 0, 0));
- slice_out.set(Window::DimZ, Window::Dimension(0, 0, 0));
- slice_out.set(3, Window::Dimension(0, 0, 0));
-
- do
- {
- unsigned int idx = 0;
- add_4D_tensor_argument(idx, _input, slice_out);
- add_4D_tensor_argument(idx, _output, slice_in);
- enqueue(queue, *this, slice_in);
- } while (window.slide_window_slice_4D(slice_out) && window.slide_window_slice_4D(slice_in));
-}
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/core/CL/kernels/CLLogicalNotKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-
-using namespace arm_compute;
-
-namespace
-{
-constexpr unsigned int num_elems_processed_per_iteration = 16;
-
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
-{
- const TensorShape &out_shape = input->tensor_shape();
-
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8);
-
- // Validate in case of configured output
- if (output->total_size() > 0)
- {
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QASYMM8);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(
- detail::have_different_dimensions(out_shape, output->tensor_shape(), 0),
- "Wrong shape for output");
- }
- return Status{};
-}
-} // namespace
-
-CLLogicalNotKernel::CLLogicalNotKernel() : _input(nullptr), _output(nullptr) {}
-
-void CLLogicalNotKernel::configure(const ICLTensor *input, ICLTensor *output)
-{
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info()));
-
- _input = input;
- _output = output;
-
- // Create kernel
- std::string kernel_name = "logical_not";
- std::set<std::string> build_opts;
- build_opts.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())));
- build_opts.emplace(
- ("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)));
-
- _kernel =
- static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts));
-
- const ValidRegion &valid_region = input->info()->valid_region();
-
- Window win = calculate_max_window(valid_region, Steps(num_elems_processed_per_iteration));
-
- AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
-
- output_access.set_valid_region(win, valid_region);
-
- ICLKernel::configure_internal(win);
-}
-
-void CLLogicalNotKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
-
- const TensorShape &in_shape1 = _input->info()->tensor_shape();
-
- bool can_collapse = true;
- if (in_shape1.total_size() > 1)
- {
- can_collapse = (in_shape1.num_dimensions() > Window::DimZ);
- }
-
- bool has_collapsed = false;
- Window collapsed =
- can_collapse ? window.collapse_if_possible(ICLKernel::window(), Window::DimZ, &has_collapsed)
- : window;
-
- Window slice = collapsed.first_slice_window_3D();
-
- do
- {
- unsigned int idx = 0;
- add_3D_tensor_argument(idx, _input, slice);
- add_3D_tensor_argument(idx, _output, slice);
-
- enqueue(queue, *this, slice);
-
- collapsed.slide_window_slice_3D(slice);
- } while (collapsed.slide_window_slice_3D(slice));
-}
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/core/CL/kernels/CLNormalizationLayerExKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-
-using namespace arm_compute;
-
-namespace
-{
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output,
- NormalizationLayerInfo /*norm_info*/)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output);
-
- // Checks performed when output is configured
- if (output->total_size() != 0)
- {
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
- }
-
- return Status{};
-}
-
-std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output,
- NormalizationLayerInfo norm_info)
-{
- // Output tensor auto initialization if not yet initialized
- auto_init_if_empty(*output, *input->clone());
-
- const unsigned int norm_size = norm_info.norm_size();
- bool is_in_map = norm_info.is_in_map();
-
- const unsigned int border_width = is_in_map ? std::min(norm_size / 2, 3U) : 0;
- const BorderSize border_size = BorderSize(0, border_width);
-
- const unsigned int num_elems_processed_per_iteration = 4;
- const unsigned int num_elems_read_per_iteration =
- is_in_map ? (num_elems_processed_per_iteration + 2 * (norm_size / 2))
- : num_elems_processed_per_iteration;
-
- Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration));
-
- // We do not use a Rectangle window for IN_MAP_2D as we clamp the top and bottom accesses inside
- // the kernel, avoiding padding
- AccessWindowHorizontal input_access(input, -border_size.left, num_elems_read_per_iteration);
- AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
-
- bool window_changed = update_window_and_padding(win, input_access, output_access);
-
- output_access.set_valid_region(win, input->valid_region());
-
- Status err = (window_changed)
- ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!")
- : Status{};
- return std::make_pair(err, win);
-}
-} // namespace
-
-CLNormalizationLayerExKernel::CLNormalizationLayerExKernel()
- : _input(nullptr), _output(nullptr), _border_size(0), _is_in_map(false)
-{
-}
-
-BorderSize CLNormalizationLayerExKernel::border_size() const { return _border_size; }
-
-void CLNormalizationLayerExKernel::configure(const ICLTensor *input, ICLTensor *output,
- NormalizationLayerInfo norm_info)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
-
- // Output tensor auto initialization if not yet initialized
- auto_init_if_empty(*output->info(), *input->info()->clone());
-
- // Perform validation step
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), norm_info));
-
- _input = input;
- _output = output;
-
- const unsigned int num_elems_processed_per_iteration = 4;
- const bool is_in_map_2D = (norm_info.type() == NormType::IN_MAP_2D);
-
- // Set build options
- CLBuildOptions build_opts;
- build_opts.add_option(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())));
- build_opts.add_option(
- ("-DCOEFF=" + float_to_string_with_full_precision(norm_info.scale_coeff())));
- build_opts.add_option(("-DBETA=" + float_to_string_with_full_precision(norm_info.beta())));
- build_opts.add_option(("-DKAPPA=" + float_to_string_with_full_precision(norm_info.kappa())));
- build_opts.add_option(
- ("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)));
- build_opts.add_option(("-DRADIUS=" + support::cpp11::to_string(norm_info.norm_size())));
- build_opts.add_option(("-DNUM_SLICES=" + support::cpp11::to_string(input->info()->dimension(2))));
- build_opts.add_option_if(is_in_map_2D, "-DIN_MAP_2D");
-
- // Create kernel
- std::string kernel_name =
- _is_in_map ? "normalization_layer_in_map" : "normalization_layer_cross_map";
- _kernel = static_cast<cl::Kernel>(
- CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));
-
- // Configure kernel window
- auto win_config = validate_and_configure_window(input->info(), output->info(), norm_info);
- ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
- ICLKernel::configure_internal(win_config.second);
-
- // Set config_id for enabling LWS tuning
- _config_id = "normalization_layer_";
- _config_id += lower_string(string_from_data_type(input->info()->data_type()));
- _config_id += "_";
- _config_id += support::cpp11::to_string(
- static_cast<std::underlying_type<NormType>::type>(norm_info.type()));
- _config_id += "_";
- _config_id += support::cpp11::to_string(norm_info.norm_size());
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->info()->dimension(0));
- _config_id += "_";
- _config_id += support::cpp11::to_string(input->info()->dimension(1));
-}
-
-Status CLNormalizationLayerExKernel::validate(const ITensorInfo *input, const ITensorInfo *output,
- NormalizationLayerInfo norm_info)
-{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, norm_info));
- ARM_COMPUTE_RETURN_ON_ERROR(
- validate_and_configure_window(input->clone().get(), output->clone().get(), norm_info).first);
-
- return Status{};
-}
-
-void CLNormalizationLayerExKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
-
- const int collapsed_dimension = _is_in_map ? Window::DimZ : 4;
- Window window_collapsed = window.collapse_if_possible(ICLKernel::window(), collapsed_dimension);
- Window slice = window_collapsed.first_slice_window_3D();
-
- do
- {
- unsigned int idx = 0;
- add_3D_tensor_argument(idx, _input, slice);
- add_3D_tensor_argument(idx, _output, slice);
- enqueue(queue, *this, slice);
- } while (window_collapsed.slide_window_slice_3D(slice));
-}
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/core/CL/kernels/CLPixelWiseDivisionKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-
-using namespace arm_compute;
-
-namespace
-{
-constexpr unsigned int num_elems_processed_per_iteration = 16;
-
-Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *input2,
- const ITensorInfo *output, float scale, ConvertPolicy overflow_policy,
- RoundingPolicy rounding_policy)
-{
- ARM_COMPUTE_UNUSED(overflow_policy);
- ARM_COMPUTE_UNUSED(rounding_policy);
-
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8, DataType::S16,
- DataType::F16, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8, DataType::S16,
- DataType::F16, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(scale < 0, "Scale cannot be negative.");
-
- const TensorShape &out_shape =
- TensorShape::broadcast_shape(input1->tensor_shape(), input2->tensor_shape());
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(out_shape.total_size() == 0,
- "Inputs are not broadcast compatible");
-
- // Validate in case of configured output
- if (output->total_size() > 0)
- {
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16,
- DataType::F16, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(
- output->data_type() == DataType::U8 &&
- (input1->data_type() != DataType::U8 || input2->data_type() != DataType::U8),
- "Output can only be U8 if both inputs are U8");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(
- detail::have_different_dimensions(out_shape, output->tensor_shape(), 0),
- "Wrong shape for output");
- }
-
- return Status{};
-}
-
-std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input1, ITensorInfo *input2,
- ITensorInfo *output)
-{
- const std::pair<TensorShape, ValidRegion> broadcast_pair =
- ITensorInfo::broadcast_shape_and_valid_region(*input1, *input2);
- const TensorShape &out_shape = broadcast_pair.first;
- const ValidRegion &valid_region = broadcast_pair.second;
-
- // Auto initialize output if not initialized
- {
- set_shape_if_empty(*output, out_shape);
-
- if (input1->data_type() == DataType::S16 || input2->data_type() == DataType::S16)
- {
- set_format_if_unknown(*output, Format::S16);
- }
- else if (input1->data_type() == DataType::F32 || input2->data_type() == DataType::F32)
- {
- set_format_if_unknown(*output, Format::F32);
- }
- }
-
- Window win = calculate_max_window(valid_region, Steps(num_elems_processed_per_iteration));
- Window win_input1 = win.broadcast_if_dimension_le_one(*input1);
- Window win_input2 = win.broadcast_if_dimension_le_one(*input2);
-
- AccessWindowHorizontal input1_access(input1, 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal input2_access(input2, 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
-
- bool window_changed = update_window_and_padding(win_input1, input1_access) ||
- update_window_and_padding(win_input2, input2_access) ||
- update_window_and_padding(win, output_access);
-
- output_access.set_valid_region(win, valid_region);
-
- Status err = (window_changed)
- ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!")
- : Status{};
- return std::make_pair(err, win);
-}
-} // namespace
-
-CLPixelWiseDivisionKernel::CLPixelWiseDivisionKernel()
- : _input1(nullptr), _input2(nullptr), _output(nullptr)
-{
-}
-
-void CLPixelWiseDivisionKernel::configure(const ICLTensor *input1, const ICLTensor *input2,
- ICLTensor *output, float scale,
- ConvertPolicy overflow_policy,
- RoundingPolicy rounding_policy)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input1->info(), input2->info(), output->info(),
- scale, overflow_policy, rounding_policy));
-
- // Configure kernel window
- auto win_config = validate_and_configure_window(input1->info(), input2->info(), output->info());
- ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
-
- _input1 = input1;
- _input2 = input2;
- _output = output;
-
- int scale_int = -1;
- // Extract sign, exponent and mantissa
- int exponent = 0;
- float normalized_mantissa = std::frexp(scale, &exponent);
- // Use int scaling if factor is equal to 1/2^n for 0 <= n <= 15
- // frexp returns 0.5 as mantissa which means that the exponent will be in the range of -1 <= e <=
- // 14
- // Moreover, it will be negative as we deal with 1/2^n
- if ((normalized_mantissa == 0.5f) && (-14 <= exponent) && (exponent <= 1))
- {
- // Store the positive exponent. We know that we compute 1/2^n
- // Additionally we need to subtract 1 to compensate that frexp used a mantissa of 0.5
- scale_int = std::abs(exponent - 1);
- }
-
- std::string data_type;
- std::string compute_type;
- // Check if it has float inputs and output
- if (is_data_type_float(input1->info()->data_type()) ||
- is_data_type_float(input2->info()->data_type()))
- {
- scale_int = -1;
- compute_type = (input1->info()->data_type() == DataType::F32 ||
- input2->info()->data_type() == DataType::F32)
- ? "float"
- : "half";
- data_type = "DATA_TYPE_FLOAT";
- }
- else
- {
- if (input1->info()->data_type() == DataType::S16 ||
- input2->info()->data_type() == DataType::S16)
- {
- compute_type = "int";
- }
- else
- {
- compute_type = "ushort";
- }
- data_type = "DATA_TYPE_INT";
- }
-
- // Construct kernel name
- std::string kernel_name = "pixelwise_div";
- kernel_name += (scale_int >= 0) ? "_int" : "_float";
-
- // Set kernel build options
- std::set<std::string> build_opts;
- build_opts.emplace(
- (overflow_policy == ConvertPolicy::WRAP || is_data_type_float(output->info()->data_type()))
- ? "-DWRAP"
- : "-DSATURATE");
- build_opts.emplace((rounding_policy == RoundingPolicy::TO_ZERO) ? "-DROUND=_rtz"
- : "-DROUND=_rte");
- build_opts.emplace("-DDATA_TYPE_IN1=" + get_cl_type_from_data_type(input1->info()->data_type()));
- build_opts.emplace("-DDATA_TYPE_IN2=" + get_cl_type_from_data_type(input2->info()->data_type()));
- build_opts.emplace("-DDATA_TYPE_OUT=" + get_cl_type_from_data_type(output->info()->data_type()));
- build_opts.emplace("-DDATA_TYPE_RES=" + compute_type);
- build_opts.emplace("-D" + data_type);
-
- // Create kernel
- _kernel =
- static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts));
-
- // Set scale argument
- unsigned int idx = 3 * num_arguments_per_3D_tensor(); // Skip the inputs and output parameters
-
- if (scale_int >= 0)
- {
- _kernel.setArg(idx++, scale_int);
- }
- else
- {
- _kernel.setArg(idx++, scale);
- }
-
- ICLKernel::configure_internal(win_config.second);
-}
-
-Status CLPixelWiseDivisionKernel::validate(const ITensorInfo *input1, const ITensorInfo *input2,
- const ITensorInfo *output, float scale,
- ConvertPolicy overflow_policy,
- RoundingPolicy rounding_policy)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output);
- ARM_COMPUTE_RETURN_ON_ERROR(
- validate_arguments(input1, input2, output, scale, overflow_policy, rounding_policy));
- ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input1->clone().get(),
- input2->clone().get(),
- output->clone().get())
- .first);
-
- return Status{};
-}
-
-void CLPixelWiseDivisionKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
-
- const TensorShape &in_shape1 = _input1->info()->tensor_shape();
- const TensorShape &in_shape2 = _input2->info()->tensor_shape();
- const TensorShape &out_shape = _output->info()->tensor_shape();
-
- bool can_collapse = true;
- if (std::min(in_shape1.total_size(), in_shape2.total_size()) > 1)
- {
- can_collapse =
- (std::min(in_shape1.num_dimensions(), in_shape2.num_dimensions()) > Window::DimZ);
- for (size_t d = Window::DimZ; can_collapse && (d < out_shape.num_dimensions()); ++d)
- {
- can_collapse = (in_shape1[d] == in_shape2[d]);
- }
- }
-
- bool has_collapsed = false;
- Window collapsed =
- can_collapse ? window.collapse_if_possible(ICLKernel::window(), Window::DimZ, &has_collapsed)
- : window;
-
- const TensorShape &in_shape1_collapsed =
- has_collapsed ? in_shape1.collapsed_from(Window::DimZ) : in_shape1;
- const TensorShape &in_shape2_collapsed =
- has_collapsed ? in_shape2.collapsed_from(Window::DimZ) : in_shape2;
-
- Window slice = collapsed.first_slice_window_3D();
- Window slice_input1 = slice.broadcast_if_dimension_le_one(in_shape1_collapsed);
- Window slice_input2 = slice.broadcast_if_dimension_le_one(in_shape2_collapsed);
-
- do
- {
- unsigned int idx = 0;
- add_3D_tensor_argument(idx, _input1, slice_input1);
- add_3D_tensor_argument(idx, _input2, slice_input2);
- add_3D_tensor_argument(idx, _output, slice);
- enqueue(queue, *this, slice);
-
- collapsed.slide_window_slice_3D(slice_input1);
- collapsed.slide_window_slice_3D(slice_input2);
- } while (collapsed.slide_window_slice_3D(slice));
-}
-
-BorderSize CLPixelWiseDivisionKernel::border_size() const
-{
- const unsigned int replicateSize =
- _output->info()->dimension(0) -
- std::min(_input1->info()->dimension(0), _input2->info()->dimension(0));
- const unsigned int border =
- std::min<unsigned int>(num_elems_processed_per_iteration - 1U, replicateSize);
- return BorderSize(0, border, 0, 0);
-}
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/core/CL/kernels/CLStridedSliceExKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/TensorInfo.h"
-
-using namespace arm_compute;
-
-CLStridedSliceExKernel::CLStridedSliceExKernel()
- : _input(nullptr), _output(nullptr), _beginData(nullptr), _endData(nullptr),
- _stridesData(nullptr), _beginMask(0), _endMask(0), _shrinkAxisMask(0)
-{
-}
-
-Status CLStridedSliceExKernel::validate(const ITensorInfo *input, const ITensorInfo *output,
- const ITensorInfo *begin, const ITensorInfo *end,
- const ITensorInfo *strides, int32_t /*beginMask*/,
- int32_t /*endMask*/, int32_t /*shrinkAxisMask*/)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, begin, end, strides);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(
- input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16,
- DataType::U32, DataType::S32, DataType::F16, DataType::F32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(begin, 1, DataType::S32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(end, 1, DataType::S32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(strides, 1, DataType::S32);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
-
- ARM_COMPUTE_ERROR_ON(begin->num_dimensions() != 1 || begin->dimension(0) > 4);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(begin->tensor_shape(), end->tensor_shape(),
- strides->tensor_shape());
-
- return Status{};
-}
-
-// Return the index for the first element along that axis. This index will be a
-// positive integer between [0, axisSize - 1] that can be used to index
-// directly into the data.
-inline int32_t StartForAxis(int32_t beginMask, int32_t begin, int32_t stride,
- const TensorShape &inputShape, int32_t axis)
-{
- // Begin with the specified index
- int32_t start = begin;
-
- // beginMask override
- if (beginMask & 1 << axis)
- {
- if (stride > 0)
- {
- // Forward iteration - use the first element. These values will get
- // clamped below (Note: We could have set them to 0 and axisSize-1, but
- // use lowest() and max() to maintain symmetry with StopForAxis())
- start = std::numeric_limits<int32_t>::lowest();
- }
- else
- {
- // Backward iteration - use the last element.
- start = std::numeric_limits<int32_t>::max();
- }
- }
-
- // Handle negative indices
- int32_t axisSize = inputShape[axis];
- if (start < 0)
- {
- start += axisSize;
- }
-
- // Clamping
- start = arm_compute::utility::clamp(start, 0, axisSize - 1);
-
- return start;
-}
-
-// Return the "real" index for the end of iteration along that axis. This is an
-// "end" in the traditional C sense, in that it points to one past the last
-// element. ie. So if you were iterating through all elements of a 1D array of
-// size 4, this function would return 4 as the stop, because it is one past the
-// "real" indices of 0, 1, 2 & 3.
-inline int32_t StopForAxis(int32_t endMask, int32_t end, int32_t stride,
- const TensorShape &inputShape, int32_t axis)
-{
- // Begin with the specified index
- int32_t stop = end;
-
- // endMask override
- if (endMask & (1 << axis))
- {
- if (stride > 0)
- {
- // Forward iteration - use the last element. These values will get
- // clamped below
- stop = std::numeric_limits<int32_t>::max();
- }
- else
- {
- // Backward iteration - use the first element.
- stop = std::numeric_limits<int32_t>::lowest();
- }
- }
-
- // Handle negative indices
- int32_t axisSize = inputShape[axis];
- if (stop < 0)
- {
- stop += axisSize;
- }
-
- // Clamping
- // Because the end index points one past the last element, we need slightly
- // different clamping ranges depending on the direction.
- if (stride > 0)
- {
- // Forward iteration
- stop = arm_compute::utility::clamp(stop, 0, axisSize);
- }
- else
- {
- // Backward iteration
- stop = arm_compute::utility::clamp(stop, -1, axisSize - 1);
- }
-
- return stop;
-}
-
-inline int32_t getOutDim(int32_t start, int32_t stop, int32_t stride)
-{
- int32_t ret = 0;
- if (stride > 0)
- {
- ret = ((stop - start - 1) / stride) + 1;
- }
- else
- {
- ret = ((stop - start + 1) / stride) + 1;
- }
- ARM_COMPUTE_ERROR_ON_MSG(ret < 0, "The dimension must be the natural number");
- return ret;
-}
-
-void CLStridedSliceExKernel::configure(const ICLTensor *input, ICLTensor *output,
- ICLTensor *beginData, ICLTensor *endData,
- ICLTensor *stridesData, int32_t beginMask, int32_t endMask,
- int32_t shrinkAxisMask)
-{
- ARM_COMPUTE_ERROR_THROW_ON(validate(input->info(), output->info(), beginData->info(),
- endData->info(), stridesData->info(), beginMask, endMask,
- shrinkAxisMask));
-
- _input = input;
- _output = output;
- _beginData = beginData;
- _endData = endData;
- _stridesData = stridesData;
- _beginMask = beginMask;
- _endMask = endMask;
- _shrinkAxisMask = shrinkAxisMask;
-
- // Set kernel build options
- std::set<std::string> build_opts;
- build_opts.emplace("-DELEMENT_DATA_TYPE=" +
- get_cl_type_from_data_type(input->info()->data_type()));
- build_opts.emplace("-DDEPTH_OUT=" + support::cpp11::to_string(output->info()->dimension(2)));
-
- // Create kernel
- _kernel = static_cast<cl::Kernel>(
- CLKernelLibraryEx::get().create_kernel("strided_slice_ex", build_opts));
-
- // Configure kernel window
- Window win = calculate_max_window(*output->info(), Steps());
- ICLKernel::configure_internal(win);
-}
-
-void CLStridedSliceExKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
-
- _beginData->map(queue);
- _endData->map(queue);
- _stridesData->map(queue);
-
- std::vector<int32_t> starts;
- std::vector<int32_t> strides;
-
- for (uint32_t n = 0; n < _beginData->info()->tensor_shape().total_size(); ++n)
- {
- const TensorShape shape = _input->info()->tensor_shape();
- starts.emplace_back(
- StartForAxis(_beginMask, reinterpret_cast<int32_t *>(_beginData->buffer())[n],
- reinterpret_cast<int32_t *>(_stridesData->buffer())[n], shape, n));
-
- strides.emplace_back(reinterpret_cast<int32_t *>(_stridesData->buffer())[n]);
- }
-
- for (uint32_t n = _beginData->info()->tensor_shape().total_size(); n < 4; n++)
- {
- starts.emplace_back(0);
- strides.emplace_back(1);
- }
- // TODO: Apply shrinkAxisMask
-
- _beginData->unmap(queue);
- _stridesData->unmap(queue);
- _endData->unmap(queue);
-
- unsigned int idx = 2 * num_arguments_per_4D_tensor(); // Skip the input and output parameters
- const cl_int4 startsArg = {{
- static_cast<cl_int>(starts[0]), static_cast<cl_int>(starts[1]),
- static_cast<cl_int>(starts[2]), static_cast<cl_int>(starts[3]),
- }};
- _kernel.setArg<cl_int4>(idx++, startsArg);
-
- const cl_int4 stridesArg = {{
- static_cast<cl_int>(strides[0]), static_cast<cl_int>(strides[1]),
- static_cast<cl_int>(strides[2]), static_cast<cl_int>(strides[3]),
- }};
- _kernel.setArg<cl_int4>(idx++, stridesArg);
-
- Window slice_out = window.first_slice_window_4D().collapse(ICLKernel::window(), 2, 4);
-
- // Setup output slice
- Window slice_in(slice_out);
- slice_in.set(Window::DimX, Window::Dimension(0, 0, 0));
- slice_in.set(Window::DimY, Window::Dimension(0, 0, 0));
- slice_in.set(Window::DimZ, Window::Dimension(0, 0, 0));
- slice_in.set(3, Window::Dimension(0, 0, 0));
-
- do
- {
- unsigned int idx = 0;
- add_4D_tensor_argument(idx, _input, slice_in);
- add_4D_tensor_argument(idx, _output, slice_out);
- enqueue(queue, *this, slice_out);
- } while (window.slide_window_slice_4D(slice_in) && window.slide_window_slice_4D(slice_out));
-}
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/runtime/CL/functions/CLBatchToSpaceND.h"
-
-#include "arm_compute/core/CL/kernels/CLBatchToSpaceNDKernel.h"
-
-using namespace arm_compute;
-
-void CLBatchToSpaceND::configure(ICLTensor *input, ICLTensor *output, const int32_t *block_size)
-{
- auto k = arm_compute::support::cpp14::make_unique<CLBatchToSpaceNDKernel>();
- k->configure(input, output, block_size);
- _kernel = std::move(k);
-}
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/runtime/CL/functions/CLLogicalNot.h"
-#include "arm_compute/core/CL/kernels/CLLogicalNotKernel.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-
-using namespace arm_compute;
-
-void CLLogicalNot::configure(ICLTensor *input, ICLTensor *output)
-{
- auto k = arm_compute::support::cpp14::make_unique<CLLogicalNotKernel>();
- k->configure(input, output);
- _kernel = std::move(k);
-}
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/runtime/CL/functions/CLNormalizationLayerEx.h"
-
-#include "arm_compute/runtime/CL/CLScheduler.h"
-
-using namespace arm_compute;
-
-CLNormalizationLayerEx::CLNormalizationLayerEx() : _norm_kernel(), _border_handler() {}
-
-void CLNormalizationLayerEx::configure(ICLTensor *input, ICLTensor *output,
- const NormalizationLayerInfo &norm_info)
-{
- ARM_COMPUTE_ERROR_ON(input == nullptr);
-
- // Configure normalization kernel
- _norm_kernel.configure(input, output, norm_info);
-
- // Fill the border by 3 elements since we need vload4 in the IN_MAP normalization kernel
- _border_handler.configure(input, _norm_kernel.border_size(), BorderMode::CONSTANT, PixelValue(0));
-}
-
-Status CLNormalizationLayerEx::validate(const ITensorInfo *input, const ITensorInfo *output,
- const NormalizationLayerInfo &norm_info)
-{
- return CLNormalizationLayerExKernel::validate(input, output, norm_info);
-}
-
-void CLNormalizationLayerEx::run()
-{
- // Run border handler
- CLScheduler::get().enqueue(_border_handler, false);
-
- // Run normalization kernel
- CLScheduler::get().enqueue(_norm_kernel);
-}
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/runtime/CL/functions/CLPixelWiseDivision.h"
-
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLPixelWiseDivisionKernel.h"
-
-using namespace arm_compute;
-
-void CLPixelWiseDivision::configure(ICLTensor *input1, ICLTensor *input2, ICLTensor *output,
- float scale, ConvertPolicy overflow_policy,
- RoundingPolicy rounding_policy)
-{
- auto k = arm_compute::support::cpp14::make_unique<CLPixelWiseDivisionKernel>();
- k->configure(input1, input2, output, scale, overflow_policy, rounding_policy);
- _kernel = std::move(k);
-
- if (output->info()->dimension(0) > 1)
- {
- ICLTensor *broadcasted_info = (input1->info()->dimension(0) == 1) ? input1 : input2;
-
- if (broadcasted_info->info()->dimension(0) == 1)
- {
- _border_handler.configure(broadcasted_info, _kernel->border_size(), BorderMode::REPLICATE);
- }
- }
-}
-
-Status CLPixelWiseDivision::validate(const ITensorInfo *input1, const ITensorInfo *input2,
- const ITensorInfo *output, float scale,
- ConvertPolicy overflow_policy, RoundingPolicy rounding_policy)
-{
- return CLPixelWiseDivisionKernel::validate(input1, input2, output, scale, overflow_policy,
- rounding_policy);
-}
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/runtime/CL/functions/CLStridedSliceEx.h"
-
-#include "arm_compute/core/CL/kernels/CLStridedSliceExKernel.h"
-
-using namespace arm_compute;
-
-void CLStridedSliceEx::configure(const ICLTensor *input, ICLTensor *output, ICLTensor *beginData,
- ICLTensor *endData, ICLTensor *stridesData, int32_t beginMask,
- int32_t endMask, int32_t shrinkAxisMask)
-{
- auto k = arm_compute::support::cpp14::make_unique<CLStridedSliceExKernel>();
- k->configure(input, output, beginData, endData, stridesData, beginMask, endMask, shrinkAxisMask);
- _kernel = std::move(k);
-}