+++ /dev/null
-/*
-* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
-* Copyright (c) 2016-2018 ARM Limited.
-*
-* Licensed under the Apache License, Version 2.0 (the "License");
-* you may not use this file except in compliance with the License.
-* You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
-#ifndef __ARM_COMPUTE_CLPADLAYERKERNEL_H__
-#define __ARM_COMPUTE_CLPADLAYERKERNEL_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to perform PAD operation */
-class CLPadLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLPadLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLPadLayerKernel(const CLPadLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLPadLayerKernel &operator=(const CLPadLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLPadLayerKernel(CLPadLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLPadLayerKernel &operator=(CLPadLayerKernel &&) = default;
- /** Default destructor */
- ~CLPadLayerKernel() = default;
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
- * @param[in] output Output tensor. Data types supported: U8/QASYMM8/S16/S32/F16/F32.
- * @param[in] pad_size Padding Size tensor. Data types supported : S32
- */
- void configure(const ICLTensor *input, ICLTensor *output, ICLTensor *pad_size);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input; /**< Source tensor */
- ICLTensor *_output; /**< Destination tensor */
- ICLTensor *_pad_size; /**< Padding Size tensor */
-};
-
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_CLPADLAYERKERNEL_H__ */
#include <arm_compute/runtime/CL/functions/CLHashtableLookup.h>
#include <arm_compute/runtime/CL/functions/CLLogicalNot.h>
#include <arm_compute/runtime/CL/functions/CLNeg.h>
-#include <arm_compute/runtime/CL/functions/CLPadLayerEx.h>
#include <arm_compute/runtime/CL/functions/CLPermuteEx.h>
#include <arm_compute/runtime/CL/functions/CLPixelWiseDivision.h>
#include <arm_compute/runtime/CL/functions/CLPReLU.h>
+++ /dev/null
-/*
-* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
-* Copyright (c) 2016-2018 ARM Limited.
-*
-* Licensed under the Apache License, Version 2.0 (the "License");
-* you may not use this file except in compliance with the License.
-* You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
-#ifndef __ARM_COMPUTE_CLPADLAYEREX_H__
-#define __ARM_COMPUTE_CLPADLAYEREX_H__
-
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Basic function to run @ref CLPadLayerKernel
- *
- * @note The tensor data type for the inputs must be U8/QASYMM8/S16/S32/F16/F32.
- * @note The function converts the input tensor to the tensor of the output tensor's type.
- */
-class CLPadLayerEx : public ICLSimpleFunction
-{
-public:
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input tensor. Data types supported:
- * U8/QASYMM8/S16/S32/F16/F32.
- * @param[out] output Output tensor. Data types supported:
- * U8/QASYMM8/S16/S32/F16/F32.
- * @param[in] pad_size Tensor for Padding values in NHWC format shape [n, 2],
- * where n is the rank of tensor . Data types supported: S32
- */
- void configure(ICLTensor *input, ICLTensor *output, ICLTensor *pad_size);
-};
-
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_CLPADLAYEREX_H__ */
{"gather_1d_out", "gather.cl"},
{"hashtable_lookup", "hashtable_lookup.cl"},
{"neg_tensor", "neg_tensor.cl"},
- {"pad", "pad.cl"},
{"permute_generic", "permute_ex.cl"},
{"pixelwise_mul_qasymm8", "pixelwise_mul_quantized.cl"},
{"prelu", "prelu.cl"},
#include "./cl_kernels/neg_tensor.clembed"
},
{
- "pad.cl",
-#include "./cl_kernels/pad.clembed"
- },
- {
"prelu.cl",
#include "./cl_kernels/prelu.clembed"
},
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016, 2017 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "helpers.h"
-
-#if defined(IW) && defined(IH) && defined(ID) && defined(IB) && defined(DEPTH_OUT) && \
- defined(ZERO_VALUE)
-/** Basic function to pad a tensor
- *
- * @attention Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float
- * @attention Output tensor depth should be given as a preprocessor argument using -DDEPTH_OUT=size.
- * e.g. -DDEPTH_OUT=16
- * @attention Input dimensions should be passed as a preprocessor argument using -DIW(width),
- * -DIH(height), -DID(depth) and -DIB(batch). e.g. -DIW = 4
- * @attention The value to be set by pad value using -DZERO_VALUE=value. e.g. -DZERO_VALUE=0
- *
- * @param[in] input_ptr Pointer to the source tensor. Supported data
- * types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
- * @param[in] input_stride_x Stride of the source tensor in X dimension (in
- * bytes)
- * @param[in] input_step_x input_stride_x * number of elements along X
- * processed per workitem(in bytes)
- * @param[in] input_stride_y Stride of the source tensor in Y dimension (in
- * bytes)
- * @param[in] input_step_y input_stride_y * number of elements along Y
- * processed per workitem(in bytes)
- * @param[in] input_stride_z Stride of the source tensor in Z dimension (in
- * bytes)
- * @param[in] input_step_z input_stride_z * number of elements along Z
- * processed per workitem(in bytes)
- * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source
- * tensor
- * @param[out] output_ptr Pointer to the destination tensor. Supported
- * data types: same as @p input_ptr
- * @param[in] output_stride_x Stride of the destination tensor in X dimension
- * (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X
- * processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination tensor in Y dimension
- * (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y
- * processed per workitem(in bytes)
- * @param[in] output_stride_z Stride of the destination tensor in Z dimension
- * (in bytes)
- * @param[in] output_step_z output_stride_z * number of elements along Z
- * processed per workitem(in bytes)
- * @param[in] output_stride_w Stride of the destination tensor in W dimension
- * (in bytes)
- * @param[in] output_step_w output_stride_w * number of elements along W
- * processed per workitem(in bytes)
- * @param[in] output_offset_first_element_in_bytes The offset of the first element in the
- * destination tensor
- * @param[in] pad_values Padding values for each of the dimensions. Only
- * pad values for Up(for batch), Top(for height),
- * Left(for width) and Front(for depth) are
- * required. Supported data type: S32
- */
-
-__kernel void pad(TENSOR4D_DECLARATION(input), TENSOR4D_DECLARATION(output), const int4 pad_values)
-{
- Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input, 0);
- Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT(output, DEPTH_OUT);
-
- int index[4] = {0};
-
- index[0] = get_global_id(0); // W
- index[1] = get_global_id(1); // H
- index[2] = get_global_id(2) % DEPTH_OUT; // C
- index[3] = get_global_id(2) / DEPTH_OUT; // N
-
- if (index[0] < pad_values.x || index[0] >= (IW + pad_values.x) || index[1] < pad_values.y ||
- index[1] >= (IH + pad_values.y) || index[2] < pad_values.z ||
- index[2] >= (ID + pad_values.z) || index[3] < pad_values.w || index[3] >= (IB + pad_values.w))
- {
- *((__global DATA_TYPE *)out.ptr) = (DATA_TYPE)ZERO_VALUE;
- }
- else
- {
- *((__global DATA_TYPE *)out.ptr) = *(
- (__global DATA_TYPE *)tensor4D_offset(&in, index[0] - pad_values.x, index[1] - pad_values.y,
- index[2] - pad_values.z, index[3] - pad_values.w));
- }
-}
-
-#endif // if defined(IW) && defined(IH) && defined(ID) && defined(IB) && defined(DEPTH_OUT) &&
- // defined(ZERO_VALUE)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/core/CL/kernels/CLPadLayerKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-
-using namespace arm_compute;
-
-namespace
-{
-Status validate_arguments(const ITensorInfo *input_info, const ITensorInfo *output_info,
- const ITensorInfo *pad_size_info)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input_info, 1, DataType::U8, DataType::QASYMM8,
- DataType::S16, DataType::S32, DataType::F16,
- DataType::F32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output_info, 1, DataType::U8, DataType::QASYMM8,
- DataType::S16, DataType::S32, DataType::F16,
- DataType::F32);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(pad_size_info, 1, DataType::S32);
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(
- !(input_info->num_dimensions() > 0 && input_info->num_dimensions() <= 4),
- "Pad kernel supports upto 4-D input tensor");
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(input_info->num_dimensions() != output_info->num_dimensions(),
- "Output and input should have same number of dimensions");
-
- if (input_info->data_type() == DataType::QASYMM8)
- {
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(input_info->quantization_info() !=
- output_info->quantization_info(),
- "The input and output quantization info are different!");
- }
-
- return Status{};
-}
-
-} // namespace
-
-CLPadLayerKernel::CLPadLayerKernel() : _input(nullptr), _output(nullptr), _pad_size(nullptr) {}
-
-void CLPadLayerKernel::configure(const ICLTensor *input, ICLTensor *output, ICLTensor *pad_size)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output, pad_size);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), pad_size->info()));
-
- _input = input;
- _output = output;
- _pad_size = pad_size;
-
- // Set kernel build options
- std::set<std::string> build_opts;
- build_opts.emplace("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
- build_opts.emplace("-DDEPTH_OUT=" + support::cpp11::to_string(output->info()->dimension(2)));
- build_opts.emplace("-DIW=" + support::cpp11::to_string(input->info()->dimension(0)));
- build_opts.emplace("-DIH=" + support::cpp11::to_string(input->info()->dimension(1)));
- build_opts.emplace("-DID=" + support::cpp11::to_string(input->info()->dimension(2)));
- build_opts.emplace("-DIB=" + support::cpp11::to_string(input->info()->dimension(3)));
- if (input->info()->data_type() == DataType::QASYMM8)
- {
- build_opts.emplace("-DZERO_VALUE=" +
- support::cpp11::to_string(input->info()->quantization_info().offset));
- }
- else
- {
- build_opts.emplace("-DZERO_VALUE=" + support::cpp11::to_string(0));
- }
-
- // Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel("pad", build_opts));
-
- // Configure kernel window
- Window win = calculate_max_window(*output->info(), Steps());
-
- Coordinates coord;
- coord.set_num_dimensions(output->info()->num_dimensions());
- output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape()));
-
- ICLKernel::configure_internal(win);
-}
-
-void CLPadLayerKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(ICLKernel::window(), window);
-
- _pad_size->map(queue);
-
- auto pad_width_left = *reinterpret_cast<const int32_t *>(_pad_size->ptr_to_element({0, 0}));
- auto pad_height_top = *reinterpret_cast<const int32_t *>(_pad_size->ptr_to_element({0, 1}));
- auto pad_depth_front = *reinterpret_cast<const int32_t *>(_pad_size->ptr_to_element({0, 2}));
- auto pad_batch_up = *reinterpret_cast<const int32_t *>(_pad_size->ptr_to_element({0, 3}));
-
- _pad_size->unmap(queue);
-
- // Pad_values which needs to be passed
- const cl_int4 paddingValues = {
- {static_cast<cl_int>(pad_width_left), static_cast<cl_int>(pad_height_top),
- static_cast<cl_int>(pad_depth_front), static_cast<cl_int>(pad_batch_up)}};
-
- Window slice_out = window.first_slice_window_4D().collapse(ICLKernel::window(), 2, 4);
-
- // Setup output slice
- Window slice_in(slice_out);
- slice_in.set(Window::DimX, Window::Dimension(0, 0, 0));
- slice_in.set(Window::DimY, Window::Dimension(0, 0, 0));
- slice_in.set(Window::DimZ, Window::Dimension(0, 0, 0));
- slice_in.set(3, Window::Dimension(0, 0, 0));
-
- do
- {
- unsigned int idx = 0;
- add_4D_tensor_argument(idx, _input, slice_in);
- add_4D_tensor_argument(idx, _output, slice_out);
- _kernel.setArg<cl_int4>(idx++, paddingValues);
- enqueue(queue, *this, slice_out, lws_hint());
- } while (window.slide_window_slice_4D(slice_out) && window.slide_window_slice_4D(slice_in));
-}
+++ /dev/null
-/*
-* Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
-* Copyright (c) 2016-2018 ARM Limited.
-*
-* Licensed under the Apache License, Version 2.0 (the "License");
-* you may not use this file except in compliance with the License.
-* You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
-#include "arm_compute/runtime/CL/functions/CLPadLayerEx.h"
-
-#include "arm_compute/core/CL/kernels/CLPadLayerKernel.h"
-
-using namespace arm_compute;
-
-void CLPadLayerEx::configure(ICLTensor *input, ICLTensor *output, ICLTensor *pad_size)
-{
- auto k = arm_compute::support::cpp14::make_unique<CLPadLayerKernel>();
- k->configure(input, output, pad_size);
- _kernel = std::move(k);
-}