+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __ARM_COMPUTE_CLPERMUTEEXKERNEL_H__
-#define __ARM_COMPUTE_CLPERMUTEEXKERNEL_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** OpenCL kernel to perform tensor permutation.
- *
- * Permutes given a permutation vector
- */
-class CLPermuteExKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLPermuteExKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLPermuteExKernel(const CLPermuteExKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLPermuteExKernel &operator=(const CLPermuteExKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLPermuteExKernel(CLPermuteExKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLPermuteExKernel &operator=(CLPermuteExKernel &&) = default;
- /** Set the input and output of the kernel.
- *
- * @param[in] input The input tensor to permute. Data types supported:
- * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
- * @param[in] output The output tensor. Data types supported: Same as @p input
- * @param[in] perm Permutation vector
- */
- void configure(const ICLTensor *input, ICLTensor *output, const PermutationVector &perm);
- /** Static function to check if given info will lead to a valid configuration of @ref
- * CLPermuteKernel
- *
- * @param[in] input First tensor input info. Data types supported:
- * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
- * @param[in] output Output tensor info. Data types supported: same as @p input.
- * @param[in] perm Permutation vector
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output,
- const PermutationVector &perm);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
- PermutationVector _perm;
-};
-} // arm_compute
-#endif /*__ARM_COMPUTE_CLPERMUTEEXKERNEL_H__ */
#include <arm_compute/runtime/CL/functions/CLHashtableLookup.h>
#include <arm_compute/runtime/CL/functions/CLLogicalNot.h>
#include <arm_compute/runtime/CL/functions/CLNeg.h>
-#include <arm_compute/runtime/CL/functions/CLPermuteEx.h>
#include <arm_compute/runtime/CL/functions/CLPixelWiseDivision.h>
#include <arm_compute/runtime/CL/functions/CLPReLU.h>
#include <arm_compute/runtime/CL/functions/CLReduceOperation.h>
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __ARM_COMPUTE_CLPERMUTEEX_H__
-#define __ARM_COMPUTE_CLPERMUTEEX_H__
-
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Basic function to execute an @ref CLPermuteKernel. */
-class CLPermuteEx : public ICLSimpleFunction
-{
-public:
- /** Set the input and output tensors.
- *
- * @param[in] input The input tensor to permute. Data types supported:
- * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
- * @param[in] output The output tensor. Data types supported: Same as @p input
- * @param[in] perm Permutation vector
- */
- void configure(const ICLTensor *input, ICLTensor *output, const PermutationVector &perm);
- /** Static function to check if given info will lead to a valid configuration of @ref CLPermute.
- *
- * @param[in] input First tensor input info. Data types supported:
- * U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
- * @param[in] output Output tensor info. Data types supported: same as @p input.
- * @param[in] perm Permutation vector
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output,
- const PermutationVector &perm);
-};
-}
-#endif /*__ARM_COMPUTE_CLPERMUTEEX_H__ */
"topkv2_quicksort.cl",
#include "./cl_kernels/topkv2_quicksort.clembed"
},
- {
- "permute_ex.cl",
-#include "./cl_kernels/permute_ex.clembed"
- },
#endif /* EMBEDDED_KERNELS */
};
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "helpers.h"
-
-#if defined(DATA_TYPE) && defined(DEPTH_IN) && defined(P1) && defined(P2) && defined(P3) && \
- defined(P4)
-/** Perform a Generic permute operation on an input tensor of Shape DCHW.
- *
- * @attention Data type can be passed using the -DDATA_TYPE compile flag, e.g. -DDATA_TYPE=float
- * @attention Input tensor depth should be given as a preprocessor argument using -DDEPTH_IN=size.
- e.g. -DDEPTH_IN=16
- * @attention Permutation vector is passed as a preprocessor arguement using -DP1, -DP2, -DP3 and
- -DP4=int, e.g. -DP1=2
- *
- * @param[in] input_ptr Pointer to the source image. Supported data
- * types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
- * @param[in] input_stride_x Stride of the source image in X dimension (in
- * bytes)
- * @param[in] input_step_x input_stride_x * number of elements along X
- * processed per workitem(in bytes)
- * @param[in] input_stride_y Stride of the source image in Y dimension (in
- * bytes)
- * @param[in] input_step_y input_stride_y * number of elements along Y
- * processed per workitem(in bytes)
- * @param[in] input_stride_z Stride of the source tensor in Z dimension (in
- * bytes)
- * @param[in] input_step_z input_stride_z * number of elements along Z
- * processed per workitem(in bytes)
- * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source
- * image
- * @param[out] output_ptr Pointer to the destination image. Supported data
- * types: same as @p input_ptr
- * @param[in] output_stride_x Stride of the destination image in X dimension
- * (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X
- * processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination image in Y dimension
- * (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y
- * processed per workitem(in bytes)
- * @param[in] output_stride_z Stride of the source tensor in Z dimension (in
- * bytes)
- * @param[in] output_step_z output_stride_z * number of elements along Z
- * processed per workitem(in bytes)
- * @param[in] output_stride_w Stride of the source tensor in W dimension (in
- * bytes)
- * @param[in] output_step_w output_stride_w * number of elements along W
- * processed per workitem(in bytes)
- * @param[in] output_offset_first_element_in_bytes The offset of the first element in the
- * destination image
- */
-__kernel void permute_generic(TENSOR4D_DECLARATION(input), TENSOR4D_DECLARATION(output))
-{
- Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT(input, DEPTH_IN);
- Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(output, 0);
-
- // WHCN format
- int in_index[] = {
- get_global_id(0), get_global_id(1), get_global_id(2) % DEPTH_IN, get_global_id(2) / DEPTH_IN,
- };
-
- // New locations based on Permuted index calc as out_index[index] = in_index[new_index]
- *((__global DATA_TYPE *)tensor4D_offset(&out, in_index[P1], in_index[P2], in_index[P3],
- in_index[P4])) = *((__global DATA_TYPE *)in.ptr);
-}
-#endif // defined(DATA_TYPE) && defined(DEPTH_IN) && defined(P1) && defined(P2) && defined(P3) &&
- // defined(P4)
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/core/CL/kernels/CLPermuteExKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-
-using namespace arm_compute;
-
-namespace
-{
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output,
- const PermutationVector &perm)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(
- input, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::U16, DataType::S16,
- DataType::U32, DataType::S32, DataType::F16, DataType::F32);
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->num_dimensions() < 1 || input->num_dimensions() > 4,
- "Permutation upto 4-D input tensor is supported");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(perm.num_dimensions() < 1 || perm.num_dimensions() > 4,
- "Permutation vector size should be less than or equal to 4");
- for (unsigned int i = 0; i < perm.num_dimensions(); ++i)
- {
- auto &p = perm[i];
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(p >= perm.num_dimensions(),
- "Permutation vector has invalid values");
- }
-
- // Validate configured output
- if (output->total_size() != 0)
- {
- const TensorShape output_shape =
- misc::shape_calculator::compute_permutation_output_shape(*input, perm);
-
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), output_shape);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- }
- return Status{};
-}
-} // namespace
-
-CLPermuteExKernel::CLPermuteExKernel() : _input(nullptr), _output(nullptr), _perm() {}
-
-void CLPermuteExKernel::configure(const ICLTensor *input, ICLTensor *output,
- const PermutationVector &perm)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), perm));
-
- _input = input;
- _output = output;
- _perm = perm;
-
- const TensorShape output_shape =
- misc::shape_calculator::compute_permutation_output_shape(*input->info(), perm);
- // Output auto inizialitation if not yet initialized
- auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(output_shape));
-
- // Create kernel
- std::set<std::string> build_opts;
-
- build_opts.emplace("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()));
- build_opts.emplace("-DDEPTH_IN=" + support::cpp11::to_string(input->info()->dimension(2)));
-
- // New positions of batch(D), height(H), width(w) and channel(C) based on permutation vector
- build_opts.emplace("-DP1=" +
- support::cpp11::to_string((perm.num_dimensions() >= 1) ? perm[0] : 0));
- build_opts.emplace("-DP2=" +
- support::cpp11::to_string((perm.num_dimensions() >= 2) ? perm[1] : 1));
- build_opts.emplace("-DP3=" +
- support::cpp11::to_string((perm.num_dimensions() >= 3) ? perm[2] : 2));
- build_opts.emplace("-DP4=" +
- support::cpp11::to_string((perm.num_dimensions() >= 4) ? perm[3] : 3));
-
- _kernel = static_cast<cl::Kernel>(
- CLKernelLibraryEx::get().create_kernel("permute_generic", build_opts));
-
- // Configure kernel window
- Window win = calculate_max_window(*input->info(), Steps());
-
- // The CLPermute doesn't need padding so update_window_and_padding() can be skipped
- Coordinates coord;
- coord.set_num_dimensions(output->info()->num_dimensions());
- output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape()));
-
- ICLKernel::configure_internal(win);
-}
-
-Status CLPermuteExKernel::validate(const ITensorInfo *input, const ITensorInfo *output,
- const PermutationVector &perm)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, perm));
-
- return Status{};
-}
-
-void CLPermuteExKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(ICLKernel::window(), window);
-
- Window slice_in = window.first_slice_window_4D().collapse(ICLKernel::window(), 2, 4);
-
- // Setup output slice
- Window slice_out(slice_in);
- slice_out.set(Window::DimX, Window::Dimension(0, 0, 0));
- slice_out.set(Window::DimY, Window::Dimension(0, 0, 0));
- slice_out.set(Window::DimZ, Window::Dimension(0, 0, 0));
- slice_out.set(3, Window::Dimension(0, 0, 0));
-
- do
- {
- unsigned int idx = 0;
- add_4D_tensor_argument(idx, _input, slice_in);
- add_4D_tensor_argument(idx, _output, slice_out);
- enqueue(queue, *this, slice_in);
- } while (window.slide_window_slice_4D(slice_in) && window.slide_window_slice_4D(slice_out));
-}
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/runtime/CL/functions/CLPermuteEx.h"
-
-#include "arm_compute/core/CL/kernels/CLPermuteExKernel.h"
-
-using namespace arm_compute;
-
-void CLPermuteEx::configure(const ICLTensor *input, ICLTensor *output,
- const PermutationVector &perm)
-{
- auto k = arm_compute::support::cpp14::make_unique<CLPermuteExKernel>();
- k->configure(input, output, perm);
- _kernel = std::move(k);
-}
-
-Status CLPermuteEx::validate(const ITensorInfo *input, const ITensorInfo *output,
- const PermutationVector &perm)
-{
- ARM_COMPUTE_RETURN_ON_ERROR(CLPermuteExKernel::validate(input, output, perm));
- return Status{};
-}
if (::internal::arm_compute::isGpuMode())
{
- auto fn = nnfw::cpp14::make_unique<::arm_compute::CLPermuteEx>();
+ auto fn = nnfw::cpp14::make_unique<::arm_compute::CLPermute>();
fn->configure(CAST_CL(ifm_alloc), CAST_CL(ofm_alloc),
getARMComputePermutationVector(param.rank, param.pv));