This commit removes the CL kernels of the Reduce operation that are no longer needed.
Signed-off-by: jiseob.jang <jiseob.jang@samsung.com>
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file CLReduceMaxKernel.h
- * @brief This file defines CLReduceMaxKernel
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __ARM_COMPUTE_CLREDUCEMAXKERNEL_H__
-#define __ARM_COMPUTE_CLREDUCEMAXKERNEL_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/**
- * @brief Class to define interface for the reduce max kernel.
- */
-class CLReduceMaxKernel : public ICLKernel
-{
-public:
- /**
- * @brief Default constructor.
- */
- CLReduceMaxKernel();
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers).
- * @param [in] copiedInstance Const reference of CLReduceMaxKernel to be copied
- */
- CLReduceMaxKernel(const CLReduceMaxKernel &) = delete;
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers).
- * @param [in] copiedInstance Const reference of CLReduceMaxKernel to be copied
- * @return Reference of this instance
- */
- CLReduceMaxKernel &operator=(const CLReduceMaxKernel &) = delete;
- /**
- * @brief Allow instances of this class to be moved
- * @param [in] movedInstance Rvalue reference of CLReduceMaxKernel to be moved
- */
- CLReduceMaxKernel(CLReduceMaxKernel &&) = default;
- /**
- * @brief Allow instances of this class to be moved
- * @param [in] movedInstance Rvalue reference of CLReduceMaxKernel to be moved
- * @return Reference of this instance
- */
- CLReduceMaxKernel &operator=(CLReduceMaxKernel &&) = default;
- /**
- * @brief Initialise the kernel's input, output and border mode.
- * @param[in] input An input tensor. Data types supported: U8/QASYMM8/S32/F32.
- * @param[out] output The output tensor, Data types supported: same as @p input.
- * @param[in] reduce_axis Axis to reduce
- * return N/A
- */
- void configure(const ICLTensor *input, ICLTensor *output, const uint32_t reduce_axis);
- /**
- * @brief Static function to check if given info will lead to a valid configuration of @ref
- * CLReduceMaxKernel
- * @param[in] input An input tensor info. Data types supported: U8/QASYMM8/S32/F32.
- * @param[in] output The output tensor info, Data types supported: same as @p input1.
- * @param[in] reduce_axis Axis to reduce
- * Note: U8 (QS8, QS16) requires both inputs to be U8 (QS8, QS16).
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output,
- const uint32_t reduce_axis);
-
- /*
- * @brief Run CLReduceMaxKernel op
- * @param[in] window Window to be used for in_slice
- * @param[in] queue cl::CommandQueue
- * @return N/A
- */
- void run(const Window &window, cl::CommandQueue &queue) override;
- /*
- * @brief Run CLReduceMaxKernel op on CPU
- * @param[in] queue cl::CommandQueue
- * @return N/A
- */
- void run_on_cpu(cl::CommandQueue &queue);
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
- uint32_t _reduce_axis;
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_CLREDUCEMAXKERNEL_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __ARM_COMPUTE_CLREDUCESUMKERNEL_H__
-#define __ARM_COMPUTE_CLREDUCESUMKERNEL_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the reduction operation kernel */
-class CLReduceSumKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLReduceSumKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLReduceSumKernel(const CLReduceSumKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLReduceSumKernel &operator=(const CLReduceSumKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLReduceSumKernel(CLReduceSumKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLReduceSumKernel &operator=(CLReduceSumKernel &&) = default;
- /** Default destructor */
- ~CLReduceSumKernel() = default;
-
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. Data types supported: F32. Data layouts supported: NCHW.
- * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input.
- * Output will have the same number of dimensions as input.
- * @param[in] axis Axis along which to reduce. Supported reduction axis : 0, 1
- */
- void configure(const ICLTensor *input, ICLTensor *output, std::vector<uint32_t> axis);
-
- /** Static function to check if given info will lead to a valid configuration of @ref
- * CLReduceSumKernel.
- *
- * @param[in] input Source tensor info. Data types supported: F32. Data layouts supported: NCHW.
- * @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p
- * input.
- * Output will have the same number of dimensions as input.
- * @param[in] axis Axis along which to reduce. Supported reduction axis : 0, 1
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output,
- std::vector<uint32_t> axis);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
- std::vector<uint32_t> _axis;
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_CLREDUCESUMKERNEL_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file CLReductionMeanKernel.h
- * @brief This file defines CLReductionMeanKernel class
- * @ingroup COM_AI_RUNTIME
- */
-
-#ifndef __ARM_COMPUTE_CLREDUCTIONMEANKERNEL_H__
-#define __ARM_COMPUTE_CLREDUCTIONMEANKERNEL_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/**
- * @brief Class to define interface for the reduction operation kernel
- */
-class CLReductionMeanKernel : public ICLKernel
-{
-public:
- /**
- * @brief Default constructor
- */
- CLReductionMeanKernel();
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers)
- */
- CLReductionMeanKernel(const CLReductionMeanKernel &) = delete;
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers)
- */
- CLReductionMeanKernel &operator=(const CLReductionMeanKernel &) = delete;
- /**
- * @brief Allow instances of this class to be moved
- */
- CLReductionMeanKernel(CLReductionMeanKernel &&) = default;
- /**
- * @brief Allow instances of this class to be moved
- */
- CLReductionMeanKernel &operator=(CLReductionMeanKernel &&) = default;
- /**
- * @brief Default destructor
- */
- ~CLReductionMeanKernel() = default;
-
- /**
- * @brief Set the input and output tensors.
- * @param[in] input Source tensor. Data types supported: F32. Data layouts supported: NCHW.
- * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input.
- * Output will have the same number of dimensions as input.
- * @param[in] axis Axis along which to reduce. Supported reduction axis : 0, 1
- * @return N/A
- */
- void configure(const ICLTensor *input, ICLTensor *output, std::vector<uint32_t> axis);
-
- /**
- * @brief Static function to check if given info will lead to a valid configuration of @ref
- * CLReductionMeanKernel.
- * @param[in] input Source tensor info. Data types supported: F32. Data layouts supported: NCHW.
- * @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p
- * input.
- * Output will have the same number of dimensions as input.
- * @param[in] axis Axis along which to reduce. Supported reduction axis : 0, 1
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output,
- std::vector<uint32_t> axis);
-
- /*
- * @brief Run CLReductionMeanKernel op
- * @param[in] window Window to be used for in_slice
- * @param[in] queue CLQueue
- * @return N/A
- */
- void run(const Window &window, cl::CommandQueue &queue) override;
- /*
- * @brief Get border size as BorderSize
- * @return border size as BorderSize
- */
- BorderSize border_size() const override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
- std::vector<uint32_t> _reduction_axis;
- BorderSize _border_size;
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_CLREDUCTIONMEANKERNEL_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file CLReduceMax.h
- * @ingroup COM_AI_RUNTIME
- * @brief This file contains arm_compute::CLReduceMax class
- */
-
-#ifndef __ARM_COMPUTE_CLREDUCE_MAX_H__
-#define __ARM_COMPUTE_CLREDUCE_MAX_H__
-
-#include "arm_compute/core/CL/kernels/CLReduceMaxKernel.h"
-#include "arm_compute/runtime/CL/CLArray.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/IFunction.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/**
- * @brief Class to execute CLReduceMax operation
- */
-class CLReduceMax : public IFunction
-{
-public:
- /**
- * @brief Construct a new CLReduceMax object
- */
- CLReduceMax();
-
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers)
- */
- CLReduceMax(const CLReduceMax &) = delete;
-
- /**
- * @brief Prevent instances of this class from being copied (As this class contains pointers)
- */
- CLReduceMax &operator=(const CLReduceMax &) = delete;
-
- /**
- * @brief Construct a new CLReduceMax object by using copy constructor
- * @param[in] CLReduceMax object to move
- */
- CLReduceMax(CLReduceMax &&) = default;
-
- /**
- * @brief Assign a CLReduceMax object.
- * @param[in] CLReduceMax object to assign. This object will be moved.
- */
- CLReduceMax &operator=(CLReduceMax &&) = default;
-
- /**
- * @brief Initialise the kernel's inputs and outputs.
- * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S32/F32.
- * @param[in] axis Axis to reduce. It must be sorted and no duplicates.
- * @param[out] output The result of ReduceMax operation. Data types supported: same as @p
- * input.
- * @return N/A
- */
- void configure(ICLTensor *input, std::vector<uint32_t> reduce_axis, ICLTensor *output);
-
- /**
- * @brief Static function to check if given info will lead to a valid configuration
- * @param[in] input Input tensor. Data types supported: U8/QASYMM8/S32/F32.
- * @param[in] axis Axis to reduce
- * @param[out] output The result of ReduceMax operation. Data types supported: same as @p
- * input.
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const std::vector<uint32_t> &reduce_axis,
- const ITensorInfo *output);
-
- /**
- * @brief Run the kernels contained in the function
- * This operation works on CPU on GPU depending on the value of REDUCE_MAX_RUN_ON_CPU macro
- * in CLReduceMax.cpp.
- * If REDUCE_MAX_RUN_ON_CPU == 1, CPU runs this operation.
- * Otherwise GPU runs this operation.
- * @return N/A
- */
- void run() override;
-
-private:
- void run_on_cpu();
-
- ICLTensor *_input;
- ICLTensor *_output;
- std::vector<uint32_t> _reduce_axis;
-
- std::unique_ptr<CLTensor[]> _interm_tensors{nullptr};
- std::unique_ptr<CLReduceMaxKernel[]> _reduction_kernels{nullptr};
- size_t _num_of_kernels;
-};
-}
-#endif /*__ARM_COMPUTE_CLREDUCE_MAX_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __ARM_COMPUTE_CLREDUCESUM_H__
-#define __ARM_COMPUTE_CLREDUCESUM_H__
-
-#include "arm_compute/core/CL/kernels/CLReduceSumKernel.h"
-#include "arm_compute/runtime/IFunction.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Perform reduction operation.
- */
-class CLReduceSum : public IFunction
-{
-public:
- /** Default Constructor.
- */
- CLReduceSum();
-
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. Data types supported: F32. Data layouts supported: NCHW.
- * @param[out] output Destination tensor. Data types and data layouts supported: Same as @p input.
- * @param[in] axis Axis along which to reduce. Supported reduction axis : 0,1
- */
- void configure(ICLTensor *input, ICLTensor *output, std::vector<uint32_t> axis);
-
- /** Static function to check if given info will lead to a valid configuration of @ref
- * CLReduceSum.
- *
- * @param[in] input Source tensor info. Data types supported: F32. Data layouts supported: NCHW.
- * @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p
- * input.
- * @param[in] axis Axis along which to reduce. Supported reduction axis : 0,1
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output,
- std::vector<uint32_t> axis);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- void run_on_cpu();
-
- ICLTensor *_input;
- ICLTensor *_output;
- std::vector<uint32_t> _axis;
-
- std::unique_ptr<ICLKernel> _kernel;
-};
-}
-#endif /*__ARM_COMPUTE_CLREDUCESUM_H__ */
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file CLReductionMean.h
- * @ingroup COM_AI_RUNTIME
- * @brief This file contains arm_compute::CLReductionMean class
- */
-
-#ifndef __ARM_COMPUTE_CLREDUCTIONMEAN_H__
-#define __ARM_COMPUTE_CLREDUCTIONMEAN_H__
-
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
-#include "arm_compute/core/CL/kernels/CLReductionMeanKernel.h"
-#include "arm_compute/runtime/IFunction.h"
-
-#include <vector>
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/**
- * @brief Class to perform ReductionMean operation
- */
-class CLReductionMean : public IFunction
-{
-public:
- /**
- * @brief Construct a new ReductionMean object
- */
- CLReductionMean();
-
- /**
- * @brief Set the input and output tensors.
- * @param[in] input Source tensor. Data types supported: F32. Data layouts supported: NCHW
- * @param[out] output Destination tensor. Data types and data layouts supported: Same as
- * @p input.
- * @param[in] axis Axis along which to reduce. Supported reduction axis : 0, 1
- * @return N/A
- */
- void configure(ICLTensor *input, ICLTensor *output, std::vector<uint32_t> axis);
-
- /**
- * @brief Static function to check if given info will lead to a valid configuration of @ref
- * CLReductionMean.
- * @param[in] input Source tensor info. Data types supported: F32. Data layouts supported: NCHW
- * @param[in] output Destination tensor info. Data types and data layouts supported: Same as @p
- * input.
- * @param[in] axis Axis along which to reduce. Supported reduction axis : 0, 1
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output,
- std::vector<uint32_t> axis);
-
- /**
- * @brief Run the OpenCL kernel for this operation
- * @return N/A
- */
- void run() override;
-
-private:
- CLReductionMeanKernel _reduction_mean_kernel;
- CLFillBorderKernel _fill_border_kernel;
-};
-}
-#endif /*__ARM_COMPUTE_CLREDUCTIONMEAN_H__ */
{"reduce_max", "reduce_operation.cl"},
{"reduce_mean", "reduce_operation.cl"},
{"reduce_sum", "reduce_operation.cl"},
- {"reduction_operation", "reduction_operation.cl"},
- {"reduction_mean", "reduction_mean.cl"},
{"remap_nearest_neighbour", "remap.cl"},
{"remap_bilinear", "remap.cl"},
{"reshape_layer", "reshape_layer.cl"},
#include "./cl_kernels/reduce_operation.clembed"
},
{
- "reduce_max.cl",
-#include "./cl_kernels/reduce_max.clembed"
- },
- {
- "reduce_sum.cl",
-#include "./cl_kernels/reduce_sum.clembed"
- },
- {
- "reduction_mean.cl",
-#include "./cl_kernels/reduction_mean.clembed"
- },
- {
"space_to_depth.cl",
#include "./cl_kernels/space_to_depth.clembed"
},
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2016-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/core/CL/kernels/CLReduceMaxKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-
-using namespace arm_compute;
-
-namespace
-{
-const TensorShape inferOutputShape(const TensorShape &input_shape, const uint32_t reduce_axis)
-{
- TensorShape out_shape{input_shape};
-
- out_shape.set(reduce_axis, 1);
-
- return out_shape;
-}
-} // namespace
-
-namespace
-{
-constexpr unsigned int num_elems_processed_per_iteration = 16;
-
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output,
- const uint32_t reduce_axis)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::S32, DataType::F32,
- DataType::U8, DataType::QASYMM8);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
-
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->tensor_shape().total_size() == 0,
- "Inputs are not broadcast compatible");
-
- const TensorShape output_shape = inferOutputShape(input->tensor_shape(), reduce_axis);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(output_shape.total_size() != output->tensor_shape().total_size(),
- "output shape's size does not match reduce_axis");
-
- const auto num_dimensions = input->tensor_shape().num_dimensions();
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(
- reduce_axis >= 0 && reduce_axis < num_dimensions,
- "reduce_axis must be greater than or equal to 0 and less than (input's rank).");
- return Status{};
-}
-
-} // namespace
-
-CLReduceMaxKernel::CLReduceMaxKernel() : _input(nullptr), _output(nullptr), _reduce_axis() {}
-
-void CLReduceMaxKernel::configure(const ICLTensor *input, ICLTensor *output,
- const uint32_t reduce_axis)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), reduce_axis));
-
- _input = input;
- _output = output;
- _reduce_axis = reduce_axis;
-
- std::unique_ptr<ITensorInfo> output_info = output->info()->clone();
- output_info->set_tensor_shape(inferOutputShape(input->info()->tensor_shape(), reduce_axis));
-
- // Construct kernel name
- std::string kernel_name = "reduce_max";
-
- // Set kernel build options
- std::set<std::string> build_opts;
- build_opts.emplace("-DDATA_TYPE=" + get_cl_type_from_data_type(output_info->data_type()));
- build_opts.emplace("-DDEPTH_OUT=" + support::cpp11::to_string(output_info->dimension(2)));
-
- // Create kernel
- _kernel =
- static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts));
-
- // Configure kernel window
- Window win = calculate_max_window(*output_info, Steps());
-
- Coordinates coord;
- coord.set_num_dimensions(output_info->num_dimensions());
- output->info()->set_valid_region(ValidRegion(coord, output_info->tensor_shape()));
-
- ICLKernel::configure(win);
-}
-
-Status CLReduceMaxKernel::validate(const ITensorInfo *input, const ITensorInfo *output,
- const uint32_t reduce_axis)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, reduce_axis));
-
- return Status{};
-}
-
-void CLReduceMaxKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window);
-
- const TensorShape &shape_in = _input->info()->tensor_shape();
-
- unsigned int idx = 2 * num_arguments_per_4D_tensor(); // Skip the input and output parameters
-
- // Initialize as -1 that means the axis is not reduced
- cl_int4 stops = {{
- static_cast<cl_int>(-1), static_cast<cl_int>(-1), static_cast<cl_int>(-1),
- static_cast<cl_int>(-1),
- }};
-
- stops.s[_reduce_axis] = static_cast<cl_int>(shape_in[_reduce_axis] - 1);
-
- _kernel.setArg<cl_int4>(idx++, stops);
-
- Window slice_out = window.first_slice_window_4D().collapse(ICLKernel::window(), 2, 4);
-
- // Setup input slice
- Window slice_in(slice_out);
- slice_in.set(Window::DimX, Window::Dimension(0, 0, 0));
- slice_in.set(Window::DimY, Window::Dimension(0, 0, 0));
- slice_in.set(Window::DimZ, Window::Dimension(0, 0, 0));
- slice_in.set(3, Window::Dimension(0, 0, 0));
-
- // Copy output's shape in order to use for recovering at end of this method
- const TensorShape shape_out = _output->info()->tensor_shape();
- _output->info()->set_tensor_shape(inferOutputShape(shape_in, _reduce_axis));
-
- do
- {
- unsigned int idx = 0;
- add_4D_tensor_argument(idx, _input, slice_in);
- add_4D_tensor_argument(idx, _output, slice_out);
- enqueue(queue, *this, slice_out);
- } while (window.slide_window_slice_4D(slice_in) && window.slide_window_slice_4D(slice_out));
-
- // Recover output's shape of output tensor
- _output->info()->set_tensor_shape(shape_out);
-}
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/core/CL/kernels/CLReduceSumKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-
-using namespace arm_compute;
-
-namespace
-{
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output,
- std::vector<uint32_t> axis)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() != DataLayout::NCHW);
-
- const auto num_dimensions = input->tensor_shape().num_dimensions();
- for (size_t i = 0; i < axis.size(); ++i)
- {
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis[i] < num_dimensions,
- "Reduction axis less than input's rank");
- }
-
- if (output->total_size() != 0)
- {
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON(output->data_layout() != DataLayout::NCHW);
- }
-
- return Status{};
-}
-
-} // namespace
-
-CLReduceSumKernel::CLReduceSumKernel() : _input(nullptr), _output(nullptr), _axis() {}
-
-void CLReduceSumKernel::configure(const ICLTensor *input, ICLTensor *output,
- std::vector<uint32_t> axis)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), axis));
-
- _input = input;
- _output = output;
- _axis = axis;
-
- // Configure kernel window
- int cols = _input->info()->tensor_shape()[0];
- int rows = _input->info()->tensor_shape()[1];
- Window win;
- win.set(0, Window::Dimension(0, cols, 1));
- win.set(1, Window::Dimension(0, rows, 1));
-
- // Set build options
- std::set<std::string> build_opts;
- build_opts.emplace(("-DWIDTH=" + support::cpp11::to_string(cols)));
-
- // Create kernel
- _kernel =
- static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel("reduce_sum", build_opts));
-
- ICLKernel::configure(win);
-}
-
-Status CLReduceSumKernel::validate(const ITensorInfo *input, const ITensorInfo *output,
- std::vector<uint32_t> axis)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, axis));
-
- return Status{};
-}
-
-void CLReduceSumKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
-
- // Set out window
- Window window_input = window;
- Window slice_input = window_input.first_slice_window_1D();
-
- do
- {
- Window slice_output = slice_input.shift_dimensions(1);
- unsigned int idx = 0;
- add_1D_tensor_argument(idx, _input, slice_input);
- add_1D_tensor_argument(idx, _output, slice_output);
- enqueue(queue, *this, slice_input);
- } while (window_input.slide_window_slice_1D(slice_input));
-}
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/core/CL/kernels/CLReductionMeanKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibraryEx.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-
-using namespace arm_compute;
-
-namespace
-{
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output,
- std::vector<uint32_t> axis)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() != DataLayout::NCHW);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(axis.size() >= TensorShape::num_max_dimensions,
- "Reduction axis greater than max number of dimensions");
-
- std::vector<uint32_t>::const_iterator it;
- bool axis_w = false;
- bool axis_h = false;
- for (it = axis.begin(); it != axis.end(); ++it)
- {
- if ((*it) == 0)
- {
- axis_w = true;
- }
- else if ((*it) == 1)
- {
- axis_h = true;
- }
- else
- {
- ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Unsupported axis!");
- }
- }
- // TODO Other axises (currently, only axises for both width and height are supported.)
- if (!axis_w || !axis_h)
- {
- ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Unsupported axis!");
- }
-
- if (output->total_size() != 0)
- {
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON(output->data_layout() != DataLayout::NCHW);
- }
-
- return Status{};
-}
-
-std::tuple<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output,
- std::vector<uint32_t> axis)
-{
- // Output tensor auto initialization if not yet initialized
- TensorShape output_shape{input->tensor_shape()};
- output_shape.set(0, 1);
- output_shape.set(1, 1);
- auto_init_if_empty(*output, output_shape, output->num_channels(), input->data_type(),
- input->fixed_point_position());
-
- // Configure kernel window
- constexpr unsigned int num_elems_processed_per_iteration_x = 8; // step
- const unsigned int num_elems_processed_per_iteration_y = input->dimension(1);
-
- Window win = calculate_max_window(
- *input, Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y));
- AccessWindowRectangle input_access(input, 0, 0, num_elems_processed_per_iteration_x,
- num_elems_processed_per_iteration_y);
- AccessWindowHorizontal output_access(output, 0, 1);
- bool window_changed = update_window_and_padding(win, input_access, output_access);
- output_access.set_valid_region(win, output->valid_region());
-
- Status err = (window_changed)
- ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!")
- : Status{};
-
- return std::make_tuple(err, win);
-}
-} // namespace
-
-CLReductionMeanKernel::CLReductionMeanKernel()
- : _input(nullptr), _output(nullptr), _reduction_axis(), _border_size()
-{
-}
-
-BorderSize CLReductionMeanKernel::border_size() const { return _border_size; }
-
-void CLReductionMeanKernel::configure(const ICLTensor *input, ICLTensor *output,
- std::vector<uint32_t> axis)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
-
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), axis));
-
- _input = input;
- _output = output;
- _reduction_axis = axis;
-
- constexpr unsigned int num_elems_processed_per_iteration_x = 8; // step
-
- // Set border size
- _border_size = BorderSize(
- ceil_to_multiple(input->info()->dimension(0), num_elems_processed_per_iteration_x) -
- input->info()->dimension(0));
-
- // Set build options
- std::set<std::string> build_opts;
- build_opts.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())));
- // build_opts.emplace(("-DVEC_SIZE=" +
- // support::cpp11::to_string(num_elems_processed_per_iteration)));
- if (is_data_type_fixed_point(input->info()->data_type()))
- {
- build_opts.emplace("-DFIXED_POINT_POSITION=" +
- support::cpp11::to_string(input->info()->fixed_point_position()));
- }
-
- // Create kernel
- _kernel =
- static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel("reduction_mean", build_opts));
-
- // Configure kernel window
- auto win_config = validate_and_configure_window(_input->info(), _output->info(), axis);
-
- ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config));
-
- ICLKernel::configure(std::get<1>(win_config));
-}
-
-Status CLReductionMeanKernel::validate(const ITensorInfo *input, const ITensorInfo *output,
- std::vector<uint32_t> axis)
-{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, axis));
- ARM_COMPUTE_RETURN_ON_ERROR(std::get<0>(
- validate_and_configure_window(input->clone().get(), output->clone().get(), axis)));
-
- return Status{};
-}
-
-void CLReductionMeanKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
-
- // Set out window
- Window out_window(window);
- out_window.set(Window::DimX, Window::Dimension(0, 0, 0));
-
- // Get first input and output slices
- Window in_slice = window.first_slice_window_2D();
- Window out_slice = out_window.first_slice_window_2D();
-
- // Set local sums buffer
- // TODO work_group
- unsigned int local_sum_size = _lws_hint[0] * _input->info()->element_size();
-
- unsigned int idx = 2 * num_arguments_per_2D_tensor();
- _kernel.setArg(idx++, local_sum_size, nullptr);
- _kernel.setArg<cl_int>(idx++, static_cast<cl_int>(_input->info()->dimension(1))); // height
- _kernel.setArg<cl_int>(idx++, static_cast<cl_int>(_input->info()->dimension(0) *
- _input->info()->dimension(1))); // divider
-
- do
- {
- unsigned int idx = 0;
- add_2D_tensor_argument(idx, _input, in_slice);
- in_slice.set_dimension_step(Window::DimY, _input->info()->dimension(1));
- add_2D_tensor_argument(idx, _output, out_slice);
- enqueue(queue, *this, in_slice);
- } while (window.slide_window_slice_2D(in_slice) && window.slide_window_slice_2D(out_slice));
-}
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/runtime/CL/functions/CLReduceMax.h"
-
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLReduceMaxKernel.h"
-#include "arm_compute/core/TensorInfo.h"
-
-namespace arm_compute
-{
-
-CLReduceMax::CLReduceMax()
- : _input(nullptr), _output(nullptr), _reduce_axis(), _interm_tensors(), _reduction_kernels(),
- _num_of_kernels()
-{
-}
-
-void CLReduceMax::configure(ICLTensor *input, std::vector<uint32_t> reduce_axis, ICLTensor *output)
-{
- ARM_COMPUTE_ERROR_THROW_ON(validate(input->info(), reduce_axis, output->info()));
-
- _reduce_axis = reduce_axis;
-
- _input = input;
- _output = output;
-
- // NOTE The reduce_axis must have no duplication.
- _num_of_kernels = reduce_axis.size();
- const size_t num_of_interm_tensors = _num_of_kernels - 1;
-
- _interm_tensors = arm_compute::support::cpp14::make_unique<CLTensor[]>(num_of_interm_tensors);
- _reduction_kernels =
- arm_compute::support::cpp14::make_unique<CLReduceMaxKernel[]>(_num_of_kernels);
-
- TensorShape shape{input->info()->tensor_shape()};
- for (size_t i = 0; i < num_of_interm_tensors; i++)
- {
- shape.set(reduce_axis[i], 1);
- _interm_tensors[i].allocator()->init(
- TensorInfo(shape, input->info()->num_channels(), input->info()->data_type()));
- _interm_tensors[i].allocator()->allocate();
- }
-
- // Set a vector that is ordered ICLTensors sequentially.
- std::vector<ICLTensor *> tensors;
- tensors.emplace_back(input);
- for (size_t i = 0; i < num_of_interm_tensors; i++)
- {
- tensors.emplace_back(_interm_tensors.get() + i);
- }
- tensors.emplace_back(output);
-
- // Apply ReduceMax on all kernels
- for (size_t i = 0; i < _num_of_kernels; i++)
- {
- _reduction_kernels[i].configure(tensors[i], tensors[i + 1], reduce_axis[i]);
- }
-}
-
-Status CLReduceMax::validate(const ITensorInfo *input, const std::vector<uint32_t> &reduce_axis,
- const ITensorInfo *output)
-{
- const size_t num_of_kernels = reduce_axis.size();
- const size_t num_of_interm_tensors = num_of_kernels - 1;
-
- // Create temporary tensor infos
- auto interm_tensors =
- arm_compute::support::cpp14::make_unique<TensorInfo[]>(num_of_interm_tensors);
-
- // Create intermediate tensor info
- TensorShape shape{input->tensor_shape()};
-
- for (size_t i = 0; i < num_of_interm_tensors; i++)
- {
- shape.set(reduce_axis[i], 1);
- interm_tensors[i].set_data_type(input->data_type());
- interm_tensors[i].set_tensor_shape(shape);
- interm_tensors[i].set_num_channels(input->num_channels());
- }
-
- // Set a vector that is ordered ITensorInfo sequentially.
- std::vector<const ITensorInfo *> tensors;
- tensors.emplace_back(input);
- for (size_t i = 0; i < num_of_interm_tensors; i++)
- {
- tensors.emplace_back(interm_tensors.get() + i);
- }
- tensors.emplace_back(output);
-
- // Validate ReduceMax only on all kernels
- for (size_t i = 0; i < num_of_kernels; i++)
- {
- ARM_COMPUTE_RETURN_ON_ERROR(
- CLReduceMaxKernel::validate(tensors[i], tensors[i + 1], reduce_axis[i]));
- }
-
- return Status{};
-}
-
-void CLReduceMax::run()
-{
-#if REDUCE_MAX_RUN_ON_CPU
- run_on_cpu();
-
- arm_compute::CLScheduler::get().sync();
-#else
- for (size_t i = 0; i < _num_of_kernels; ++i)
- {
- CLScheduler::get().enqueue(_reduction_kernels[i]);
- }
-#endif
-}
-
-inline const TensorShape inferOutputShape(const TensorShape &input_shape,
- const std::vector<uint32_t> &reduce_axis)
-{
- TensorShape out_shape{};
-
- bool keep_axis[4] = {true, true, true, true};
-
- for (int i = 0; i < reduce_axis.size(); ++i)
- {
- auto axis = reduce_axis[i];
- keep_axis[axis] = false;
- }
-
- for (int i = 0; i < input_shape.num_dimensions(); ++i)
- {
- size_t dim = 1;
- if (keep_axis[i])
- {
- dim = input_shape[i];
- }
- out_shape.set(i, dim);
- }
-
- return out_shape;
-}
-
-template <typename T>
-inline T getReduceMax(const ICLTensor *input, const TensorShape &input_shape,
- const TensorShape &output_shape, const size_t b, const size_t d,
- const size_t h, const size_t w)
-{
- // If output[dimention] == 1, will check all values of that dimension because of reducing
- // dimension.
- // Else will check only one value.
- const size_t start_b = output_shape[3] == 1 ? 0 : b;
- const size_t start_d = output_shape[2] == 1 ? 0 : d;
- const size_t start_h = output_shape[1] == 1 ? 0 : h;
- const size_t start_w = output_shape[0] == 1 ? 0 : w;
- const size_t stop_b = output_shape[3] == 1 ? input_shape[3] - 1 : b;
- const size_t stop_d = output_shape[2] == 1 ? input_shape[2] - 1 : d;
- const size_t stop_h = output_shape[1] == 1 ? input_shape[1] - 1 : h;
- const size_t stop_w = output_shape[0] == 1 ? input_shape[0] - 1 : w;
-
- Coordinates id{w, h, d, b};
- T max_value = *reinterpret_cast<T *>(input->ptr_to_element(id));
-
- for (size_t in_b = start_b; in_b <= stop_b; ++in_b)
- {
- id.set(3, in_b);
- for (size_t in_d = start_d; in_d <= stop_d; ++in_d)
- {
- id.set(2, in_d);
- for (size_t in_h = start_h; in_h <= stop_h; ++in_h)
- {
- id.set(1, in_h);
- for (size_t in_w = start_w; in_w <= stop_w; ++in_w)
- {
- id.set(0, in_w);
- max_value = std::max<T>(max_value, *reinterpret_cast<T *>(input->ptr_to_element(id)));
- }
- }
- }
- }
-
- return max_value;
-}
-
-template <typename T>
-inline void reduceMax(const ICLTensor *input, const TensorShape &input_shape,
- const TensorShape &output_shape, ICLTensor *output)
-{
- Coordinates id;
- for (size_t out_b = 0; out_b < output_shape[3]; ++out_b)
- {
- id.set(3, out_b);
- for (size_t out_d = 0; out_d < output_shape[2]; ++out_d)
- {
- id.set(2, out_d);
- for (size_t out_h = 0; out_h < output_shape[1]; ++out_h)
- {
- id.set(1, out_h);
- for (size_t out_w = 0; out_w < output_shape[0]; ++out_w)
- {
- id.set(0, out_w);
- *reinterpret_cast<T *>(output->ptr_to_element(id)) =
- getReduceMax<T>(input, input_shape, output_shape, out_b, out_d, out_h, out_w);
- }
- }
- }
- }
-}
-
-void CLReduceMax::run_on_cpu()
-{
- cl::CommandQueue q = CLScheduler::get().queue();
-
- _input->map(q);
- _output->map(q);
-
- TensorShape input_shape = _input->info()->tensor_shape();
- TensorShape output_shape = inferOutputShape(input_shape, _reduce_axis);
-
- // NOTE The param input_dims and output_dims's num_dimensions can be less 4.
- // However we should suppose the num_dimensions always are 4 to support up to 4.
- input_shape.set_num_dimensions(4);
- output_shape.set_num_dimensions(4);
-
- const TensorShape output_shape_origin = _output->info()->tensor_shape();
- _output->info()->set_tensor_shape(output_shape);
-
- switch (_input->info()->data_type())
- {
- case DataType::QASYMM8:
- reduceMax<uint8_t>(_input, input_shape, output_shape, _output);
- break;
- case DataType::S32:
- reduceMax<int32_t>(_input, input_shape, output_shape, _output);
- break;
- case DataType::F32:
- reduceMax<float>(_input, input_shape, output_shape, _output);
- break;
- defualt:
- ARM_COMPUTE_ERROR("DataType not supported");
- break;
- }
-
- _output->info()->set_tensor_shape(output_shape_origin);
-
- _input->unmap(q);
- _output->unmap(q);
-}
-} // namespace arm_compute
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/runtime/CL/functions/CLReduceSum.h"
-
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLReduceSumKernel.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-
-#define REDUCE_SUM_RUN_ON_CPU 1
-
-namespace arm_compute
-{
-
-CLReduceSum::CLReduceSum() : _input(nullptr), _output(nullptr), _axis(), _kernel(nullptr) {}
-
-Status CLReduceSum::validate(const ITensorInfo *input, const ITensorInfo *output,
- std::vector<uint32_t> axis)
-{
- ARM_COMPUTE_RETURN_ON_ERROR(CLReduceSumKernel::validate(input, output, axis));
- return Status{};
-}
-
-void CLReduceSum::configure(ICLTensor *input, ICLTensor *output, std::vector<uint32_t> axis)
-{
- _input = input;
- _output = output;
- _axis = axis;
-
- auto k = arm_compute::support::cpp14::make_unique<CLReduceSumKernel>();
- k->configure(input, output, axis);
- _kernel = std::move(k);
-}
-
-void CLReduceSum::run()
-{
-#if REDUCE_SUM_RUN_ON_CPU
- run_on_cpu();
-
- arm_compute::CLScheduler::get().sync();
-#else
- arm_compute::CLScheduler::get().enqueue(*_kernel);
-#endif
-}
-
-inline const TensorShape inferOutputShape(const TensorShape &input_shape,
- const std::vector<uint32_t> &reduce_axis)
-{
- TensorShape out_shape{};
-
- bool keep_axis[4] = {true, true, true, true};
-
- for (int i = 0; i < reduce_axis.size(); ++i)
- {
- auto axis = reduce_axis[i];
- keep_axis[axis] = false;
- }
-
- for (int i = 0; i < input_shape.num_dimensions(); ++i)
- {
- size_t dim = 1;
- if (keep_axis[i])
- {
- dim = input_shape[i];
- }
- out_shape.set(i, dim);
- }
-
- return out_shape;
-}
-
-template <typename T>
-inline T getReduceSum(const ICLTensor *input, const TensorShape &input_shape,
- const TensorShape &output_shape, const size_t b, const size_t d,
- const size_t h, const size_t w)
-{
- T sum_value = 0;
- Coordinates id;
-
- // If output[dimention] == 1, will check all values of that dimension because of reducing
- // dimension.
- // Else will check only one value.
- const size_t start_b = output_shape[3] == 1 ? 0 : b;
- const size_t start_d = output_shape[2] == 1 ? 0 : d;
- const size_t start_h = output_shape[1] == 1 ? 0 : h;
- const size_t start_w = output_shape[0] == 1 ? 0 : w;
- const size_t stop_b = output_shape[3] == 1 ? input_shape[3] - 1 : b;
- const size_t stop_d = output_shape[2] == 1 ? input_shape[2] - 1 : d;
- const size_t stop_h = output_shape[1] == 1 ? input_shape[1] - 1 : h;
- const size_t stop_w = output_shape[0] == 1 ? input_shape[0] - 1 : w;
- for (size_t in_b = start_b; in_b <= stop_b; ++in_b)
- {
- id.set(3, in_b);
- for (size_t in_d = start_d; in_d <= stop_d; ++in_d)
- {
- id.set(2, in_d);
- for (size_t in_h = start_h; in_h <= stop_h; ++in_h)
- {
- id.set(1, in_h);
- for (size_t in_w = start_w; in_w <= stop_w; ++in_w)
- {
- id.set(0, in_w);
- sum_value += *reinterpret_cast<T *>(input->ptr_to_element(id));
- }
- }
- }
- }
-
- return sum_value;
-}
-
-template <typename T>
-inline void reduceSum(const ICLTensor *input, const TensorShape &input_shape,
- const TensorShape &output_shape, ICLTensor *output)
-{
- Coordinates id;
- for (size_t out_b = 0; out_b < output_shape[3]; ++out_b)
- {
- id.set(3, out_b);
- for (size_t out_d = 0; out_d < output_shape[2]; ++out_d)
- {
- id.set(2, out_d);
- for (size_t out_h = 0; out_h < output_shape[1]; ++out_h)
- {
- id.set(1, out_h);
- for (size_t out_w = 0; out_w < output_shape[0]; ++out_w)
- {
- id.set(0, out_w);
- *reinterpret_cast<T *>(output->ptr_to_element(id)) =
- getReduceSum<T>(input, input_shape, output_shape, out_b, out_d, out_h, out_w);
- }
- }
- }
- }
-}
-
-void CLReduceSum::run_on_cpu()
-{
- cl::CommandQueue q = CLScheduler::get().queue();
-
- _input->map(q);
- _output->map(q);
-
- TensorShape input_shape = _input->info()->tensor_shape();
- TensorShape output_shape = inferOutputShape(input_shape, _axis);
-
- // NOTE The param input_dims and output_dims's num_dimensions can be less 4.
- // However we should suppose the num_dimensions always are 4 to support up to 4.
- input_shape.set_num_dimensions(4);
- output_shape.set_num_dimensions(4);
-
- const TensorShape output_shape_origin = _output->info()->tensor_shape();
- _output->info()->set_tensor_shape(output_shape);
-
- switch (_input->info()->data_type())
- {
- case DataType::QASYMM8:
- reduceSum<uint8_t>(_input, input_shape, output_shape, _output);
- break;
- case DataType::S32:
- reduceSum<int32_t>(_input, input_shape, output_shape, _output);
- break;
- case DataType::F32:
- reduceSum<float>(_input, input_shape, output_shape, _output);
- break;
- defualt:
- ARM_COMPUTE_ERROR("DataType not supported");
- break;
- }
-
- _output->info()->set_tensor_shape(output_shape_origin);
-
- _input->unmap(q);
- _output->unmap(q);
-}
-} // namespace arm_compute
+++ /dev/null
-/*
- * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
- * Copyright (c) 2017-2018 ARM Limited.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "arm_compute/runtime/CL/functions/CLReductionMean.h"
-
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLReductionMeanKernel.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-
-using namespace arm_compute;
-
-CLReductionMean::CLReductionMean() : _reduction_mean_kernel(), _fill_border_kernel() {}
-
-Status CLReductionMean::validate(const ITensorInfo *input, const ITensorInfo *output,
- std::vector<uint32_t> axis)
-{
- ARM_COMPUTE_RETURN_ON_ERROR(CLReductionMeanKernel::validate(input, output, axis));
- return Status{};
-}
-
-void CLReductionMean::configure(ICLTensor *input, ICLTensor *output, std::vector<uint32_t> axis)
-{
- _reduction_mean_kernel.configure(input, output, axis);
- _fill_border_kernel.configure(input, _reduction_mean_kernel.border_size(), BorderMode::CONSTANT,
- PixelValue(0));
-}
-
-void CLReductionMean::run()
-{
- CLScheduler::get().enqueue(_fill_border_kernel);
- CLScheduler::get().enqueue(_reduction_mean_kernel);
-}