From bfda10bc6c34f3b5016085fb5c3c2bb1a99ab652 Mon Sep 17 00:00:00 2001
From: Jiseob Jang/Motion Control Lab(SR)/Engineer/Samsung Electronics
Date: Mon, 8 Oct 2018 10:34:30 +0900
Subject: [PATCH] Fix a bug in ReduceMax for CPU (#3012)

This commit fixes a bug in ReduceMax on the CPU fallback path.

- Fix ReduceMax to work with `ICLTensor`s even when they have padding.

Signed-off-by: jiseob.jang
---
 .../src/runtime/CL/functions/CLReduceMax.cpp | 51 ++++++++++++----------
 1 file changed, 27 insertions(+), 24 deletions(-)
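Note (reviewer annotation, not part of the commit; git am ignores text
between the "---" marker and the diff): a CL tensor's backing buffer may
carry per-row padding for alignment, so rows are not necessarily
`shape[0]` elements apart in memory. The removed offset4D() computed a
dense offset from the logical shape alone and indexed `buffer()`
directly, which dereferences the wrong bytes on a padded tensor, whereas
`ptr_to_element(Coordinates)` resolves the address through the tensor's
actual strides. Below is a minimal standalone sketch of the difference;
the 2x4 shape, the padded row length of 6, and the -99 filler values are
made up for illustration.

#include <cstddef>
#include <iostream>

int main()
{
  // A logical 2x4 (H x W) float tensor whose rows are padded to 6
  // elements each, as an aligned CL buffer might be.
  const std::size_t W = 4, row_elems = 6; // row_elems > W due to padding
  float storage[2 * 6] = {
      0, 1, 2, 3, /*pad*/ -99, -99, // row 0
      4, 5, 6, 7, /*pad*/ -99, -99, // row 1
  };

  // Dense indexing (what offset4D assumed): rows are W elements apart.
  // For element (h=1, w=1) this lands in the padding.
  std::cout << "dense:   " << storage[1 * W + 1] << '\n'; // prints -99

  // Stride-based indexing (what ptr_to_element does via the tensor's
  // strides): rows are row_elems apart, so the padding is skipped.
  std::cout << "strided: " << storage[1 * row_elems + 1] << '\n'; // prints 5
  return 0;
}

Compiling and running this prints -99 for the dense path and 5 for the
strided one, which is the class of corruption the switch to
ptr_to_element() avoids.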
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLReduceMax.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLReduceMax.cpp
index cc2fc4a..57b720b 100644
--- a/libs/ARMComputeEx/src/runtime/CL/functions/CLReduceMax.cpp
+++ b/libs/ARMComputeEx/src/runtime/CL/functions/CLReduceMax.cpp
@@ -68,15 +68,6 @@ void CLReduceMax::run()
 #endif
 }
 
-inline int32_t offset4D(const TensorShape &shape, int32_t b, int32_t d, int32_t h, int32_t w)
-{
-  int32_t offset = b * shape[2] * shape[1] * shape[0];
-  offset += d * shape[1] * shape[0];
-  offset += h * shape[0];
-  offset += w;
-  return offset;
-}
-
 inline const TensorShape inferOutputShape(const TensorShape &input_shape,
                                           const std::vector<uint32_t> &reduce_axis)
 {
@@ -104,12 +95,10 @@ inline const TensorShape inferOutputShape(const TensorShape &input_shape,
 }
 
 template <typename T>
-inline T getReduceMax(const T *input_data, const TensorShape &input_shape,
+inline T getReduceMax(const ICLTensor *input, const TensorShape &input_shape,
                       const TensorShape &output_shape, const size_t b, const size_t d,
                       const size_t h, const size_t w)
 {
-  T max_value = input_data[offset4D(input_shape, b, d, h, w)];
-
   // If output[dimension] == 1, will check all values of that dimension because of reducing
   // dimension.
   // Else will check only one value.
@@ -121,16 +110,23 @@ inline T getReduceMax(const T *input_data, const TensorShape &input_shape,
   const size_t stop_d = output_shape[2] == 1 ? input_shape[2] - 1 : d;
   const size_t stop_h = output_shape[1] == 1 ? input_shape[1] - 1 : h;
   const size_t stop_w = output_shape[0] == 1 ? input_shape[0] - 1 : w;
+
+  Coordinates id{w, h, d, b};
+  T max_value = *reinterpret_cast<T *>(input->ptr_to_element(id));
+
   for (size_t in_b = start_b; in_b <= stop_b; ++in_b)
   {
+    id.set(3, in_b);
     for (size_t in_d = start_d; in_d <= stop_d; ++in_d)
     {
+      id.set(2, in_d);
       for (size_t in_h = start_h; in_h <= stop_h; ++in_h)
       {
+        id.set(1, in_h);
         for (size_t in_w = start_w; in_w <= stop_w; ++in_w)
         {
-          max_value =
-              std::max(max_value, input_data[offset4D(input_shape, in_b, in_d, in_h, in_w)]);
+          id.set(0, in_w);
+          max_value = std::max(max_value, *reinterpret_cast<T *>(input->ptr_to_element(id)));
         }
       }
     }
@@ -140,19 +136,24 @@ inline T getReduceMax(const T *input_data, const TensorShape &input_shape,
 }
 
 template <typename T>
-inline void reduceMax(const T *input_data, const TensorShape &input_shape,
-                      const TensorShape &output_shape, T *output_data)
+inline void reduceMax(const ICLTensor *input, const TensorShape &input_shape,
+                      const TensorShape &output_shape, ICLTensor *output)
 {
+  Coordinates id;
   for (size_t out_b = 0; out_b < output_shape[3]; ++out_b)
   {
+    id.set(3, out_b);
     for (size_t out_d = 0; out_d < output_shape[2]; ++out_d)
     {
+      id.set(2, out_d);
       for (size_t out_h = 0; out_h < output_shape[1]; ++out_h)
       {
+        id.set(1, out_h);
         for (size_t out_w = 0; out_w < output_shape[0]; ++out_w)
         {
-          output_data[offset4D(output_shape, out_b, out_d, out_h, out_w)] =
-              getReduceMax(input_data, input_shape, output_shape, out_b, out_d, out_h, out_w);
+          id.set(0, out_w);
+          *reinterpret_cast<T *>(output->ptr_to_element(id)) =
+              getReduceMax<T>(input, input_shape, output_shape, out_b, out_d, out_h, out_w);
         }
       }
     }
@@ -174,25 +175,27 @@ void CLReduceMax::run_on_cpu()
   input_shape.set_num_dimensions(4);
   output_shape.set_num_dimensions(4);
 
+  const TensorShape output_shape_origin = _output->info()->tensor_shape();
+  _output->info()->set_tensor_shape(output_shape);
+
   switch (_input->info()->data_type())
   {
     case DataType::QASYMM8:
-      reduceMax(reinterpret_cast<const uint8_t *>(_input->buffer()), input_shape, output_shape,
-                reinterpret_cast<uint8_t *>(_output->buffer()));
+      reduceMax<uint8_t>(_input, input_shape, output_shape, _output);
       break;
     case DataType::S32:
-      reduceMax(reinterpret_cast<const int32_t *>(_input->buffer()), input_shape, output_shape,
-                reinterpret_cast<int32_t *>(_output->buffer()));
+      reduceMax<int32_t>(_input, input_shape, output_shape, _output);
       break;
     case DataType::F32:
-      reduceMax(reinterpret_cast<const float *>(_input->buffer()), input_shape, output_shape,
-                reinterpret_cast<float *>(_output->buffer()));
+      reduceMax<float>(_input, input_shape, output_shape, _output);
       break;
     default:
      ARM_COMPUTE_ERROR("DataType not supported");
      break;
  }
 
+  _output->info()->set_tensor_shape(output_shape_origin);
+
   _input->unmap(q);
   _output->unmap(q);
 }
-- 
2.7.4