Fix a bug in ReduceMax for CPU (#3012)

author 장지섭/동작제어Lab(SR)/Engineer/삼성전자 <jiseob.jang@samsung.com>

Mon, 8 Oct 2018 01:34:30 +0000 (10:34 +0900)

committer 이춘석/동작제어Lab(SR)/Staff Engineer/삼성전자 <chunseok.lee@samsung.com>

Mon, 8 Oct 2018 01:34:30 +0000 (10:34 +0900)
author 장지섭/동작제어Lab(SR)/Engineer/삼성전자 <jiseob.jang@samsung.com>
Mon, 8 Oct 2018 01:34:30 +0000 (10:34 +0900)
committer 이춘석/동작제어Lab(SR)/Staff Engineer/삼성전자 <chunseok.lee@samsung.com>
Mon, 8 Oct 2018 01:34:30 +0000 (10:34 +0900)
diff --git a/libs/ARMComputeEx/src/runtime/CL/functions/CLReduceMax.cpp b/libs/ARMComputeEx/src/runtime/CL/functions/CLReduceMax.cpp

index cc2fc4a..57b720b 100644 (file)
--- a/libs/ARMComputeEx/src/runtime/CL/functions/CLReduceMax.cpp
+++ b/libs/ARMComputeEx/src/runtime/CL/functions/CLReduceMax.cpp
@@ -68,15 +68,6 @@ void CLReduceMax::run()
  #endif
  }
  
-inline int32_t offset4D(const TensorShape &shape, int32_t b, int32_t d, int32_t h, int32_t w)
-{
-  int32_t offset = b * shape[2] * shape[1] * shape[0];
-  offset += d * shape[1] * shape[0];
-  offset += h * shape[0];
-  offset += w;
-  return offset;
-}
-
  inline const TensorShape inferOutputShape(const TensorShape &input_shape,
                                            const std::vector<uint32_t> &reduce_axis)
  {
@@ -104,12 +95,10 @@ inline const TensorShape inferOutputShape(const TensorShape &input_shape,
  }
  
  template <typename T>
-inline T getReduceMax(const T *input_data, const TensorShape &input_shape,
+inline T getReduceMax(const ICLTensor *input, const TensorShape &input_shape,
                        const TensorShape &output_shape, const size_t b, const size_t d,
                        const size_t h, const size_t w)
  {
-  T max_value = input_data[offset4D(input_shape, b, d, h, w)];
-
    // If output[dimension] == 1, all values along that dimension are checked because that
    // dimension is being reduced.
    // Otherwise only a single value is checked.
@@ -121,16 +110,23 @@ inline T getReduceMax(const T *input_data, const TensorShape &input_shape,
    const size_t stop_d = output_shape[2] == 1 ? input_shape[2] - 1 : d;
    const size_t stop_h = output_shape[1] == 1 ? input_shape[1] - 1 : h;
    const size_t stop_w = output_shape[0] == 1 ? input_shape[0] - 1 : w;
+
+  Coordinates id{w, h, d, b};
+  T max_value = *reinterpret_cast<T *>(input->ptr_to_element(id));
+
    for (size_t in_b = start_b; in_b <= stop_b; ++in_b)
    {
+    id.set(3, in_b);
      for (size_t in_d = start_d; in_d <= stop_d; ++in_d)
      {
+      id.set(2, in_d);
        for (size_t in_h = start_h; in_h <= stop_h; ++in_h)
        {
+        id.set(1, in_h);
          for (size_t in_w = start_w; in_w <= stop_w; ++in_w)
          {
-          max_value =
-              std::max<T>(max_value, input_data[offset4D(input_shape, in_b, in_d, in_h, in_w)]);
+          id.set(0, in_w);
+          max_value = std::max<T>(max_value, *reinterpret_cast<T *>(input->ptr_to_element(id)));
          }
        }
      }
@@ -140,19 +136,24 @@ inline T getReduceMax(const T *input_data, const TensorShape &input_shape,
  }
  
  template <typename T>
-inline void reduceMax(const T *input_data, const TensorShape &input_shape,
-                      const TensorShape &output_shape, T *output_data)
+inline void reduceMax(const ICLTensor *input, const TensorShape &input_shape,
+                      const TensorShape &output_shape, ICLTensor *output)
  {
+  Coordinates id;
    for (size_t out_b = 0; out_b < output_shape[3]; ++out_b)
    {
+    id.set(3, out_b);
      for (size_t out_d = 0; out_d < output_shape[2]; ++out_d)
      {
+      id.set(2, out_d);
        for (size_t out_h = 0; out_h < output_shape[1]; ++out_h)
        {
+        id.set(1, out_h);
          for (size_t out_w = 0; out_w < output_shape[0]; ++out_w)
          {
-          output_data[offset4D(output_shape, out_b, out_d, out_h, out_w)] =
-              getReduceMax(input_data, input_shape, output_shape, out_b, out_d, out_h, out_w);
+          id.set(0, out_w);
+          *reinterpret_cast<T *>(output->ptr_to_element(id)) =
+              getReduceMax<T>(input, input_shape, output_shape, out_b, out_d, out_h, out_w);
          }
        }
      }
@@ -174,25 +175,27 @@ void CLReduceMax::run_on_cpu()
    input_shape.set_num_dimensions(4);
    output_shape.set_num_dimensions(4);
  
+  const TensorShape output_shape_origin = _output->info()->tensor_shape();
+  _output->info()->set_tensor_shape(output_shape);
+
    switch (_input->info()->data_type())
    {
      case DataType::QASYMM8:
-      reduceMax(reinterpret_cast<const uint8_t *>(_input->buffer()), input_shape, output_shape,
-                reinterpret_cast<uint8_t *>(_output->buffer()));
+      reduceMax<uint8_t>(_input, input_shape, output_shape, _output);
        break;
      case DataType::S32:
-      reduceMax(reinterpret_cast<const int32_t *>(_input->buffer()), input_shape, output_shape,
-                reinterpret_cast<int32_t *>(_output->buffer()));
+      reduceMax<int32_t>(_input, input_shape, output_shape, _output);
        break;
      case DataType::F32:
-      reduceMax(reinterpret_cast<const float *>(_input->buffer()), input_shape, output_shape,
-                reinterpret_cast<float *>(_output->buffer()));
+      reduceMax<float>(_input, input_shape, output_shape, _output);
        break;
      defualt:
        ARM_COMPUTE_ERROR("DataType not supported");
        break;
    }
  
+  _output->info()->set_tensor_shape(output_shape_origin);
+
    _input->unmap(q);
    _output->unmap(q);
  }
author	장지섭/동작제어Lab(SR)/Engineer/삼성전자 <jiseob.jang@samsung.com>
	Mon, 8 Oct 2018 01:34:30 +0000 (10:34 +0900)
committer	이춘석/동작제어Lab(SR)/Staff Engineer/삼성전자 <chunseok.lee@samsung.com>
	Mon, 8 Oct 2018 01:34:30 +0000 (10:34 +0900)