Fix data layout retention and handling of leftovers when there is no padding
author: Michele Di Giorgio <michele.digiorgio@arm.com>
Fri, 12 Feb 2021 17:34:17 +0000 (17:34 +0000)
committer: Giorgio Arena <giorgio.arena@arm.com>
Tue, 16 Feb 2021 10:12:44 +0000 (10:12 +0000)
Resolves COMPMID-4258

Change-Id: I8782bf725cd0d376d538021406eb1f5be962b2cb
Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/c/VisualCompute/ComputeLibrary/+/298627
Reviewed-by: Teresa Charlin Reyes <teresa.charlinreyes@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Comments-Addressed: Teresa Charlin Reyes <teresa.charlinreyes@arm.com>
Tested-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5082
Reviewed-by: TeresaARM <teresa.charlinreyes@arm.com>
src/core/NEON/kernels/arm_conv/pooling/pooling_depthfirst_generic.hpp
src/core/NEON/kernels/arm_conv/pooling/pooling_depthfirst_generic_quantized.hpp
src/core/cpu/kernels/CpuPoolingKernel.cpp

index fa06a0078b44689512f9cc616280f71541cb61f9..5979862ed81c95467b63a84abc1deb4f69c9cfd7 100644 (file)
@@ -191,6 +191,13 @@ class PoolingDepthfirstGeneric : public PoolingCommon<typename strategy::operand
         const auto pad_bottom = static_cast<unsigned int>(std::max<int>(end_in_i - height, 0));
         const auto valid_rows = input_rows() - pad_top - pad_bottom;
 
+        // Compute the number of pooling window rows which are contained in
+        // either the valid region of the input tensor, or the padding.
+        const auto padded_bottom = std::min<unsigned int>(
+          start_in_i + m_args.pool_window.rows, height + padding.bottom
+        );
+        const auto n_total_rows = padded_bottom - start_in_i;
+
         auto outptr_col = outptr_row;
         auto inptr_row = inptr_batch + (start_in_i + pad_top) * ld_input_row;
 
@@ -205,6 +212,13 @@ class PoolingDepthfirstGeneric : public PoolingCommon<typename strategy::operand
           const auto pad_right = static_cast<unsigned int>(std::max<int>(0, end_in_j - width));
           const auto valid_cols = input_cols() - pad_left - pad_right;
 
+          // Compute the number of pooling window columns which are contained
+          // in either the valid region of the input tensor, or the padding.
+          const auto padded_right = std::min<unsigned int>(
+            start_in_j + m_args.pool_window.cols, width + padding.right
+          );
+          const auto n_total_cols = padded_right - start_in_j;
+
           // Construct the input pointer array - fill in all valid points
           // contiguously.
           const TInput **ptrs = inptr_array;
@@ -222,7 +236,8 @@ class PoolingDepthfirstGeneric : public PoolingCommon<typename strategy::operand
 
           // Compute the number of valid cells
           const auto valid_cells = valid_rows * valid_cols;
-          const auto window_cells = m_args.exclude_padding ? valid_cells : input_rows() * input_cols();
+          const auto cells_in_range = n_total_rows * n_total_cols;
+          const auto window_cells = m_args.exclude_padding ? valid_cells : cells_in_range;
 
           // Get the output pointer for this call
           TOutput *outptr = outptr_col;
index 1f2891f814c779cd52c3824ca5785bcb5545f790..f3cb9a1d1f2318260e03e97354ab0663dce7c75f 100644 (file)
@@ -191,6 +191,13 @@ class PoolingDepthfirstGenericQuantized : public PoolingCommon<typename strategy
         const auto pad_top = static_cast<unsigned int>(-std::min(start_in_i, 0));
         const auto pad_bottom = static_cast<unsigned int>(-std::min(static_cast<int>(height) - end_in_i, 0));
 
+        // Compute the number of pooling window rows which are contained in
+        // either the valid region of the input tensor, or the padding.
+        const auto padded_bottom = std::min<unsigned int>(
+          start_in_i + m_args.pool_window.rows, height + padding.bottom
+        );
+        const auto n_total_rows = padded_bottom - start_in_i;
+
         for (int out_j = 0, start_in_j = -padding.left;
              out_j < static_cast<int>(output_width);
              out_j++, start_in_j += m_args.pool_stride.cols)
@@ -201,6 +208,13 @@ class PoolingDepthfirstGenericQuantized : public PoolingCommon<typename strategy
           const auto pad_left = static_cast<unsigned int>(-std::min(start_in_j, 0));
           const auto pad_right = static_cast<unsigned int>(-std::min(static_cast<int>(width) - end_in_j, 0));
 
+          // Compute the number of pooling window columns which are contained
+          // in either the valid region of the input tensor, or the padding.
+          const auto padded_right = std::min<unsigned int>(
+            start_in_j + m_args.pool_window.cols, width + padding.right
+          );
+          const auto n_total_cols = padded_right - start_in_j;
+
           // Construct the input pointer array - fill in all valid points
           // contiguously.
           const TInput **ptrs = inptr_array;
@@ -221,7 +235,8 @@ class PoolingDepthfirstGenericQuantized : public PoolingCommon<typename strategy
           const auto valid_rows = input_rows() - pad_top - pad_bottom;
           const auto valid_cols = input_cols() - pad_left - pad_right;
           const auto valid_cells = valid_rows * valid_cols;
-          const auto window_cells = m_args.exclude_padding ? valid_cells : input_rows() * input_cols();
+          const auto cells_in_range = n_total_rows * n_total_cols;
+          const auto window_cells = m_args.exclude_padding ? valid_cells : cells_in_range;
 
           // Get the output pointer for this call
           TOutput *outptr = outptr_batch + out_i * ld_output_row + out_j * ld_output_col;
index 21afad2c3fc816be15b61bf742e05cd69401586a..e159bb40a9bf5fd6eeb85578891f54118f135309 100644 (file)
@@ -356,7 +356,7 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *src, ITenso
         dst_shape.set(1, pooled_h);
         TensorInfo dst_info(src->clone()->set_tensor_shape(dst_shape));
         win = calculate_max_window(dst_info, Steps(num_elems_processed_per_iteration));
-        AccessWindowStatic     src_access(src, -pool_pad_left, -pool_pad_top, src_width + border_size.right, src_height + border_size.bottom);
+        AccessWindowStatic     src_access(src, -pool_pad_left, -pool_pad_top, ceil_to_multiple(src_width + border_size.right, pool_size_x), src_height + border_size.bottom);
         AccessWindowHorizontal dst_access(dst, 0, num_elems_horizontal_window);
         if(indices)
         {
@@ -368,6 +368,8 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *src, ITenso
             window_changed = update_window_and_padding(win, src_access, dst_access);
         }
         dst_access.set_valid_region(win, ValidRegion(Coordinates(), dst->tensor_shape()));
+
+        border_size = src->padding();
     }
 
     Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
@@ -529,7 +531,7 @@ void CpuPoolingKernel::run_op(ITensorPack &tensors, const Window &window, const
         window_src.set(Window::DimZ, Window::Dimension(0, src->info()->dimension(2), pool_stride_y));
     }
 
-    const auto *uk = get_implementation(src->info()->data_type(), src->info()->data_layout(), _pool_stride_x, _pool_size);
+    const auto *uk = get_implementation(src->info()->data_type(), _data_layout, _pool_stride_x, _pool_size);
     ARM_COMPUTE_ERROR_ON(uk == nullptr || uk->ukernel == nullptr);
 
     uk->ukernel(src, dst, indices, _pool_info, window_src, window);