COMPMID-3850: NEPooling regression for NHWC
author: Georgios Pinitas <georgios.pinitas@arm.com>
Thu, 5 Nov 2020 20:06:49 +0000 (20:06 +0000)
committer: Georgios Pinitas <georgios.pinitas@arm.com>
Fri, 6 Nov 2020 17:33:01 +0000 (17:33 +0000)
Add an 8-element vector loop ahead of the scalar left-over loop so that
remaining multiples of 8 are handled with vector operations for quantized
data types during NHWC MaxPooling.

Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Change-Id: I1304d174c45d2c98247470ac8b4bb6752bbc03a6
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4339
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
src/core/NEON/kernels/NEPoolingLayerKernel.cpp

index 0f0b9eed5abdf20d4c9a4bc8ca5e9b0092651499..b46843badd64cc3ddef6b9cd356e78ea43447aa4 100644 (file)
@@ -2283,9 +2283,10 @@ void NEPoolingLayerKernel::poolingMxN_q8_nchw(const Window &window_input, const
 template <typename T>
 void NEPoolingLayerKernel::poolingMxN_q8_nhwc(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding)
 {
-    const int window_start_x = window.x().start();
-    const int window_end_x   = window.x().end();
-    const int window_step_x  = 16;
+    const int window_start_x     = window.x().start();
+    const int window_end_x       = window.x().end();
+    const int window_step_x      = 16;
+    const int window_half_step_x = window_step_x / 2;
 
     Window window_out = window;
     window_out.set(Window::DimX, Window::Dimension(0, 1, 1));
@@ -2422,6 +2423,27 @@ void NEPoolingLayerKernel::poolingMxN_q8_nhwc(const Window &window_input, const
             }
         }
 
+        if(pooling_type == PoolingType::MAX)
+        {
+            for(; x_off <= (window_end_x - window_half_step_x); x_off += window_half_step_x)
+            {
+                q8x8_t vres = wrapper::vdup_n(std::numeric_limits<T>::min(), wrapper::traits::vector_64_tag{});
+                for(int y = pool_start_y; y < pool_end_y; ++y)
+                {
+                    for(int x = pool_start_x; x < pool_end_x; ++x)
+                    {
+                        const q8x8_t data = wrapper::vload(reinterpret_cast<const T *>(input.ptr() + (x - pool_pad_left) * static_cast<int>(_input->info()->strides_in_bytes().y()) + (y - pool_pad_top) * static_cast<int>
+                                                                                       (_input->info()->strides_in_bytes().z())) + x_off);
+                        vres              = wrapper::vmax(vres, data);
+                    }
+                }
+
+                // Store result
+                wrapper::vstore(reinterpret_cast<T *>(output.ptr()) + x_off,
+                                (input_qinfo != output_qinfo) ? vrequantize_pooling<q8x8_t>(vres, requant_qinfo) : vres);
+            }
+        }
+
         // Left-overs loop
         for(; x_off < window_end_x; ++x_off)
         {