Merge pull request #15136 from ChipKerchner:dotProd_unroll
author Chip Kerchner <49959681+ChipKerchner@users.noreply.github.com>
Thu, 25 Jul 2019 18:21:32 +0000 (14:21 -0400)
committer Alexander Alekhin <alexander.a.alekhin@gmail.com>
Thu, 25 Jul 2019 18:21:32 +0000 (21:21 +0300)
* Unroll multiply and add instructions in dotProd_32f - 35% faster.

* Eliminate unnecessary v_reduce_sum instructions.

modules/core/src/matmul.simd.hpp

index ef54bb0..bb6b6c5 100644 (file)
@@ -2511,6 +2511,27 @@ double dotProd_32f(const float* src1, const float* src2, int len)
 
         int j = 0;
         int cWidth = v_float32::nlanes;
+
+#if CV_ENABLE_UNROLLED
+        v_float32 v_sum1 = vx_setzero_f32();
+        v_float32 v_sum2 = vx_setzero_f32();
+        v_float32 v_sum3 = vx_setzero_f32();
+
+        for (; j <= blockSize - (cWidth * 4); j += (cWidth * 4))
+        {
+            v_sum  = v_muladd(vx_load(src1 + j),
+                              vx_load(src2 + j), v_sum);
+            v_sum1 = v_muladd(vx_load(src1 + j + cWidth),
+                              vx_load(src2 + j + cWidth), v_sum1);
+            v_sum2 = v_muladd(vx_load(src1 + j + (cWidth * 2)),
+                              vx_load(src2 + j + (cWidth * 2)), v_sum2);
+            v_sum3 = v_muladd(vx_load(src1 + j + (cWidth * 3)),
+                              vx_load(src2 + j + (cWidth * 3)), v_sum3);
+        }
+
+        v_sum += v_sum1 + v_sum2 + v_sum3;
+#endif
+
         for (; j <= blockSize - cWidth; j += cWidth)
             v_sum = v_muladd(vx_load(src1 + j), vx_load(src2 + j), v_sum);
 
@@ -2532,4 +2553,4 @@ double dotProd_64f(const double* src1, const double* src2, int len)
 
 #endif
 CV_CPU_OPTIMIZATION_NAMESPACE_END
-} // namespace
\ No newline at end of file
+} // namespace