Merge pull request #15048 from ChipKerchner:reduceStoreGatheringThreshold
author Chip Kerchner <49959681+ChipKerchner@users.noreply.github.com>
Tue, 16 Jul 2019 13:10:49 +0000 (09:10 -0400)
committer Alexander Alekhin <alexander.a.alekhin@gmail.com>
Tue, 16 Jul 2019 13:10:49 +0000 (16:10 +0300)
* Reduce store-gathering pressure - speeds up thresholding by up to 20%

* Rename temporary histogram array and initialize it so that the MACOSX builder is happy

modules/imgproc/src/thresh.cpp

index d724db4..466b0a8 100644 (file)
@@ -1159,6 +1159,9 @@ getThreshVal_Otsu_8u( const Mat& _src )
 
     const int N = 256;
     int i, j, h[N] = {0};
+    #if CV_ENABLE_UNROLLED
+    int h_unrolled[3][N] = {};
+    #endif
     for( i = 0; i < size.height; i++ )
     {
         const uchar* src = _src.ptr() + step*i;
@@ -1167,9 +1170,9 @@ getThreshVal_Otsu_8u( const Mat& _src )
         for( ; j <= size.width - 4; j += 4 )
         {
             int v0 = src[j], v1 = src[j+1];
-            h[v0]++; h[v1]++;
+            h[v0]++; h_unrolled[0][v1]++;
             v0 = src[j+2]; v1 = src[j+3];
-            h[v0]++; h[v1]++;
+            h_unrolled[1][v0]++; h_unrolled[2][v1]++;
         }
         #endif
         for( ; j < size.width; j++ )
@@ -1178,7 +1181,12 @@ getThreshVal_Otsu_8u( const Mat& _src )
 
     double mu = 0, scale = 1./(size.width*size.height);
     for( i = 0; i < N; i++ )
+    {
+        #if CV_ENABLE_UNROLLED
+        h[i] += h_unrolled[0][i] + h_unrolled[1][i] + h_unrolled[2][i];
+        #endif
         mu += i*(double)h[i];
+    }
 
     mu *= scale;
     double mu1 = 0, q1 = 0;
@@ -1223,6 +1231,9 @@ getThreshVal_Triangle_8u( const Mat& _src )
 
     const int N = 256;
     int i, j, h[N] = {0};
+    #if CV_ENABLE_UNROLLED
+    int h_unrolled[3][N] = {};
+    #endif
     for( i = 0; i < size.height; i++ )
     {
         const uchar* src = _src.ptr() + step*i;
@@ -1231,9 +1242,9 @@ getThreshVal_Triangle_8u( const Mat& _src )
         for( ; j <= size.width - 4; j += 4 )
         {
             int v0 = src[j], v1 = src[j+1];
-            h[v0]++; h[v1]++;
+            h[v0]++; h_unrolled[0][v1]++;
             v0 = src[j+2]; v1 = src[j+3];
-            h[v0]++; h[v1]++;
+            h_unrolled[1][v0]++; h_unrolled[2][v1]++;
         }
         #endif
         for( ; j < size.width; j++ )
@@ -1244,6 +1255,13 @@ getThreshVal_Triangle_8u( const Mat& _src )
     int temp;
     bool isflipped = false;
 
+    #if CV_ENABLE_UNROLLED
+    for( i = 0; i < N; i++ )
+    {
+        h[i] += h_unrolled[0][i] + h_unrolled[1][i] + h_unrolled[2][i];
+    }
+    #endif
+
     for( i = 0; i < N; i++ )
     {
         if( h[i] > 0 )