From c9fcc12e3b775eff95c77bf6d87bb5f5c6ae585f Mon Sep 17 00:00:00 2001 From: Chip Kerchner <49959681+ChipKerchner@users.noreply.github.com> Date: Tue, 16 Jul 2019 09:10:49 -0400 Subject: [PATCH] Merge pull request #15048 from ChipKerchner:reduceStoreGatheringThreshold * Reduce store gathering pressures - speeds thresholds by up to 20% * Rename temporary histogram array and initialize so that MACOSX builder is happy --- modules/imgproc/src/thresh.cpp | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/modules/imgproc/src/thresh.cpp b/modules/imgproc/src/thresh.cpp index d724db4..466b0a8 100644 --- a/modules/imgproc/src/thresh.cpp +++ b/modules/imgproc/src/thresh.cpp @@ -1159,6 +1159,9 @@ getThreshVal_Otsu_8u( const Mat& _src ) const int N = 256; int i, j, h[N] = {0}; + #if CV_ENABLE_UNROLLED + int h_unrolled[3][N] = {}; + #endif for( i = 0; i < size.height; i++ ) { const uchar* src = _src.ptr() + step*i; @@ -1167,9 +1170,9 @@ getThreshVal_Otsu_8u( const Mat& _src ) for( ; j <= size.width - 4; j += 4 ) { int v0 = src[j], v1 = src[j+1]; - h[v0]++; h[v1]++; + h[v0]++; h_unrolled[0][v1]++; v0 = src[j+2]; v1 = src[j+3]; - h[v0]++; h[v1]++; + h_unrolled[1][v0]++; h_unrolled[2][v1]++; } #endif for( ; j < size.width; j++ ) @@ -1178,7 +1181,12 @@ getThreshVal_Otsu_8u( const Mat& _src ) double mu = 0, scale = 1./(size.width*size.height); for( i = 0; i < N; i++ ) + { + #if CV_ENABLE_UNROLLED + h[i] += h_unrolled[0][i] + h_unrolled[1][i] + h_unrolled[2][i]; + #endif mu += i*(double)h[i]; + } mu *= scale; double mu1 = 0, q1 = 0; @@ -1223,6 +1231,9 @@ getThreshVal_Triangle_8u( const Mat& _src ) const int N = 256; int i, j, h[N] = {0}; + #if CV_ENABLE_UNROLLED + int h_unrolled[3][N] = {}; + #endif for( i = 0; i < size.height; i++ ) { const uchar* src = _src.ptr() + step*i; @@ -1231,9 +1242,9 @@ getThreshVal_Triangle_8u( const Mat& _src ) for( ; j <= size.width - 4; j += 4 ) { int v0 = src[j], v1 = src[j+1]; - h[v0]++; h[v1]++; + h[v0]++; h_unrolled[0][v1]++; v0 = src[j+2]; v1 = src[j+3]; - h[v0]++; h[v1]++; + h_unrolled[1][v0]++; h_unrolled[2][v1]++; } #endif for( ; j < size.width; j++ ) @@ -1244,6 +1255,13 @@ getThreshVal_Triangle_8u( const Mat& _src ) int temp; bool isflipped = false; + #if CV_ENABLE_UNROLLED + for( i = 0; i < N; i++ ) + { + h[i] += h_unrolled[0][i] + h_unrolled[1][i] + h_unrolled[2][i]; + } + #endif + for( i = 0; i < N; i++ ) { if( h[i] > 0 ) -- 2.7.4