remove danger race condition
authorTomoaki Teshima <tomoaki.teshima@gmail.com>
Mon, 15 Feb 2021 12:01:41 +0000 (21:01 +0900)
committerTomoaki Teshima <tomoaki.teshima@gmail.com>
Mon, 15 Feb 2021 12:01:41 +0000 (21:01 +0900)
modules/cudafilters/src/cuda/median_filter.cu

index cbc53f4b4f3674174c4db73c9fe9c19876654d18..dd43a365c0a9c9be24d74e0d998f8c2355c38ba4 100644 (file)
@@ -50,9 +50,6 @@
 
 namespace cv { namespace cuda { namespace device
 {
-    // // namespace imgproc
-    // {
-
         __device__ void histogramAddAndSub8(int* H, const int * hist_colAdd,const int * hist_colSub){
             int tx = threadIdx.x;
             if (tx<8){
@@ -120,6 +117,25 @@ namespace cv { namespace cuda { namespace device
                 luc[tx]=0;
         }
 
+#define scanNeighbor(array, range, index, threadIndex)             \
+        {                                                          \
+            int v = 0;                                             \
+            if (index <= threadIndex && threadIndex < range)       \
+                v = array[threadIndex] + array[threadIndex-index]; \
+            __syncthreads();                                       \
+            if (index <= threadIndex && threadIndex < range)       \
+                array[threadIndex] = v;                            \
+        }
+#define findMedian(array, range, threadIndex, result, count, position) \
+        if (threadIndex < range)                                       \
+        {                                                              \
+            if (array[threadIndex+1] > position && array[threadIndex] <= position) \
+            {                                                          \
+                *result = threadIndex+1;                               \
+                *count  = array[threadIndex];                          \
+            }                                                          \
+        }
+
         __device__ void histogramMedianPar8LookupOnly(int* H,int* Hscan, const int medPos,int* retval, int* countAtMed){
             int tx=threadIdx.x;
             *retval=*countAtMed=0;
@@ -127,28 +143,14 @@ namespace cv { namespace cuda { namespace device
                 Hscan[tx]=H[tx];
             }
             __syncthreads();
-            if (1 <= tx && tx < 8 )
-                Hscan[tx]+=Hscan[tx-1];
+            scanNeighbor(Hscan, 8, 1, tx);
             __syncthreads();
-            if (2 <= tx && tx < 8 )
-                Hscan[tx]+=Hscan[tx-2];
+            scanNeighbor(Hscan, 8, 2, tx);
             __syncthreads();
-            if (4 <= tx && tx < 8 )
-                Hscan[tx]+=Hscan[tx-4];
+            scanNeighbor(Hscan, 8, 4, tx);
             __syncthreads();
 
-            if(tx<7){
-                if(Hscan[tx+1] > medPos && Hscan[tx] < medPos){
-                    *retval=tx+1;
-                    *countAtMed=Hscan[tx];
-                }
-                else if(Hscan[tx]==medPos){
-                  if(Hscan[tx+1]>medPos){
-                     *retval=tx+1;
-                     *countAtMed=Hscan[tx];
-                  }
-                }
-            }
+            findMedian(Hscan, 7, tx, retval, countAtMed, medPos);
         }
 
         __device__ void histogramMedianPar32LookupOnly(int* H,int* Hscan, const int medPos,int* retval, int* countAtMed){
@@ -158,33 +160,18 @@ namespace cv { namespace cuda { namespace device
                 Hscan[tx]=H[tx];
             }
             __syncthreads();
-            if ( 1 <= tx && tx < 32 )
-                Hscan[tx]+=Hscan[tx-1];
+            scanNeighbor(Hscan, 32,  1, tx);
             __syncthreads();
-            if ( 2 <= tx && tx < 32 )
-                Hscan[tx]+=Hscan[tx-2];
+            scanNeighbor(Hscan, 32,  2, tx);
             __syncthreads();
-            if ( 4 <= tx && tx < 32 )
-                Hscan[tx]+=Hscan[tx-4];
+            scanNeighbor(Hscan, 32,  4, tx);
             __syncthreads();
-            if ( 8 <= tx && tx < 32 )
-                Hscan[tx]+=Hscan[tx-8];
+            scanNeighbor(Hscan, 32,  8, tx);
             __syncthreads();
-            if ( 16 <= tx && tx < 32 )
-                Hscan[tx]+=Hscan[tx-16];
+            scanNeighbor(Hscan, 32, 16, tx);
             __syncthreads();
-            if(tx<31){
-                if(Hscan[tx+1] > medPos && Hscan[tx] < medPos){
-                    *retval=tx+1;
-                    *countAtMed=Hscan[tx];
-                }
-                else if(Hscan[tx]==medPos){
-                  if(Hscan[tx+1]>medPos){
-                      *retval=tx+1;
-                      *countAtMed=Hscan[tx];
-                  }
-                }
-            }
+
+            findMedian(Hscan, 31, tx, retval, countAtMed, medPos);
          }
 
     __global__ void cuMedianFilterMultiBlock(PtrStepSzb src, PtrStepSzb  dest, PtrStepSzi histPar, PtrStepSzi coarseHistGrid,int r, int medPos_)
@@ -283,7 +270,6 @@ namespace cv { namespace cuda { namespace device
             __syncthreads();
 
             histogramMultipleAdd8(HCoarse,histCoarse, 2*r+1);
-//            __syncthreads();
             int cols_m_1=cols-1;
 
              for(int j=r;j<cols-r;j++){
@@ -295,23 +281,24 @@ namespace cv { namespace cuda { namespace device
                 histogramMedianPar8LookupOnly(HCoarse,HCoarseScan,medPos, &firstBin,&countAtMed);
                 __syncthreads();
 
-                if ( luc[firstBin] <= (j-r))
+                int loopIndex = luc[firstBin];
+                if (loopIndex <= (j-r))
                 {
                     histogramClear32(HFine[firstBin]);
-                    for ( luc[firstBin] = j-r; luc[firstBin] < ::min(j+r+1,cols); luc[firstBin]++ ){
-                        histogramAdd32(HFine[firstBin], hist+(luc[firstBin]*256+(firstBin<<5) ) );
+                    for ( loopIndex = j-r; loopIndex < ::min(j+r+1,cols); loopIndex++ ){
+                        histogramAdd32(HFine[firstBin], hist+(loopIndex*256+(firstBin<<5) ) );
                     }
                 }
                 else{
-                    for ( ; luc[firstBin] < (j+r+1);luc[firstBin]++ ) {
+                    for ( ; loopIndex < (j+r+1);loopIndex++ ) {
                         histogramAddAndSub32(HFine[firstBin],
-                        hist+(::min(luc[firstBin],cols_m_1)*256+(firstBin<<5) ),
-                        hist+(::max(luc[firstBin]-2*r-1,0)*256+(firstBin<<5) ) );
+                        hist+(::min(loopIndex,cols_m_1)*256+(firstBin<<5) ),
+                        hist+(::max(loopIndex-2*r-1,0)*256+(firstBin<<5) ) );
                         __syncthreads();
-
                     }
                 }
                 __syncthreads();
+                luc[firstBin] = loopIndex;
 
                 int leftOver=medPos-countAtMed;
                 if(leftOver>=0){