namespace cv { namespace cuda { namespace device
{
- // // namespace imgproc
- // {
-
__device__ void histogramAddAndSub8(int* H, const int * hist_colAdd,const int * hist_colSub){
int tx = threadIdx.x;
if (tx<8){
luc[tx]=0;
}
+#define scanNeighbor(array, range, index, threadIndex) \
+ { \
+ int v = 0; \
+ if (index <= threadIndex && threadIndex < range) \
+ v = array[threadIndex] + array[threadIndex-index]; \
+ __syncthreads(); \
+ if (index <= threadIndex && threadIndex < range) \
+ array[threadIndex] = v; \
+ }
+#define findMedian(array, range, threadIndex, result, count, position) \
+ if (threadIndex < range) \
+ { \
+ if (array[threadIndex+1] > position && array[threadIndex] <= position) \
+ { \
+ *result = threadIndex+1; \
+ *count = array[threadIndex]; \
+ } \
+ }
+
__device__ void histogramMedianPar8LookupOnly(int* H,int* Hscan, const int medPos,int* retval, int* countAtMed){
int tx=threadIdx.x;
*retval=*countAtMed=0;
Hscan[tx]=H[tx];
}
__syncthreads();
- if (1 <= tx && tx < 8 )
- Hscan[tx]+=Hscan[tx-1];
+ scanNeighbor(Hscan, 8, 1, tx);
__syncthreads();
- if (2 <= tx && tx < 8 )
- Hscan[tx]+=Hscan[tx-2];
+ scanNeighbor(Hscan, 8, 2, tx);
__syncthreads();
- if (4 <= tx && tx < 8 )
- Hscan[tx]+=Hscan[tx-4];
+ scanNeighbor(Hscan, 8, 4, tx);
__syncthreads();
- if(tx<7){
- if(Hscan[tx+1] > medPos && Hscan[tx] < medPos){
- *retval=tx+1;
- *countAtMed=Hscan[tx];
- }
- else if(Hscan[tx]==medPos){
- if(Hscan[tx+1]>medPos){
- *retval=tx+1;
- *countAtMed=Hscan[tx];
- }
- }
- }
+ findMedian(Hscan, 7, tx, retval, countAtMed, medPos);
}
__device__ void histogramMedianPar32LookupOnly(int* H,int* Hscan, const int medPos,int* retval, int* countAtMed){
Hscan[tx]=H[tx];
}
__syncthreads();
- if ( 1 <= tx && tx < 32 )
- Hscan[tx]+=Hscan[tx-1];
+ scanNeighbor(Hscan, 32, 1, tx);
__syncthreads();
- if ( 2 <= tx && tx < 32 )
- Hscan[tx]+=Hscan[tx-2];
+ scanNeighbor(Hscan, 32, 2, tx);
__syncthreads();
- if ( 4 <= tx && tx < 32 )
- Hscan[tx]+=Hscan[tx-4];
+ scanNeighbor(Hscan, 32, 4, tx);
__syncthreads();
- if ( 8 <= tx && tx < 32 )
- Hscan[tx]+=Hscan[tx-8];
+ scanNeighbor(Hscan, 32, 8, tx);
__syncthreads();
- if ( 16 <= tx && tx < 32 )
- Hscan[tx]+=Hscan[tx-16];
+ scanNeighbor(Hscan, 32, 16, tx);
__syncthreads();
- if(tx<31){
- if(Hscan[tx+1] > medPos && Hscan[tx] < medPos){
- *retval=tx+1;
- *countAtMed=Hscan[tx];
- }
- else if(Hscan[tx]==medPos){
- if(Hscan[tx+1]>medPos){
- *retval=tx+1;
- *countAtMed=Hscan[tx];
- }
- }
- }
+
+ findMedian(Hscan, 31, tx, retval, countAtMed, medPos);
}
__global__ void cuMedianFilterMultiBlock(PtrStepSzb src, PtrStepSzb dest, PtrStepSzi histPar, PtrStepSzi coarseHistGrid,int r, int medPos_)
__syncthreads();
histogramMultipleAdd8(HCoarse,histCoarse, 2*r+1);
-// __syncthreads();
int cols_m_1=cols-1;
for(int j=r;j<cols-r;j++){
histogramMedianPar8LookupOnly(HCoarse,HCoarseScan,medPos, &firstBin,&countAtMed);
__syncthreads();
- if ( luc[firstBin] <= (j-r))
+ int loopIndex = luc[firstBin];
+ if (loopIndex <= (j-r))
{
histogramClear32(HFine[firstBin]);
- for ( luc[firstBin] = j-r; luc[firstBin] < ::min(j+r+1,cols); luc[firstBin]++ ){
- histogramAdd32(HFine[firstBin], hist+(luc[firstBin]*256+(firstBin<<5) ) );
+ for ( loopIndex = j-r; loopIndex < ::min(j+r+1,cols); loopIndex++ ){
+ histogramAdd32(HFine[firstBin], hist+(loopIndex*256+(firstBin<<5) ) );
}
}
else{
- for ( ; luc[firstBin] < (j+r+1);luc[firstBin]++ ) {
+ for ( ; loopIndex < (j+r+1);loopIndex++ ) {
histogramAddAndSub32(HFine[firstBin],
- hist+(::min(luc[firstBin],cols_m_1)*256+(firstBin<<5) ),
- hist+(::max(luc[firstBin]-2*r-1,0)*256+(firstBin<<5) ) );
+ hist+(::min(loopIndex,cols_m_1)*256+(firstBin<<5) ),
+ hist+(::max(loopIndex-2*r-1,0)*256+(firstBin<<5) ) );
__syncthreads();
-
}
}
__syncthreads();
+ luc[firstBin] = loopIndex;
int leftOver=medPos-countAtMed;
if(leftOver>=0){