From: vbystricky Date: Thu, 26 Jun 2014 11:43:40 +0000 (+0400) Subject: Change local size X-Git-Tag: submit/tizen_ivi/20141117.190038~2^2~302^2 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=1a73aa1f6a3e25cdbe749e4ceaf1a75ece0fb6fb;p=profile%2Fivi%2Fopencv.git Change local size --- diff --git a/modules/imgproc/src/filter.cpp b/modules/imgproc/src/filter.cpp index d23de91..6c0da79 100644 --- a/modules/imgproc/src/filter.cpp +++ b/modules/imgproc/src/filter.cpp @@ -3471,7 +3471,8 @@ static bool ocl_sepColFilter2D(const UMat & buf, UMat & dst, const Mat & kernelY return k.run(2, globalsize, localsize, false); } -const int optimizedSepFilterLocalSize = 16; +const int optimizedSepFilterLocalWidth = 16; +const int optimizedSepFilterLocalHeight = 8; static bool ocl_sepFilter2D_SinglePass(InputArray _src, OutputArray _dst, Mat row_kernel, Mat col_kernel, @@ -3491,8 +3492,8 @@ static bool ocl_sepFilter2D_SinglePass(InputArray _src, OutputArray _dst, borderType == BORDER_REFLECT_101)) return false; - size_t lt2[2] = { optimizedSepFilterLocalSize, optimizedSepFilterLocalSize }; - size_t gt2[2] = { lt2[0] * (1 + (size.width - 1) / lt2[0]), optimizedSepFilterLocalSize}; + size_t lt2[2] = { optimizedSepFilterLocalWidth, optimizedSepFilterLocalHeight }; + size_t gt2[2] = { lt2[0] * (1 + (size.width - 1) / lt2[0]), lt2[1]}; char cvt[2][40]; const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP", @@ -3584,8 +3585,8 @@ static bool ocl_sepFilter2D( InputArray _src, OutputArray _dst, int ddepth, } CV_OCL_RUN_(kernelY.cols <= 21 && kernelX.cols <= 21 && - imgSize.width > optimizedSepFilterLocalSize + anchor.x && - imgSize.height > optimizedSepFilterLocalSize + anchor.y && + imgSize.width > optimizedSepFilterLocalWidth + anchor.x && + imgSize.height > optimizedSepFilterLocalHeight + anchor.y && (!(borderType & BORDER_ISOLATED) || _src.offset() == 0) && anchor == Point(kernelX.cols >> 1, kernelY.cols >> 1) && (d.isIntel() || (d.isAMD() && !d.hostUnifiedMemory())), diff --git a/modules/imgproc/src/opencl/filterSep_singlePass.cl b/modules/imgproc/src/opencl/filterSep_singlePass.cl index 6c3bbdc..8c14f2d 100644 --- a/modules/imgproc/src/opencl/filterSep_singlePass.cl +++ b/modules/imgproc/src/opencl/filterSep_singlePass.cl @@ -127,10 +127,9 @@ __kernel void sep_filter(__global uchar* Src, int src_step, int srcOffsetX, int // and read my own source pixel into local memory // with account for extra border pixels, which will be read by starting workitems int clocY = liy; - int cSrcY = liy + srcOffsetY - RADIUSY; do { - int yb = cSrcY; + int yb = clocY + srcOffsetY - RADIUSY; EXTRAPOLATE(yb, (height)); int clocX = lix; @@ -147,7 +146,6 @@ __kernel void sep_filter(__global uchar* Src, int src_step, int srcOffsetX, int while(clocX < BLK_X+(RADIUSX*2)); clocY += BLK_Y; - cSrcY += BLK_Y; } while (clocY < BLK_Y+(RADIUSY*2)); barrier(CLK_LOCAL_MEM_FENCE); @@ -206,8 +204,8 @@ __kernel void sep_filter(__global uchar* Src, int src_step, int srcOffsetX, int } barrier(CLK_LOCAL_MEM_FENCE); - int cSrcY = y + BLK_Y + liy + srcOffsetY + RADIUSY; - EXTRAPOLATE(cSrcY, (height)); + int yb = y + liy + BLK_Y + srcOffsetY + RADIUSY; + EXTRAPOLATE(yb, (height)); clocX = lix; int cSrcX = x + srcOffsetX - RADIUSX; @@ -215,7 +213,7 @@ __kernel void sep_filter(__global uchar* Src, int src_step, int srcOffsetX, int { int xb = cSrcX; EXTRAPOLATE(xb,(width)); - lsmem[liy + 2*RADIUSY][clocX] = ELEM(xb, cSrcY, (width), (height), 0 ); + lsmem[liy + 2*RADIUSY][clocX] = ELEM(xb, yb, (width), (height), 0 ); clocX += BLK_X; cSrcX += BLK_X;