return k.run(2, globalsize, localsize, false);
}
-const int optimizedSepFilterLocalSize = 16;
+const int optimizedSepFilterLocalWidth = 16;
+const int optimizedSepFilterLocalHeight = 8;
static bool ocl_sepFilter2D_SinglePass(InputArray _src, OutputArray _dst,
Mat row_kernel, Mat col_kernel,
borderType == BORDER_REFLECT_101))
return false;
- size_t lt2[2] = { optimizedSepFilterLocalSize, optimizedSepFilterLocalSize };
- size_t gt2[2] = { lt2[0] * (1 + (size.width - 1) / lt2[0]), optimizedSepFilterLocalSize};
+ size_t lt2[2] = { optimizedSepFilterLocalWidth, optimizedSepFilterLocalHeight };
+ size_t gt2[2] = { lt2[0] * (1 + (size.width - 1) / lt2[0]), lt2[1]};
char cvt[2][40];
const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP",
}
CV_OCL_RUN_(kernelY.cols <= 21 && kernelX.cols <= 21 &&
- imgSize.width > optimizedSepFilterLocalSize + anchor.x &&
- imgSize.height > optimizedSepFilterLocalSize + anchor.y &&
+ imgSize.width > optimizedSepFilterLocalWidth + anchor.x &&
+ imgSize.height > optimizedSepFilterLocalHeight + anchor.y &&
(!(borderType & BORDER_ISOLATED) || _src.offset() == 0) &&
anchor == Point(kernelX.cols >> 1, kernelY.cols >> 1) &&
(d.isIntel() || (d.isAMD() && !d.hostUnifiedMemory())),
// and read my own source pixel into local memory
// with account for extra border pixels, which will be read by starting workitems
int clocY = liy;
- int cSrcY = liy + srcOffsetY - RADIUSY;
do
{
- int yb = cSrcY;
+ int yb = clocY + srcOffsetY - RADIUSY;
EXTRAPOLATE(yb, (height));
int clocX = lix;
while(clocX < BLK_X+(RADIUSX*2));
clocY += BLK_Y;
- cSrcY += BLK_Y;
}
while (clocY < BLK_Y+(RADIUSY*2));
barrier(CLK_LOCAL_MEM_FENCE);
}
barrier(CLK_LOCAL_MEM_FENCE);
- int cSrcY = y + BLK_Y + liy + srcOffsetY + RADIUSY;
- EXTRAPOLATE(cSrcY, (height));
+ int yb = y + liy + BLK_Y + srcOffsetY + RADIUSY;
+ EXTRAPOLATE(yb, (height));
clocX = lix;
int cSrcX = x + srcOffsetX - RADIUSX;
{
int xb = cSrcX;
EXTRAPOLATE(xb,(width));
- lsmem[liy + 2*RADIUSY][clocX] = ELEM(xb, cSrcY, (width), (height), 0 );
+ lsmem[liy + 2*RADIUSY][clocX] = ELEM(xb, yb, (width), (height), 0 );
clocX += BLK_X;
cSrcX += BLK_X;