// Host-side launcher that builds and runs the "copyMakeBorder" OpenCL kernel.
// This excerpt is a patch: lines prefixed with a minus sign are removed and
// lines prefixed with a plus sign are added.
//
// The patch introduces rowsPerWI (4 when the default device reports isIntel(),
// otherwise 1), forwards it to the kernel as a -D build option, and divides the
// second global-size dimension by it, rounding up.
//
// NOTE(review): src and dst are used below but are not declared in the visible
// text, and k.args(...) appears directly under if (k.empty()). Lines seem to be
// missing from this excerpt -- confirm against the full file.
static bool ocl_copyMakeBorder( InputArray _src, OutputArray _dst, int top, int bottom,
int left, int right, int borderType, const Scalar& value )
{
- int type = _src.type(), cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
+ int type = _src.type(), cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type),
+ rowsPerWI = ocl::Device::getDefault().isIntel() ? 4 : 1;
// Record the BORDER_ISOLATED flag, then strip it so borderType can index borderMap.
bool isolated = (borderType & BORDER_ISOLATED) != 0;
borderType &= ~cv::BORDER_ISOLATED;
// Macro names handed to the kernel build; the entry at borderType is passed via -D.
const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP", "BORDER_REFLECT_101" };
// A 3-channel source uses a 4-channel type for the border scalar; other counts are kept.
int scalarcn = cn == 3 ? 4 : cn;
int sctype = CV_MAKETYPE(depth, scalarcn);
- String buildOptions = format(
- "-D T=%s -D %s "
- "-D T1=%s -D cn=%d -D ST=%s",
- ocl::memopTypeToStr(type), borderMap[borderType],
- ocl::memopTypeToStr(depth), cn, ocl::memopTypeToStr(sctype)
- );
+ String buildOptions = format("-D T=%s -D %s -D T1=%s -D cn=%d -D ST=%s -D rowsPerWI=%d",
+ ocl::memopTypeToStr(type), borderMap[borderType],
+ ocl::memopTypeToStr(depth), cn,
+ ocl::memopTypeToStr(sctype), rowsPerWI);
ocl::Kernel k("copyMakeBorder", ocl::core::copymakeborder_oclsrc, buildOptions);
if (k.empty())
k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst),
top, left, ocl::KernelArg::Constant(Mat(1, 1, sctype, value)));
- size_t globalsize[2] = { dst.cols, dst.rows };
// Second dimension is the number of rowsPerWI-sized row groups, rounded up.
+ size_t globalsize[2] = { dst.cols, (dst.rows + rowsPerWI - 1) / rowsPerWI };
return k.run(2, globalsize, NULL, false);
}
#endif
/*
 * Border-handling macros for the copyMakeBorder kernel, shown as a patch
 * (lines prefixed with a minus sign are removed, a plus sign marks additions).
 *
 * Removed form: EXTRAPOLATE(x, y, v) remapped both coordinates and also
 * loaded the pixel into v.
 * Added form: EXTRAPOLATE(x, cols) only remaps one coordinate x against its
 * extent cols, so the same macro is applied separately to columns and rows.
 */
#ifdef BORDER_CONSTANT
-#define EXTRAPOLATE(x, y, v) v = scalar;
/* Added form expands to an empty statement: no coordinate remapping is done here. */
+#define EXTRAPOLATE(x, cols) \
+ ;
#elif defined BORDER_REPLICATE
/* Added form clamps the coordinate into [0, cols - 1]. */
-#define EXTRAPOLATE(x, y, v) \
- { \
- x = clamp(x, 0, src_cols - 1); \
- y = clamp(y, 0, src_rows - 1); \
- v = loadpix(srcptr + mad24(y, src_step, mad24(x, TSIZE, src_offset))); \
- }
+#define EXTRAPOLATE(x, cols) \
+ x = clamp(x, 0, cols - 1);
#elif defined BORDER_WRAP
/* Added form: negative x is shifted up by a multiple of cols; x >= cols is reduced modulo cols. */
-#define EXTRAPOLATE(x, y, v) \
+#define EXTRAPOLATE(x, cols) \
{ \
if (x < 0) \
- x -= ((x - src_cols + 1) / src_cols) * src_cols; \
- if (x >= src_cols) \
- x %= src_cols; \
- \
- if (y < 0) \
- y -= ((y - src_rows + 1) / src_rows) * src_rows; \
- if( y >= src_rows ) \
- y %= src_rows; \
- v = loadpix(srcptr + mad24(y, src_step, mad24(x, TSIZE, src_offset))); \
+ x -= ((x - cols + 1) / cols) * cols; \
+ if (x >= cols) \
+ x %= cols; \
}
#elif defined(BORDER_REFLECT) || defined(BORDER_REFLECT_101)
/*
 * NOTE(review): the BORDER_REFLECT branch below shows no DELTA definition in
 * this excerpt; a line is presumably missing -- confirm against the full file.
 */
#ifdef BORDER_REFLECT
#else
#define DELTA int delta = 1
#endif
/*
 * Added form: a coordinate with cols == 1 maps to 0; otherwise x is reflected
 * repeatedly until it lies inside [0, cols).
 * NOTE(review): the do-body has a closing brace but no visible opening brace;
 * likely lost from this excerpt -- confirm.
 */
-#define EXTRAPOLATE(x, y, v) \
+#define EXTRAPOLATE(x, cols) \
{ \
DELTA; \
- if (src_cols == 1) \
+ if (cols == 1) \
x = 0; \
else \
do \
if( x < 0 ) \
x = -x - 1 + delta; \
else \
- x = src_cols - 1 - (x - src_cols) - delta; \
- } \
- while (x >= src_cols || x < 0); \
- \
- if (src_rows == 1) \
- y = 0; \
- else \
- do \
- { \
- if( y < 0 ) \
- y = -y - 1 + delta; \
- else \
- y = src_rows - 1 - (y - src_rows) - delta; \
+ x = cols - 1 - (x - cols) - delta; \
} \
- while (y >= src_rows || y < 0); \
- v = loadpix(srcptr + mad24(y, src_step, mad24(x, TSIZE, src_offset))); \
+ while (x >= cols || x < 0); \
}
#else
-#error No extrapolation method
+#error "No extrapolation method"
#endif
/* Added form tests one coordinate: true when x lies outside [0, cols). */
-#define NEED_EXTRAPOLATION(gx, gy) (gx >= src_cols || gy >= src_rows || gx < 0 || gy < 0)
+#define NEED_EXTRAPOLATION(x, cols) (x >= cols || x < 0)
/*
 * copyMakeBorder kernel, shown as a patch (lines prefixed with a minus sign are
 * removed, a plus sign marks additions).
 *
 * Removed form: one work-item wrote the single destination pixel (x, y).
 * Added form: one work-item handles destination column x and up to rowsPerWI
 * consecutive rows starting at y0. The column coordinate is resolved once
 * before the row loop; the row coordinate is resolved inside the loop.
 *
 * Destination (x, y) reads from source (x - left, y - top); coordinates that
 * fall outside the source are handled by NEED_EXTRAPOLATION / EXTRAPOLATE,
 * or filled with the converted nVal when BORDER_CONSTANT is defined.
 */
__kernel void copyMakeBorder(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
__global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
int top, int left, ST nVal)
{
int x = get_global_id(0);
- int y = get_global_id(1);
// First destination row handled by this work-item.
+ int y0 = get_global_id(1) * rowsPerWI;
#ifdef BORDER_CONSTANT
T scalar = convertScalar(nVal);
#endif
- if (x < dst_cols && y < dst_rows)
// The row bound is no longer tested here; the loop limits below clamp to dst_rows.
+ if (x < dst_cols)
{
- int src_x = x - left;
- int src_y = y - top;
+ int src_x = x - left, src_y;
// Byte offset of destination pixel (x, y0); advanced by dst_step for each row.
+ int dst_index = mad24(y0, dst_step, mad24(x, (int)TSIZE, dst_offset));
- int dst_index = mad24(y, dst_step, mad24(x, (int)TSIZE, dst_offset));
- __global T * dst = (__global T *)(dstptr + dst_index);
-
- T v;
- if (NEED_EXTRAPOLATION(src_x, src_y))
+ if (NEED_EXTRAPOLATION(src_x, src_cols))
{
- EXTRAPOLATE(src_x, src_y, v)
+#ifdef BORDER_CONSTANT
// Column is outside the source: every row of this strip gets the constant, then exit.
+ for (int y = y0, y1 = min(y0 + rowsPerWI, dst_rows); y < y1; ++y, dst_index += dst_step)
+ storepix(scalar, dstptr + dst_index);
+ return;
+#endif
+ EXTRAPOLATE(src_x, src_cols)
}
- else
// From here on src_x holds a byte offset within a source row, not a column index.
+ src_x = mad24(src_x, TSIZE, src_offset);
+
// Walk the rows of this strip; y1 stops at dst_rows for a partial last strip.
+ for (int y = y0, y1 = min(y0 + rowsPerWI, dst_rows); y < y1; ++y, dst_index += dst_step)
{
- int src_index = mad24(src_y, src_step, mad24(src_x, TSIZE, src_offset));
- v = loadpix(srcptr + src_index);
+ src_y = y - top;
+ if (NEED_EXTRAPOLATION(src_y, src_rows))
+ {
+ EXTRAPOLATE(src_y, src_rows)
+#ifdef BORDER_CONSTANT
// Row is outside the source: store the constant and skip the source load.
+ storepix(scalar, dstptr + dst_index);
+ continue;
+#endif
+ }
+ int src_index = mad24(src_y, src_step, src_x);
+ storepix(loadpix(srcptr + src_index), dstptr + dst_index);
}
- storepix(v, dst);
}
}