// Host-side setup of the filter2D OpenCL kernel arguments. This is an excerpt in patch
// form: lines starting with '+' are additions and lines starting with '-' are removals.
size_t localsize[2] = {0, 1};
ocl::Kernel kernel;
// For a non-isolated border the source is fetched early so that locateROI can report the
// size of the parent matrix the ROI belongs to (stored in wholeSize).
+ UMat src; Size wholeSize;
+ if (!isIsolatedBorder)
+ {
+ src = _src.getUMat();
+ Point ofs;
+ src.locateROI(wholeSize, ofs);
+ }
size_t maxWorkItemSizes[32]; device.maxWorkItemSizes(maxWorkItemSizes);
size_t tryWorkItems = maxWorkItemSizes[0];
int requiredLeft = (int)BLOCK_SIZE; // not this: anchor.x;
int requiredBottom = ksize.height - 1 - anchor.y;
int requiredRight = (int)BLOCK_SIZE; // not this: ksize.width - 1 - anchor.x;
// Extent used for the border checks below: the ROI size when the border is isolated,
// otherwise the size of the whole parent matrix.
- int h = sz.height;
- int w = sz.width;
+ int h = isIsolatedBorder ? sz.height : wholeSize.height;
+ int w = isIsolatedBorder ? sz.width : wholeSize.width;
bool extra_extrapolation = h < requiredTop || h < requiredBottom || w < requiredLeft || w < requiredRight;
// NOTE(review): as excerpted, this condition is immediately followed by _dst.create();
// confirm against the full file whether statements are elided between the two lines.
if ((w < ksize.width) || (h < ksize.height))
_dst.create(sz, CV_MAKETYPE(ddepth, cn));
UMat dst = _dst.getUMat();
// src was only fetched above in the non-isolated case; fetch it here otherwise.
- UMat src = _src.getUMat();
+ if (src.empty())
+ src = _src.getUMat();
int idxArg = 0;
// The source is passed as a bare pointer followed by its row step; the ROI position and
// readable extent follow as separate int arguments.
- idxArg = kernel.set(idxArg, ocl::KernelArg::ReadOnlyNoSize(src));
+ idxArg = kernel.set(idxArg, ocl::KernelArg::PtrReadOnly(src));
+ idxArg = kernel.set(idxArg, (int)src.step);
+
// Split the byte offset of the ROI into a column index (in elements) and a row index.
+ int srcOffsetX = (int)((src.offset % src.step) / src.elemSize());
+ int srcOffsetY = (int)(src.offset / src.step);
// End coordinates of the readable region: the end of the ROI when the border is isolated,
// otherwise the parent matrix size.
+ int srcEndX = (isIsolatedBorder ? (srcOffsetX + sz.width) : wholeSize.width);
+ int srcEndY = (isIsolatedBorder ? (srcOffsetY + sz.height) : wholeSize.height);
+ idxArg = kernel.set(idxArg, srcOffsetX);
+ idxArg = kernel.set(idxArg, srcOffsetY);
+ idxArg = kernel.set(idxArg, srcEndX);
+ idxArg = kernel.set(idxArg, srcEndY);
+
idxArg = kernel.set(idxArg, ocl::KernelArg::WriteOnly(dst));
float borderValue[4] = {0, 0, 0, 0};
double borderValueDouble[4] = {0, 0, 0, 0};
do \
{ \
if (x < minX) \
- x = -(x - minX) - 1 + delta; \
+ x = minX - (x - minX) - 1 + delta; \
else \
x = maxX - 1 - (x - maxX) - delta; \
} \
do \
{ \
if (y < minY) \
- y = -(y - minY) - 1 + delta; \
+ y = minY - (y - minY) - 1 + delta; \
else \
y = maxY - 1 - (y - maxY) - delta; \
} \
#endif
// Reads one source pixel at `pos` and converts it with CONVERT_TO_FPTYPE (patch excerpt).
// After this change the separate byte offset parameter is gone: the address is computed
// from srcptr, the row step and pos alone, so `pos` must already include the ROI origin.
-inline INTERMEDIATE_TYPE readSrcPixel(int2 pos, __global const uchar* srcptr, int srcstep, int srcoffset, const struct RectCoords srcCoords
+inline INTERMEDIATE_TYPE readSrcPixel(int2 pos, __global const uchar* srcptr, int srcstep, const struct RectCoords srcCoords
#ifdef BORDER_CONSTANT
, SCALAR_TYPE borderValue
#endif
#endif
{
//__global TYPE* ptr = (__global TYPE*)((__global char*)src + pos.x * sizeof(TYPE) + pos.y * srcStepBytes);
- __global TYPE* ptr = (__global TYPE*)(srcptr + pos.y * srcstep + srcoffset + pos.x * sizeof(TYPE));
+ __global TYPE* ptr = (__global TYPE*)(srcptr + pos.y * srcstep + pos.x * sizeof(TYPE));
return CONVERT_TO_FPTYPE(*ptr);
}
else
// NOTE(review): the lower bound here is tested against 0 rather than srcCoords.x1/y1;
// presumably this branch is meant for the non-isolated border case - confirm against
// the conditions that are not visible in this excerpt.
if(pos.x >= 0 && pos.y >= 0 && pos.x < srcCoords.x2 && pos.y < srcCoords.y2)
{
//__global TYPE* ptr = (__global TYPE*)((__global char*)src + pos.x * sizeof(TYPE) + pos.y * srcStepBytes);
- __global TYPE* ptr = (__global TYPE*)(srcptr + pos.y * srcstep + srcoffset + pos.x * sizeof(TYPE));
+ __global TYPE* ptr = (__global TYPE*)(srcptr + pos.y * srcstep + pos.x * sizeof(TYPE));
return CONVERT_TO_FPTYPE(*ptr);
}
else
// filter2D kernel entry point (patch excerpt). The source is now described by its row step,
// the ROI origin (srcOffsetX, srcOffsetY) and the end of the readable area
// (srcEndX, srcEndY) instead of a single byte offset. The destination is still addressed
// through dststep/dstoffset with coordinates relative to the destination ROI.
__kernel
__attribute__((reqd_work_group_size(LOCAL_SIZE, 1, 1)))
-void filter2D(__global const uchar* srcptr, int srcstep, int srcoffset,
- __global uchar* dstptr, int dststep, int dstoffset,
+void filter2D(__global const uchar* srcptr, int srcstep, int srcOffsetX, int srcOffsetY, int srcEndX, int srcEndY,
+ __global uchar* dstptr, int dststep, int dstoffset,
int rows, int cols,
#ifdef BORDER_CONSTANT
SCALAR_TYPE borderValue,
__constant FPTYPE* kernelData // transposed: [KERNEL_SIZE_X][KERNEL_SIZE_Y2_ALIGNED]
)
{
- const struct RectCoords srcCoords = {0, 0, cols, rows}; // for non-isolated border: offsetX, offsetY, wholeX, wholeY
- const struct RectCoords dstCoords = {0, 0, cols, rows};
+ const struct RectCoords srcCoords = {srcOffsetX, srcOffsetY, srcEndX, srcEndY}; // for non-isolated border: offsetX, offsetY, wholeX, wholeY
const int local_id = get_local_id(0);
const int x = local_id + (LOCAL_SIZE - (KERNEL_SIZE_X - 1)) * get_group_id(0) - ANCHOR_X;
// Source position is expressed in the coordinates of the underlying buffer (ROI origin added).
int2 srcPos = (int2)(srcCoords.x1 + x, srcCoords.y1 + y - ANCHOR_Y);
// Destination position stays relative to the destination ROI; dstoffset is applied below.
- int2 pos = (int2)(dstCoords.x1 + x, dstCoords.y1 + y);
+ int2 pos = (int2)(x, y);
__global TYPE* dstPtr = (__global TYPE*)((__global char*)dstptr + pos.y * dststep + dstoffset + pos.x * sizeof(TYPE)); // Pointer can be out of bounds!
// A work-item stores a result only if it is at least ANCHOR_X from the left edge of the
// work-group, at least (KERNEL_SIZE_X - 1 - ANCHOR_X) from the right edge, and its column
// lies inside [0, cols).
- bool writeResult = (local_id >= ANCHOR_X && local_id < LOCAL_SIZE - (KERNEL_SIZE_X - 1 - ANCHOR_X) &&
- pos.x >= dstCoords.x1 && pos.x < dstCoords.x2);
+ bool writeResult = ((local_id >= ANCHOR_X) && (local_id < LOCAL_SIZE - (KERNEL_SIZE_X - 1 - ANCHOR_X)) &&
+ (pos.x >= 0) && (pos.x < cols));
#if BLOCK_SIZE_Y > 1
bool readAllpixels = true;
int sy_index = 0; // current index in data[] array
// NOTE(review): no declaration of dstRowsMax is visible in this excerpt - confirm it is
// declared in the elided lines, since the removed code assigned to dstCoords.y2 instead.
- dstCoords.y2 = min(dstCoords.y2, pos.y + BLOCK_SIZE_Y);
+ dstRowsMax = min(rows, pos.y + BLOCK_SIZE_Y);
// NOTE(review): the loop increment below recomputes dstPtr from the base pointer `dstptr`
// plus one row step, not from the running `dstPtr`; this looks unintended for
// BLOCK_SIZE_Y > 1 - confirm.
for (;
- pos.y < dstCoords.y2;
+ pos.y < dstRowsMax;
pos.y++,
dstPtr = (__global TYPE*)((__global char*)dstptr + dststep))
#endif
{
- ASSERT(pos.y < dstCoords.y2);
+ ASSERT(pos.y < dstRowsMax);
for (
#if BLOCK_SIZE_Y > 1
#endif
sy++, srcPos.y++)
{
- data[sy + sy_index] = readSrcPixel(srcPos, srcptr, srcstep, srcoffset, srcCoords
+ data[sy + sy_index] = readSrcPixel(srcPos, srcptr, srcstep, srcCoords
#ifdef BORDER_CONSTANT
, borderValue
#endif
if (writeResult)
{
- ASSERT(pos.y >= dstCoords.y1 && pos.y < dstCoords.y2);
*dstPtr = CONVERT_TO_TYPE(total_sum);
}
namespace cvtest {
namespace ocl {
-enum
-{
- noType = -1,
-};
-
-
/////////////////////////////////////////////////////////////////////////////////////////////////
// Filter2D
// Filter2D test fixture (patch excerpt). The parameter tuple gains a second bool: the
// first bool now selects BORDER_ISOLATED and the second one selects ROI usage.
-PARAM_TEST_CASE(Filter2D, MatDepth, Channels, BorderType, bool)
+PARAM_TEST_CASE(Filter2D, MatDepth, Channels, BorderType, bool, bool)
{
- static const int kernelMinSize = 1;
+ static const int kernelMinSize = 2;
static const int kernelMaxSize = 10;
int type;
virtual void SetUp()
{
type = CV_MAKE_TYPE(GET_PARAM(0), GET_PARAM(1));
// Parameter 3 ORs BORDER_ISOLATED into the border type; parameter 4 toggles ROI usage.
- borderType = GET_PARAM(2);
- useRoi = GET_PARAM(3);
+ borderType = GET_PARAM(2) | (GET_PARAM(3) ? BORDER_ISOLATED : 0);
+ useRoi = GET_PARAM(4);
}
void random_roi()
dsize = randomSize(1, MAX_VALUE);
Size ksize = randomSize(kernelMinSize, kernelMaxSize);
// The random kernel is now generated into a temporary and scaled so that its L1 norm is 1.0
// (presumably to keep filtered values in a comparable range - confirm).
- kernel = randomMat(ksize, CV_MAKE_TYPE(((CV_64F == CV_MAT_DEPTH(type)) ? CV_64F : CV_32F), 1), -MAX_VALUE, MAX_VALUE);
+ Mat temp = randomMat(ksize, CV_MAKE_TYPE(((CV_64F == CV_MAT_DEPTH(type)) ? CV_64F : CV_32F), 1), -MAX_VALUE, MAX_VALUE);
+ cv::normalize(temp, kernel, 1.0, 0.0, NORM_L1);
// NOTE(review): the ROI size is hard-coded to 1024x1024 while the randomized variant is
// commented out; this looks like a debugging leftover - confirm before merging.
- Size roiSize = randomSize(1, MAX_VALUE);
+ //Size roiSize = randomSize(ksize.width, MAX_VALUE, ksize.height, MAX_VALUE);
+ Size roiSize(1024, 1024);
Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
randomSubMat(src, src_roi, roiSize, srcBorder, type, -MAX_VALUE, MAX_VALUE);
// NOTE(review): the destination is created with dsize, which is drawn independently of
// roiSize - verify this is intended.
Border dstBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
randomSubMat(dst, dst_roi, dsize, dstBorder, type, -MAX_VALUE, MAX_VALUE);
// The anchor is now randomized per axis instead of always being (-1, -1).
- anchor.x = anchor.y = -1;
+ anchor.x = randomInt(-1, ksize.width);
+ anchor.y = randomInt(-1, ksize.height);
UMAT_UPLOAD_INPUT_PARAMETER(src)
UMAT_UPLOAD_OUTPUT_PARAMETER(dst)
(BorderType)BORDER_REPLICATE,
(BorderType)BORDER_REFLECT,
(BorderType)BORDER_REFLECT_101),
- Bool())
+ Bool(), // BORDER_ISOLATED
+ Bool() // ROI
+ )
);