#endif
#endif
+// Pixel access helpers, selected by the channel count `cn`.
+//  - cn != 3: a pixel is read/written directly as one ST / DT value.
+//  - cn == 3: OpenCL 3-component vector types occupy the storage of the
+//    4-component type, so sizeof(ST) / sizeof(DT) would not match packed
+//    3-channel data. Pixels are accessed with vload3 / vstore3 through the
+//    scalar types ST1 / DT1, and the pixel size is sizeof(scalar) * cn.
+// Every expansion is fully parenthesized so the macros stay correct inside
+// any surrounding expression.
+#if cn != 3
+#define loadpix(addr) (*(__global const ST *)(addr))
+#define storepix(val, addr) (*(__global DT *)(addr) = (val))
+#define SRCSIZE ((int)sizeof(ST))
+#define DSTSIZE ((int)sizeof(DT))
+#else
+#define loadpix(addr) vload3(0, (__global const ST1 *)(addr))
+#define storepix(val, addr) vstore3((val), 0, (__global DT1 *)(addr))
+#define SRCSIZE ((int)sizeof(ST1) * cn)
+#define DSTSIZE ((int)sizeof(DT1) * cn)
+#endif
+
#ifdef BORDER_CONSTANT
#elif defined BORDER_REPLICATE
#define EXTRAPOLATE(x, y, minX, minY, maxX, maxY) \
if (pos.x >= 0 && pos.y >= 0 && pos.x < srcCoords.x2 && pos.y < srcCoords.y2)
#endif
{
- int src_index = mad24(pos.y, src_step, pos.x * (int)sizeof(ST));
- WT value = convertToWT(*(__global const ST *)(srcptr + src_index));
+ int src_index = mad24(pos.y, src_step, pos.x * SRCSIZE);
+ WT value = convertToWT(loadpix(srcptr + src_index));
return PROCESS_ELEM(value);
}
#endif
srcCoords.x2, srcCoords.y2);
- int src_index = mad24(selected_row, src_step, selected_col * (int)sizeof(ST));
- WT value = convertToWT(*(__global const ST *)(srcptr + src_index));
+ int src_index = mad24(selected_row, src_step, selected_col * SRCSIZE);
+ WT value = convertToWT(loadpix(srcptr + src_index));
return PROCESS_ELEM(value);
#endif
sumOfCols[local_id] = tmp_sum;
barrier(CLK_LOCAL_MEM_FENCE);
- int dst_index = mad24(y, dst_step, x * (int)sizeof(DT) + dst_offset);
+ int dst_index = mad24(y, dst_step, mad24(x, DSTSIZE, dst_offset));
__global DT * dst = (__global DT *)(dstptr + dst_index);
int sy_index = 0; // current index in data[] array
total_sum += sumOfCols[local_id + sx - ANCHOR_X];
#ifdef NORMALIZE
- dst[0] = convertToDT((WT)(alpha) * total_sum);
+ DT dstval = convertToDT((WT)(alpha) * total_sum);
#else
- dst[0] = convertToDT(total_sum);
+ DT dstval = convertToDT(total_sum);
#endif
+ storepix(dstval, dst);
}
barrier(CLK_LOCAL_MEM_FENCE);
//M*/
#include "precomp.hpp"
+#define CV_OPENCL_RUN_ASSERT
#include "opencl_kernels.hpp"
/*
if (ddepth < 0)
ddepth = sdepth;
- if (!(cn == 1 || cn == 2 || cn == 4) || (!doubleSupport && (sdepth == CV_64F || ddepth == CV_64F)) ||
+ // Reject (return false) when there are more than 4 channels, when 64F data
+ // is requested without double support, or when the source offset/step is
+ // not a multiple of esz. No diagnostic output is emitted here.
+ if (cn > 4 || (!doubleSupport && (sdepth == CV_64F || ddepth == CV_64F)) ||
_src.offset() % esz != 0 || _src.step() % esz != 0)
return false;
if (anchor.x < 0)
anchor.x = ksize.width / 2;
return false;
char cvt[2][50];
- String opts = format("-D LOCAL_SIZE_X=%d -D BLOCK_SIZE_Y=%d -D ST=%s -D DT=%s -D WT=%s -D convertToDT=%s -D convertToWT=%s "
- "-D ANCHOR_X=%d -D ANCHOR_Y=%d -D KERNEL_SIZE_X=%d -D KERNEL_SIZE_Y=%d -D %s%s%s%s%s",
+ String opts = format("-D LOCAL_SIZE_X=%d -D BLOCK_SIZE_Y=%d -D ST=%s -D DT=%s -D WT=%s -D convertToDT=%s -D convertToWT=%s"
+ " -D ANCHOR_X=%d -D ANCHOR_Y=%d -D KERNEL_SIZE_X=%d -D KERNEL_SIZE_Y=%d -D %s%s%s%s%s"
+ " -D ST1=%s -D DT1=%s -D cn=%d",
BLOCK_SIZE_X, BLOCK_SIZE_Y, ocl::typeToStr(type), ocl::typeToStr(CV_MAKE_TYPE(ddepth, cn)),
ocl::typeToStr(CV_MAKE_TYPE(wdepth, cn)),
ocl::convertTypeStr(wdepth, ddepth, cn, cvt[0]),
ocl::convertTypeStr(sdepth, wdepth, cn, cvt[1]),
anchor.x, anchor.y, ksize.width, ksize.height, borderMap[borderType],
isolated ? " -D BORDER_ISOLATED" : "", doubleSupport ? " -D DOUBLE_SUPPORT" : "",
- normalize ? " -D NORMALIZE" : "", sqr ? " -D SQR" : "");
+ normalize ? " -D NORMALIZE" : "", sqr ? " -D SQR" : "",
+ ocl::typeToStr(sdepth), ocl::typeToStr(ddepth), cn);
localsize[0] = BLOCK_SIZE_X;
globalsize[0] = DIVUP(size.width, BLOCK_SIZE_X - (ksize.width - 1)) * BLOCK_SIZE_X;
OCL_INSTANTIATE_TEST_CASE_P(ImageProc, BoxFilter,
Combine(
Values(CV_8U, CV_16U, CV_16S, CV_32S, CV_32F),
- Values(1, 2, 4),
+ OCL_ALL_CHANNELS,
Values((BorderType)BORDER_CONSTANT,
(BorderType)BORDER_REPLICATE,
(BorderType)BORDER_REFLECT,
OCL_INSTANTIATE_TEST_CASE_P(ImageProc, SqrBoxFilter,
Combine(
Values(CV_8U, CV_16U, CV_16S, CV_32F, CV_64F),
- Values(1, 2, 4),
+ OCL_ALL_CHANNELS,
Values((BorderType)BORDER_CONSTANT,
(BorderType)BORDER_REPLICATE,
(BorderType)BORDER_REFLECT,