From dca401d4ccb2e4feaa2fd7c7d8aaa9fc06f70012 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Thu, 27 Feb 2014 16:25:26 +0400 Subject: [PATCH] ocl: pyrUp/pyrDown 3-channel --- modules/imgproc/perf/opencl/perf_pyramid.cpp | 6 +-- modules/imgproc/src/opencl/pyr_down.cl | 76 ++++++++++++++++------------ modules/imgproc/src/opencl/pyr_up.cl | 18 +++++-- modules/imgproc/src/pyramids.cpp | 36 ++++++++----- modules/imgproc/test/ocl/test_pyramids.cpp | 4 +- 5 files changed, 84 insertions(+), 56 deletions(-) diff --git a/modules/imgproc/perf/opencl/perf_pyramid.cpp b/modules/imgproc/perf/opencl/perf_pyramid.cpp index 55bb067..5975845 100644 --- a/modules/imgproc/perf/opencl/perf_pyramid.cpp +++ b/modules/imgproc/perf/opencl/perf_pyramid.cpp @@ -57,7 +57,7 @@ namespace ocl { typedef Size_MatType PyrDownFixture; OCL_PERF_TEST_P(PyrDownFixture, PyrDown, - ::testing::Combine(OCL_TEST_SIZES, OCL_TEST_TYPES)) + ::testing::Combine(OCL_TEST_SIZES, OCL_TEST_TYPES_134)) { const Size_MatType_t params = GetParam(); const Size srcSize = get<0>(params); @@ -81,7 +81,7 @@ OCL_PERF_TEST_P(PyrDownFixture, PyrDown, typedef Size_MatType PyrUpFixture; OCL_PERF_TEST_P(PyrUpFixture, PyrUp, - ::testing::Combine(OCL_TEST_SIZES, OCL_TEST_TYPES)) + ::testing::Combine(OCL_TEST_SIZES, OCL_TEST_TYPES_134)) { const Size_MatType_t params = GetParam(); const Size srcSize = get<0>(params); @@ -105,7 +105,7 @@ OCL_PERF_TEST_P(PyrUpFixture, PyrUp, typedef Size_MatType BuildPyramidFixture; OCL_PERF_TEST_P(BuildPyramidFixture, BuildPyramid, - ::testing::Combine(OCL_TEST_SIZES, OCL_TEST_TYPES)) + ::testing::Combine(OCL_TEST_SIZES, OCL_TEST_TYPES_134)) { const Size_MatType_t params = GetParam(); const Size srcSize = get<0>(params); diff --git a/modules/imgproc/src/opencl/pyr_down.cl b/modules/imgproc/src/opencl/pyr_down.cl index 745813f..6ba0cc6 100644 --- a/modules/imgproc/src/opencl/pyr_down.cl +++ b/modules/imgproc/src/opencl/pyr_down.cl @@ -51,6 +51,16 @@ #endif #endif +#if cn != 3 +#define loadpix(addr) *(__global const T*)(addr) +#define storepix(val, addr) *(__global T*)(addr) = (val) +#define PIXSIZE ((int)sizeof(T)) +#else +#define loadpix(addr) vload3(0, (__global const T1*)(addr)) +#define storepix(val, addr) vstore3((val), 0, (__global T1*)(addr)) +#define PIXSIZE ((int)sizeof(T1)*3) +#endif + #define noconvert inline int idx_row_low(int y, int last_row) @@ -90,8 +100,8 @@ __kernel void pyrDown(__global const uchar * src, int src_step, int src_offset, const int y = get_group_id(1); __local FT smem[256 + 4]; - __global T * dstData = (__global T *)(dst + dst_offset); - __global const uchar * srcData = (__global const uchar*)(src + src_offset); + __global uchar * dstData = dst + dst_offset; + __global const uchar * srcData = src + src_offset; FT sum; FT co1 = 0.375f; @@ -104,11 +114,11 @@ __kernel void pyrDown(__global const uchar * src, int src_step, int src_offset, if (src_y >= 2 && src_y < src_rows - 2 && x >= 2 && x < src_cols - 2) { - sum = co3 * convertToFT(((__global T*)(srcData + (src_y - 2) * src_step))[x]); - sum = sum + co2 * convertToFT(((__global T*)(srcData + (src_y - 1) * src_step))[x]); - sum = sum + co1 * convertToFT(((__global T*)(srcData + (src_y ) * src_step))[x]); - sum = sum + co2 * convertToFT(((__global T*)(srcData + (src_y + 1) * src_step))[x]); - sum = sum + co3 * convertToFT(((__global T*)(srcData + (src_y + 2) * src_step))[x]); + sum = co3 * convertToFT(loadpix(srcData + (src_y - 2) * src_step + x * PIXSIZE)); + sum = sum + co2 * convertToFT(loadpix(srcData + (src_y - 1) * src_step + x * PIXSIZE)); + sum = sum + co1 * convertToFT(loadpix(srcData + (src_y ) * src_step + x * PIXSIZE)); + sum = sum + co2 * convertToFT(loadpix(srcData + (src_y + 1) * src_step + x * PIXSIZE)); + sum = sum + co3 * convertToFT(loadpix(srcData + (src_y + 2) * src_step + x * PIXSIZE)); smem[2 + get_local_id(0)] = sum; @@ -116,11 +126,11 @@ __kernel void pyrDown(__global const uchar * src, int src_step, int src_offset, { const int left_x = x - 2; - sum = co3 * convertToFT(((__global T*)(srcData + (src_y - 2) * src_step))[left_x]); - sum = sum + co2 * convertToFT(((__global T*)(srcData + (src_y - 1) * src_step))[left_x]); - sum = sum + co1 * convertToFT(((__global T*)(srcData + (src_y ) * src_step))[left_x]); - sum = sum + co2 * convertToFT(((__global T*)(srcData + (src_y + 1) * src_step))[left_x]); - sum = sum + co3 * convertToFT(((__global T*)(srcData + (src_y + 2) * src_step))[left_x]); + sum = co3 * convertToFT(loadpix(srcData + (src_y - 2) * src_step + left_x * PIXSIZE)); + sum = sum + co2 * convertToFT(loadpix(srcData + (src_y - 1) * src_step + left_x * PIXSIZE)); + sum = sum + co1 * convertToFT(loadpix(srcData + (src_y ) * src_step + left_x * PIXSIZE)); + sum = sum + co2 * convertToFT(loadpix(srcData + (src_y + 1) * src_step + left_x * PIXSIZE)); + sum = sum + co3 * convertToFT(loadpix(srcData + (src_y + 2) * src_step + left_x * PIXSIZE)); smem[get_local_id(0)] = sum; } @@ -129,11 +139,11 @@ __kernel void pyrDown(__global const uchar * src, int src_step, int src_offset, { const int right_x = x + 2; - sum = co3 * convertToFT(((__global T*)(srcData + (src_y - 2) * src_step))[right_x]); - sum = sum + co2 * convertToFT(((__global T*)(srcData + (src_y - 1) * src_step))[right_x]); - sum = sum + co1 * convertToFT(((__global T*)(srcData + (src_y ) * src_step))[right_x]); - sum = sum + co2 * convertToFT(((__global T*)(srcData + (src_y + 1) * src_step))[right_x]); - sum = sum + co3 * convertToFT(((__global T*)(srcData + (src_y + 2) * src_step))[right_x]); + sum = co3 * convertToFT(loadpix(srcData + (src_y - 2) * src_step + right_x * PIXSIZE)); + sum = sum + co2 * convertToFT(loadpix(srcData + (src_y - 1) * src_step + right_x * PIXSIZE)); + sum = sum + co1 * convertToFT(loadpix(srcData + (src_y ) * src_step + right_x * PIXSIZE)); + sum = sum + co2 * convertToFT(loadpix(srcData + (src_y + 1) * src_step + right_x * PIXSIZE)); + sum = sum + co3 * convertToFT(loadpix(srcData + (src_y + 2) * src_step + right_x * PIXSIZE)); smem[4 + get_local_id(0)] = sum; } @@ -142,11 +152,11 @@ __kernel void pyrDown(__global const uchar * src, int src_step, int src_offset, { int col = idx_col(x, last_col); - sum = co3 * convertToFT(((__global T*)(srcData + idx_row(src_y - 2, last_row) * src_step))[col]); - sum = sum + co2 * convertToFT(((__global T*)(srcData + idx_row(src_y - 1, last_row) * src_step))[col]); - sum = sum + co1 * convertToFT(((__global T*)(srcData + idx_row(src_y , last_row) * src_step))[col]); - sum = sum + co2 * convertToFT(((__global T*)(srcData + idx_row(src_y + 1, last_row) * src_step))[col]); - sum = sum + co3 * convertToFT(((__global T*)(srcData + idx_row(src_y + 2, last_row) * src_step))[col]); + sum = co3 * convertToFT(loadpix(srcData + idx_row(src_y - 2, last_row) * src_step + col * PIXSIZE)); + sum = sum + co2 * convertToFT(loadpix(srcData + idx_row(src_y - 1, last_row) * src_step + col * PIXSIZE)); + sum = sum + co1 * convertToFT(loadpix(srcData + idx_row(src_y , last_row) * src_step + col * PIXSIZE)); + sum = sum + co2 * convertToFT(loadpix(srcData + idx_row(src_y + 1, last_row) * src_step + col * PIXSIZE)); + sum = sum + co3 * convertToFT(loadpix(srcData + idx_row(src_y + 2, last_row) * src_step + col * PIXSIZE)); smem[2 + get_local_id(0)] = sum; @@ -156,11 +166,11 @@ __kernel void pyrDown(__global const uchar * src, int src_step, int src_offset, col = idx_col(left_x, last_col); - sum = co3 * convertToFT(((__global T*)(srcData + idx_row(src_y - 2, last_row) * src_step))[col]); - sum = sum + co2 * convertToFT(((__global T*)(srcData + idx_row(src_y - 1, last_row) * src_step))[col]); - sum = sum + co1 * convertToFT(((__global T*)(srcData + idx_row(src_y , last_row) * src_step))[col]); - sum = sum + co2 * convertToFT(((__global T*)(srcData + idx_row(src_y + 1, last_row) * src_step))[col]); - sum = sum + co3 * convertToFT(((__global T*)(srcData + idx_row(src_y + 2, last_row) * src_step))[col]); + sum = co3 * convertToFT(loadpix(srcData + idx_row(src_y - 2, last_row) * src_step + col * PIXSIZE)); + sum = sum + co2 * convertToFT(loadpix(srcData + idx_row(src_y - 1, last_row) * src_step + col * PIXSIZE)); + sum = sum + co1 * convertToFT(loadpix(srcData + idx_row(src_y , last_row) * src_step + col * PIXSIZE)); + sum = sum + co2 * convertToFT(loadpix(srcData + idx_row(src_y + 1, last_row) * src_step + col * PIXSIZE)); + sum = sum + co3 * convertToFT(loadpix(srcData + idx_row(src_y + 2, last_row) * src_step + col * PIXSIZE)); smem[get_local_id(0)] = sum; } @@ -171,11 +181,11 @@ __kernel void pyrDown(__global const uchar * src, int src_step, int src_offset, col = idx_col(right_x, last_col); - sum = co3 * convertToFT(((__global T*)(srcData + idx_row(src_y - 2, last_row) * src_step))[col]); - sum = sum + co2 * convertToFT(((__global T*)(srcData + idx_row(src_y - 1, last_row) * src_step))[col]); - sum = sum + co1 * convertToFT(((__global T*)(srcData + idx_row(src_y , last_row) * src_step))[col]); - sum = sum + co2 * convertToFT(((__global T*)(srcData + idx_row(src_y + 1, last_row) * src_step))[col]); - sum = sum + co3 * convertToFT(((__global T*)(srcData + idx_row(src_y + 2, last_row) * src_step))[col]); + sum = co3 * convertToFT(loadpix(srcData + idx_row(src_y - 2, last_row) * src_step + col * PIXSIZE)); + sum = sum + co2 * convertToFT(loadpix(srcData + idx_row(src_y - 1, last_row) * src_step + col * PIXSIZE)); + sum = sum + co1 * convertToFT(loadpix(srcData + idx_row(src_y , last_row) * src_step + col * PIXSIZE)); + sum = sum + co2 * convertToFT(loadpix(srcData + idx_row(src_y + 1, last_row) * src_step + col * PIXSIZE)); + sum = sum + co3 * convertToFT(loadpix(srcData + idx_row(src_y + 2, last_row) * src_step + col * PIXSIZE)); smem[4 + get_local_id(0)] = sum; } @@ -196,7 +206,7 @@ __kernel void pyrDown(__global const uchar * src, int src_step, int src_offset, const int dst_x = (get_group_id(0) * get_local_size(0) + tid2) / 2; if (dst_x < dst_cols) - dstData[y * dst_step / ((int)sizeof(T)) + dst_x] = convertToT(sum); + storepix(convertToT(sum), dstData + y * dst_step + dst_x * PIXSIZE); } } diff --git a/modules/imgproc/src/opencl/pyr_up.cl b/modules/imgproc/src/opencl/pyr_up.cl index b8cf6ab..d754a70 100644 --- a/modules/imgproc/src/opencl/pyr_up.cl +++ b/modules/imgproc/src/opencl/pyr_up.cl @@ -58,6 +58,16 @@ #endif #endif +#if cn != 3 +#define loadpix(addr) *(__global const T*)(addr) +#define storepix(val, addr) *(__global T*)(addr) = (val) +#define PIXSIZE ((int)sizeof(T)) +#else +#define loadpix(addr) vload3(0, (__global const T1*)(addr)) +#define storepix(val, addr) vstore3((val), 0, (__global T1*)(addr)) +#define PIXSIZE ((int)sizeof(T1)*3) +#endif + #define noconvert @@ -76,8 +86,8 @@ __kernel void pyrUp(__global const uchar * src, int src_step, int src_offset, in __local FT s_srcPatch[10][10]; __local FT s_dstPatch[20][16]; - __global T * dstData = (__global T *)(dst + dst_offset); - __global const T * srcData = (__global const T *)(src + src_offset); + __global uchar * dstData = dst + dst_offset; + __global const uchar * srcData = src + src_offset; if( tidx < 10 && tidy < 10 ) { @@ -90,7 +100,7 @@ __kernel void pyrUp(__global const uchar * src, int src_step, int src_offset, in srcy = abs(srcy); srcy = min(src_rows - 1, srcy); - s_srcPatch[tidy][tidx] = convertToFT(srcData[srcx + srcy * src_step / (int) sizeof(T)]); + s_srcPatch[tidy][tidx] = convertToFT(loadpix(srcData + srcy * src_step + srcx * PIXSIZE)); } barrier(CLK_LOCAL_MEM_FENCE); @@ -155,5 +165,5 @@ __kernel void pyrUp(__global const uchar * src, int src_step, int src_offset, in sum = sum + co3 * s_dstPatch[2 + tidy + 2][tidx]; if ((x < dst_cols) && (y < dst_rows)) - dstData[x + y * dst_step / (int)sizeof(T)] = convertToT(4.0f * sum); + storepix(convertToT(4.0f * sum), dstData + y * dst_step + x * PIXSIZE); } diff --git a/modules/imgproc/src/pyramids.cpp b/modules/imgproc/src/pyramids.cpp index 23a132f..f898c8f 100644 --- a/modules/imgproc/src/pyramids.cpp +++ b/modules/imgproc/src/pyramids.cpp @@ -407,7 +407,7 @@ static bool ocl_pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz, in { int type = _src.type(), depth = CV_MAT_DEPTH(type), channels = CV_MAT_CN(type); - if ((channels != 1 && channels != 2 && channels != 4) || borderType != BORDER_DEFAULT) + if (channels > 4 || borderType != BORDER_DEFAULT) return false; bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; @@ -426,12 +426,16 @@ static bool ocl_pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz, in int float_depth = depth == CV_64F ? CV_64F : CV_32F; char cvt[2][50]; - ocl::Kernel k("pyrDown", ocl::imgproc::pyr_down_oclsrc, - format("-D T=%s -D FT=%s -D convertToT=%s -D convertToFT=%s%s", - ocl::typeToStr(type), ocl::typeToStr(CV_MAKETYPE(float_depth, channels)), - ocl::convertTypeStr(float_depth, depth, channels, cvt[0]), - ocl::convertTypeStr(depth, float_depth, channels, cvt[1]), - doubleSupport ? " -D DOUBLE_SUPPORT" : "")); + String buildOptions = format( + "-D T=%s -D FT=%s -D convertToT=%s -D convertToFT=%s%s " + "-D T1=%s -D cn=%d", + ocl::typeToStr(type), ocl::typeToStr(CV_MAKETYPE(float_depth, channels)), + ocl::convertTypeStr(float_depth, depth, channels, cvt[0]), + ocl::convertTypeStr(depth, float_depth, channels, cvt[1]), + doubleSupport ? " -D DOUBLE_SUPPORT" : "", + ocl::typeToStr(depth), channels + ); + ocl::Kernel k("pyrDown", ocl::imgproc::pyr_down_oclsrc, buildOptions); if (k.empty()) return false; @@ -446,7 +450,7 @@ static bool ocl_pyrUp( InputArray _src, OutputArray _dst, const Size& _dsz, int { int type = _src.type(), depth = CV_MAT_DEPTH(type), channels = CV_MAT_CN(type); - if ((channels != 1 && channels != 2 && channels != 4) || borderType != BORDER_DEFAULT) + if (channels > 4 || borderType != BORDER_DEFAULT) return false; bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; @@ -464,12 +468,16 @@ static bool ocl_pyrUp( InputArray _src, OutputArray _dst, const Size& _dsz, int int float_depth = depth == CV_64F ? CV_64F : CV_32F; char cvt[2][50]; - ocl::Kernel k("pyrUp", ocl::imgproc::pyr_up_oclsrc, - format("-D T=%s -D FT=%s -D convertToT=%s -D convertToFT=%s%s", - ocl::typeToStr(type), ocl::typeToStr(CV_MAKETYPE(float_depth, channels)), - ocl::convertTypeStr(float_depth, depth, channels, cvt[0]), - ocl::convertTypeStr(depth, float_depth, channels, cvt[1]), - doubleSupport ? " -D DOUBLE_SUPPORT" : "")); + String buildOptions = format( + "-D T=%s -D FT=%s -D convertToT=%s -D convertToFT=%s%s " + "-D T1=%s -D cn=%d", + ocl::typeToStr(type), ocl::typeToStr(CV_MAKETYPE(float_depth, channels)), + ocl::convertTypeStr(float_depth, depth, channels, cvt[0]), + ocl::convertTypeStr(depth, float_depth, channels, cvt[1]), + doubleSupport ? " -D DOUBLE_SUPPORT" : "", + ocl::typeToStr(depth), channels + ); + ocl::Kernel k("pyrUp", ocl::imgproc::pyr_up_oclsrc, buildOptions); if (k.empty()) return false; diff --git a/modules/imgproc/test/ocl/test_pyramids.cpp b/modules/imgproc/test/ocl/test_pyramids.cpp index 3dd7a41..d6174a5 100644 --- a/modules/imgproc/test/ocl/test_pyramids.cpp +++ b/modules/imgproc/test/ocl/test_pyramids.cpp @@ -108,7 +108,7 @@ OCL_TEST_P(PyrDown, Mat) OCL_INSTANTIATE_TEST_CASE_P(ImgprocPyr, PyrDown, Combine( Values(CV_8U, CV_16U, CV_16S, CV_32F, CV_64F), - Values(1, 2, 4), + Values(1, 2, 3, 4), Bool() )); @@ -133,7 +133,7 @@ OCL_TEST_P(PyrUp, Mat) OCL_INSTANTIATE_TEST_CASE_P(ImgprocPyr, PyrUp, Combine( Values(CV_8U, CV_16U, CV_16S, CV_32F, CV_64F), - Values(1, 2, 4), + Values(1, 2, 3, 4), Bool() )); -- 2.7.4