From 31f864a22b0af93646cd148ee75fc7893cbd00ca Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Tue, 25 Mar 2014 20:06:26 +0400 Subject: [PATCH] integer cv::resize (INTER_LINEAR && CV_8UC(cn)) --- modules/imgproc/src/imgwarp.cpp | 233 +++++++++++++++++++++------------ modules/imgproc/src/opencl/resize.cl | 164 +++++++++++++---------- modules/imgproc/test/ocl/test_warp.cpp | 9 +- 3 files changed, 255 insertions(+), 151 deletions(-) diff --git a/modules/imgproc/src/imgwarp.cpp b/modules/imgproc/src/imgwarp.cpp index 51ee5bc..da392b0 100644 --- a/modules/imgproc/src/imgwarp.cpp +++ b/modules/imgproc/src/imgwarp.cpp @@ -1917,71 +1917,73 @@ class IPPresizeInvoker : public ParallelLoopBody { public: - IPPresizeInvoker(Mat &_src, Mat &_dst, double _inv_scale_x, double _inv_scale_y, int _mode, bool *_ok) : - ParallelLoopBody(), src(_src), dst(_dst), inv_scale_x(_inv_scale_x), inv_scale_y(_inv_scale_y), mode(_mode), ok(_ok) - { - *ok = true; - IppiSize srcSize, dstSize; - int type = src.type(); - int specSize = 0, initSize = 0; - srcSize.width = src.cols; - srcSize.height = src.rows; - dstSize.width = dst.cols; - dstSize.height = dst.rows; - - switch (type) - { - case CV_8UC1: SET_IPP_RESIZE_PTR(8u,C1); break; - case CV_8UC3: SET_IPP_RESIZE_PTR(8u,C3); break; - case CV_8UC4: SET_IPP_RESIZE_PTR(8u,C4); break; - case CV_16UC1: SET_IPP_RESIZE_PTR(16u,C1); break; - case CV_16UC3: SET_IPP_RESIZE_PTR(16u,C3); break; - case CV_16UC4: SET_IPP_RESIZE_PTR(16u,C4); break; - case CV_16SC1: SET_IPP_RESIZE_PTR(16s,C1); break; - case CV_16SC3: SET_IPP_RESIZE_PTR(16s,C3); break; - case CV_16SC4: SET_IPP_RESIZE_PTR(16s,C4); break; - case CV_32FC1: SET_IPP_RESIZE_PTR(32f,C1); break; - case CV_32FC3: SET_IPP_RESIZE_PTR(32f,C3); break; - case CV_32FC4: SET_IPP_RESIZE_PTR(32f,C4); break; - case CV_64FC1: SET_IPP_RESIZE_LINEAR_FUNC_64_PTR(64f,C1); break; - case CV_64FC3: SET_IPP_RESIZE_LINEAR_FUNC_64_PTR(64f,C3); break; - case CV_64FC4: SET_IPP_RESIZE_LINEAR_FUNC_64_PTR(64f,C4); break; - 
default: { *ok = false; return;} break; - } - } + IPPresizeInvoker(const Mat & _src, Mat & _dst, double _inv_scale_x, double _inv_scale_y, int _mode, bool *_ok) : + ParallelLoopBody(), src(_src), dst(_dst), inv_scale_x(_inv_scale_x), inv_scale_y(_inv_scale_y), mode(_mode), ok(_ok) + { + *ok = true; + IppiSize srcSize, dstSize; + int type = src.type(); + int specSize = 0, initSize = 0; + srcSize.width = src.cols; + srcSize.height = src.rows; + dstSize.width = dst.cols; + dstSize.height = dst.rows; - ~IPPresizeInvoker() - { - } + switch (type) + { + case CV_8UC1: SET_IPP_RESIZE_PTR(8u,C1); break; + case CV_8UC3: SET_IPP_RESIZE_PTR(8u,C3); break; + case CV_8UC4: SET_IPP_RESIZE_PTR(8u,C4); break; + case CV_16UC1: SET_IPP_RESIZE_PTR(16u,C1); break; + case CV_16UC3: SET_IPP_RESIZE_PTR(16u,C3); break; + case CV_16UC4: SET_IPP_RESIZE_PTR(16u,C4); break; + case CV_16SC1: SET_IPP_RESIZE_PTR(16s,C1); break; + case CV_16SC3: SET_IPP_RESIZE_PTR(16s,C3); break; + case CV_16SC4: SET_IPP_RESIZE_PTR(16s,C4); break; + case CV_32FC1: SET_IPP_RESIZE_PTR(32f,C1); break; + case CV_32FC3: SET_IPP_RESIZE_PTR(32f,C3); break; + case CV_32FC4: SET_IPP_RESIZE_PTR(32f,C4); break; + case CV_64FC1: SET_IPP_RESIZE_LINEAR_FUNC_64_PTR(64f,C1); break; + case CV_64FC3: SET_IPP_RESIZE_LINEAR_FUNC_64_PTR(64f,C3); break; + case CV_64FC4: SET_IPP_RESIZE_LINEAR_FUNC_64_PTR(64f,C4); break; + default: { *ok = false; return; } break; + } + } - virtual void operator() (const Range& range) const - { - if (*ok == false) return; + ~IPPresizeInvoker() + { + } - int cn = src.channels(); - int dsty = min(cvRound(range.start * inv_scale_y), dst.rows); - int dstwidth = min(cvRound(src.cols * inv_scale_x), dst.cols); - int dstheight = min(cvRound(range.end * inv_scale_y), dst.rows); + virtual void operator() (const Range& range) const + { + if (*ok == false) + return; - IppiPoint dstOffset = { 0, dsty }, srcOffset = {0, 0}; - IppiSize dstSize = { dstwidth, dstheight - dsty }; - int bufsize = 0, itemSize = 
(int)src.elemSize1(); + int cn = src.channels(); + int dsty = min(cvRound(range.start * inv_scale_y), dst.rows); + int dstwidth = min(cvRound(src.cols * inv_scale_x), dst.cols); + int dstheight = min(cvRound(range.end * inv_scale_y), dst.rows); - CHECK_IPP_STATUS(getBufferSizeFunc(pSpec, dstSize, cn, &bufsize)); - CHECK_IPP_STATUS(getSrcOffsetFunc(pSpec, dstOffset, &srcOffset)); + IppiPoint dstOffset = { 0, dsty }, srcOffset = {0, 0}; + IppiSize dstSize = { dstwidth, dstheight - dsty }; + int bufsize = 0, itemSize = (int)src.elemSize1(); - Ipp8u* pSrc = (Ipp8u*)src.data + (int)src.step[0] * srcOffset.y + srcOffset.x * cn * itemSize; - Ipp8u* pDst = (Ipp8u*)dst.data + (int)dst.step[0] * dstOffset.y + dstOffset.x * cn * itemSize; + CHECK_IPP_STATUS(getBufferSizeFunc(pSpec, dstSize, cn, &bufsize)); + CHECK_IPP_STATUS(getSrcOffsetFunc(pSpec, dstOffset, &srcOffset)); - AutoBuffer<uchar> buf(bufsize + 64); - uchar* bufptr = alignPtr((uchar*)buf, 32); + Ipp8u* pSrc = (Ipp8u*)src.data + (int)src.step[0] * srcOffset.y + srcOffset.x * cn * itemSize; + Ipp8u* pDst = (Ipp8u*)dst.data + (int)dst.step[0] * dstOffset.y + dstOffset.x * cn * itemSize; + + AutoBuffer<uchar> buf(bufsize + 64); + uchar* bufptr = alignPtr((uchar*)buf, 32); + + if( func( pSrc, (int)src.step[0], pDst, (int)dst.step[0], dstOffset, dstSize, ippBorderRepl, 0, pSpec, bufptr ) < 0 ) + *ok = false; + } - if( func( pSrc, (int)src.step[0], pDst, (int)dst.step[0], dstOffset, dstSize, ippBorderRepl, 0, pSpec, bufptr ) < 0 ) - *ok = false; - } private: - Mat &src; - Mat &dst; + const Mat & src; + Mat & dst; double inv_scale_x; double inv_scale_y; void *pSpec; @@ -1993,12 +1995,13 @@ private: bool *ok; const IPPresizeInvoker& operator= (const IPPresizeInvoker&); }; + #endif #ifdef HAVE_OPENCL static void ocl_computeResizeAreaTabs(int ssize, int dsize, double scale, int * const map_tab, - float * const alpha_tab, int * const ofs_tab) + float * const alpha_tab, int * const ofs_tab) { int k = 0, dx = 0; for ( ; dx < dsize; dx++) @@ 
-2049,8 +2052,16 @@ static bool ocl_resize( InputArray _src, OutputArray _dst, Size dsize, { int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); - double inv_fx = 1. / fx, inv_fy = 1. / fy; + double inv_fx = 1.0 / fx, inv_fy = 1.0 / fy; float inv_fxf = (float)inv_fx, inv_fyf = (float)inv_fy; + int iscale_x = saturate_cast<int>(inv_fx), iscale_y = saturate_cast<int>(inv_fy); + bool is_area_fast = std::abs(inv_fx - iscale_x) < DBL_EPSILON && + std::abs(inv_fy - iscale_y) < DBL_EPSILON; + + // in case of scale_x && scale_y is equal to 2 + // INTER_AREA (fast) also is equal to INTER_LINEAR + if( interpolation == INTER_LINEAR && is_area_fast && iscale_x == 2 && iscale_y == 2 ) + /*interpolation = INTER_AREA*/(void)0; // INTER_AREA is slower if( !(cn <= 4 && (interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || @@ -2061,39 +2072,105 @@ static bool ocl_resize( InputArray _src, OutputArray _dst, Size dsize, _dst.create(dsize, type); UMat dst = _dst.getUMat(); + Size ssize = src.size(); ocl::Kernel k; size_t globalsize[] = { dst.cols, dst.rows }; if (interpolation == INTER_LINEAR) { - int wdepth = std::max(depth, CV_32S); - int wtype = CV_MAKETYPE(wdepth, cn); char buf[2][32]; - k.create("resizeLN", ocl::imgproc::resize_oclsrc, - format("-D INTER_LINEAR -D depth=%d -D PIXTYPE=%s -D PIXTYPE1=%s " - "-D WORKTYPE=%s -D convertToWT=%s -D convertToDT=%s -D cn=%d", - depth, ocl::typeToStr(type), ocl::typeToStr(depth), ocl::typeToStr(wtype), - ocl::convertTypeStr(depth, wdepth, cn, buf[0]), - ocl::convertTypeStr(wdepth, depth, cn, buf[1]), - cn)); + + // integer path is slower because of CPU part, so it's disabled + if (depth == CV_8U && ((void)0, 0)) + { + AutoBuffer<uchar> _buffer((dsize.width + dsize.height)*(sizeof(int) + sizeof(short)*2)); + int* xofs = (int*)(uchar*)_buffer, * yofs = xofs + dsize.width; + short* ialpha = (short*)(yofs + dsize.height), * ibeta = ialpha + dsize.width*2; + float fxx, fyy; + int sx, sy; + + for (int dx = 0; dx < dsize.width; 
dx++) + { + fxx = (float)((dx+0.5)*inv_fx - 0.5); + sx = cvFloor(fxx); + fxx -= sx; + + if (sx < 0) + fxx = 0, sx = 0; + + if (sx >= ssize.width-1) + fxx = 0, sx = ssize.width-1; + + xofs[dx] = sx; + ialpha[dx*2 + 0] = saturate_cast<short>((1.f - fxx) * INTER_RESIZE_COEF_SCALE); + ialpha[dx*2 + 1] = saturate_cast<short>(fxx * INTER_RESIZE_COEF_SCALE); + } + + for (int dy = 0; dy < dsize.height; dy++) + { + fyy = (float)((dy+0.5)*inv_fy - 0.5); + sy = cvFloor(fyy); + fyy -= sy; + + yofs[dy] = sy; + ibeta[dy*2 + 0] = saturate_cast<short>((1.f - fyy) * INTER_RESIZE_COEF_SCALE); + ibeta[dy*2 + 1] = saturate_cast<short>(fyy * INTER_RESIZE_COEF_SCALE); + } + + int wdepth = std::max(depth, CV_32S), wtype = CV_MAKETYPE(wdepth, cn); + UMat coeffs; + Mat(1, static_cast<int>(_buffer.size()), CV_8UC1, (uchar *)_buffer).copyTo(coeffs); + + k.create("resizeLN", ocl::imgproc::resize_oclsrc, + format("-D INTER_LINEAR_INTEGER -D depth=%d -D T=%s -D T1=%s " + "-D WT=%s -D convertToWT=%s -D convertToDT=%s -D cn=%d " + "-D INTER_RESIZE_COEF_BITS=%d", + depth, ocl::typeToStr(type), ocl::typeToStr(depth), ocl::typeToStr(wtype), + ocl::convertTypeStr(depth, wdepth, cn, buf[0]), + ocl::convertTypeStr(wdepth, depth, cn, buf[1]), + cn, INTER_RESIZE_COEF_BITS)); + if (k.empty()) + return false; + + k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst), + ocl::KernelArg::PtrReadOnly(coeffs)); + } + else + { + int wdepth = std::max(depth, CV_32S), wtype = CV_MAKETYPE(wdepth, cn); + k.create("resizeLN", ocl::imgproc::resize_oclsrc, + format("-D INTER_LINEAR -D depth=%d -D T=%s -D T1=%s " + "-D WT=%s -D convertToWT=%s -D convertToDT=%s -D cn=%d " + "-D INTER_RESIZE_COEF_BITS=%d", + depth, ocl::typeToStr(type), ocl::typeToStr(depth), ocl::typeToStr(wtype), + ocl::convertTypeStr(depth, wdepth, cn, buf[0]), + ocl::convertTypeStr(wdepth, depth, cn, buf[1]), + cn, INTER_RESIZE_COEF_BITS)); + if (k.empty()) + return false; + + k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst), + (float)inv_fx, 
(float)inv_fy); + } } else if (interpolation == INTER_NEAREST) { k.create("resizeNN", ocl::imgproc::resize_oclsrc, - format("-D INTER_NEAREST -D PIXTYPE=%s -D PIXTYPE1=%s -D cn=%d", + format("-D INTER_NEAREST -D T=%s -D T1=%s -D cn=%d", ocl::memopTypeToStr(type), ocl::memopTypeToStr(depth), cn)); + if (k.empty()) + return false; + + k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst), + (float)inv_fx, (float)inv_fy); } else if (interpolation == INTER_AREA) { - int iscale_x = saturate_cast<int>(inv_fx); - int iscale_y = saturate_cast<int>(inv_fy); - bool is_area_fast = std::abs(inv_fx - iscale_x) < DBL_EPSILON && - std::abs(inv_fy - iscale_y) < DBL_EPSILON; int wdepth = std::max(depth, is_area_fast ? CV_32S : CV_32F); int wtype = CV_MAKE_TYPE(wdepth, cn); char cvt[2][40]; - String buildOption = format("-D INTER_AREA -D PIXTYPE=%s -D PIXTYPE1=%s -D WTV=%s -D convertToWTV=%s -D cn=%d", + String buildOption = format("-D INTER_AREA -D T=%s -D T1=%s -D WTV=%s -D convertToWTV=%s -D cn=%d", ocl::typeToStr(type), ocl::typeToStr(depth), ocl::typeToStr(wtype), ocl::convertTypeStr(depth, wdepth, cn, cvt[0]), cn); @@ -2103,7 +2180,7 @@ static bool ocl_resize( InputArray _src, OutputArray _dst, Size dsize, if (is_area_fast) { int wdepth2 = std::max(CV_32F, depth), wtype2 = CV_MAKE_TYPE(wdepth2, cn); - buildOption = buildOption + format(" -D convertToPIXTYPE=%s -D WT2V=%s -D convertToWT2V=%s -D INTER_AREA_FAST" + buildOption = buildOption + format(" -D convertToT=%s -D WT2V=%s -D convertToWT2V=%s -D INTER_AREA_FAST" " -D XSCALE=%d -D YSCALE=%d -D SCALE=%ff", ocl::convertTypeStr(wdepth2, depth, cn, cvt[0]), ocl::typeToStr(wtype2), ocl::convertTypeStr(wdepth, wdepth2, cn, cvt[1]), @@ -2126,12 +2203,11 @@ static bool ocl_resize( InputArray _src, OutputArray _dst, Size dsize, } else { - buildOption = buildOption + format(" -D convertToPIXTYPE=%s", ocl::convertTypeStr(wdepth, depth, cn, cvt[0])); + buildOption = buildOption + format(" -D convertToT=%s", 
ocl::convertTypeStr(wdepth, depth, cn, cvt[0])); k.create("resizeAREA", ocl::imgproc::resize_oclsrc, buildOption); if (k.empty()) return false; - Size ssize = src.size(); int xytab_size = (ssize.width + ssize.height) << 1; int tabofs_size = dsize.height + dsize.width + 2; @@ -2161,11 +2237,6 @@ static bool ocl_resize( InputArray _src, OutputArray _dst, Size dsize, return k.run(2, globalsize, NULL, false); } - if( k.empty() ) - return false; - k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst), - (float)inv_fx, (float)inv_fy); - return k.run(2, globalsize, 0, false); } diff --git a/modules/imgproc/src/opencl/resize.cl b/modules/imgproc/src/opencl/resize.cl index a142d78..d656bf6 100644 --- a/modules/imgproc/src/opencl/resize.cl +++ b/modules/imgproc/src/opencl/resize.cl @@ -43,110 +43,140 @@ // //M*/ -#if defined DOUBLE_SUPPORT +#ifdef DOUBLE_SUPPORT +#ifdef cl_amd_fp64 +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#elif defined (cl_khr_fp64) #pragma OPENCL EXTENSION cl_khr_fp64:enable #endif +#endif -#define INTER_RESIZE_COEF_BITS 11 #define INTER_RESIZE_COEF_SCALE (1 << INTER_RESIZE_COEF_BITS) #define CAST_BITS (INTER_RESIZE_COEF_BITS << 1) #define INC(x,l) min(x+1,l-1) - -#define noconvert(x) (x) +#define noconvert #if cn != 3 -#define loadpix(addr) *(__global const PIXTYPE*)(addr) -#define storepix(val, addr) *(__global PIXTYPE*)(addr) = val -#define PIXSIZE ((int)sizeof(PIXTYPE)) +#define loadpix(addr) *(__global const T *)(addr) +#define storepix(val, addr) *(__global T *)(addr) = val +#define TSIZE (int)sizeof(T) #else -#define loadpix(addr) vload3(0, (__global const PIXTYPE1*)(addr)) -#define storepix(val, addr) vstore3(val, 0, (__global PIXTYPE1*)(addr)) -#define PIXSIZE ((int)sizeof(PIXTYPE1)*3) +#define loadpix(addr) vload3(0, (__global const T1 *)(addr)) +#define storepix(val, addr) vstore3(val, 0, (__global T1 *)(addr)) +#define TSIZE (int)sizeof(T1)*cn #endif -#if defined INTER_LINEAR +#ifdef INTER_LINEAR_INTEGER -__kernel void 
resizeLN(__global const uchar* srcptr, int srcstep, int srcoffset, - int srcrows, int srccols, - __global uchar* dstptr, int dststep, int dstoffset, - int dstrows, int dstcols, - float ifx, float ify) +__kernel void resizeLN(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols, + __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols, + __global const uchar * buffer) { int dx = get_global_id(0); int dy = get_global_id(1); - float sx = ((dx+0.5f) * ifx - 0.5f), sy = ((dy+0.5f) * ify - 0.5f); - int x = floor(sx), y = floor(sy); - - float u = sx - x, v = sy - y; + if (dx < dst_cols && dy < dst_rows) + { + __global const int * xofs = (__global const int *)(buffer), * yofs = xofs + dst_cols; + __global const short * ialpha = (__global const short *)(yofs + dst_rows); + __global const short * ibeta = ialpha + ((dst_cols + dy) << 1); + ialpha += dx << 1; + + int sx0 = xofs[dx], sy0 = clamp(yofs[dy], 0, src_rows - 1), + sy1 = clamp(yofs[dy] + 1, 0, src_rows - 1); + short a0 = ialpha[0], a1 = ialpha[1]; + short b0 = ibeta[0], b1 = ibeta[1]; + + int src_index0 = mad24(sy0, src_step, mad24(sx0, TSIZE, src_offset)), + src_index1 = mad24(sy1, src_step, mad24(sx0, TSIZE, src_offset)); + WT data0 = convertToWT(loadpix(srcptr + src_index0)); + WT data1 = convertToWT(loadpix(srcptr + src_index0 + (sx0 + 1 < src_cols ? TSIZE : 0))); /* a1 == 0 at the right border, so re-reading column sx0 is exact and stays inside the row */ + WT data2 = convertToWT(loadpix(srcptr + src_index1)); + WT data3 = convertToWT(loadpix(srcptr + src_index1 + (sx0 + 1 < src_cols ? TSIZE : 0))); + + WT val = ( (((data0 * a0 + data1 * a1) >> 4) * b0) >> 16) + + ( (((data2 * a0 + data3 * a1) >> 4) * b1) >> 16); + + storepix(convertToDT((val + 2) >> 2), + dstptr + mad24(dy, dst_step, mad24(dx, TSIZE, dst_offset))); + } +} - if ( x<0 ) x=0,u=0; - if ( x>=srccols ) x=srccols-1,u=0; - if ( y<0 ) y=0,v=0; - if ( y>=srcrows ) y=srcrows-1,v=0; +#elif defined INTER_LINEAR - int y_ = INC(y,srcrows); - int x_ = INC(x,srccols); +__kernel void resizeLN(__global const uchar * srcptr, int src_step, int 
src_offset, int src_rows, int src_cols, + __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols, + float ifx, float ify) +{ + int dx = get_global_id(0); + int dy = get_global_id(1); -#if depth <= 4 + if (dx < dst_cols && dy < dst_rows) + { + float sx = ((dx+0.5f) * ifx - 0.5f), sy = ((dy+0.5f) * ify - 0.5f); + int x = floor(sx), y = floor(sy); - u = u * INTER_RESIZE_COEF_SCALE; - v = v * INTER_RESIZE_COEF_SCALE; + float u = sx - x, v = sy - y; - int U = rint(u); - int V = rint(v); - int U1 = rint(INTER_RESIZE_COEF_SCALE - u); - int V1 = rint(INTER_RESIZE_COEF_SCALE - v); + if ( x<0 ) x=0,u=0; + if ( x>=src_cols ) x=src_cols-1,u=0; + if ( y<0 ) y=0,v=0; + if ( y>=src_rows ) y=src_rows-1,v=0; - WORKTYPE data0 = convertToWT(loadpix(srcptr + mad24(y, srcstep, srcoffset + x*PIXSIZE))); - WORKTYPE data1 = convertToWT(loadpix(srcptr + mad24(y, srcstep, srcoffset + x_*PIXSIZE))); - WORKTYPE data2 = convertToWT(loadpix(srcptr + mad24(y_, srcstep, srcoffset + x*PIXSIZE))); - WORKTYPE data3 = convertToWT(loadpix(srcptr + mad24(y_, srcstep, srcoffset + x_*PIXSIZE))); + int y_ = INC(y, src_rows); + int x_ = INC(x, src_cols); - WORKTYPE val = mul24((WORKTYPE)mul24(U1, V1), data0) + mul24((WORKTYPE)mul24(U, V1), data1) + - mul24((WORKTYPE)mul24(U1, V), data2) + mul24((WORKTYPE)mul24(U, V), data3); +#if depth <= 4 + u = u * INTER_RESIZE_COEF_SCALE; + v = v * INTER_RESIZE_COEF_SCALE; - PIXTYPE uval = convertToDT((val + (1<<(CAST_BITS-1)))>>CAST_BITS); + int U = rint(u); + int V = rint(v); + int U1 = rint(INTER_RESIZE_COEF_SCALE - u); + int V1 = rint(INTER_RESIZE_COEF_SCALE - v); -#else - float u1 = 1.f - u; - float v1 = 1.f - v; - WORKTYPE data0 = convertToWT(loadpix(srcptr + mad24(y, srcstep, srcoffset + x*PIXSIZE))); - WORKTYPE data1 = convertToWT(loadpix(srcptr + mad24(y, srcstep, srcoffset + x_*PIXSIZE))); - WORKTYPE data2 = convertToWT(loadpix(srcptr + mad24(y_, srcstep, srcoffset + x*PIXSIZE))); - WORKTYPE data3 = convertToWT(loadpix(srcptr + 
mad24(y_, srcstep, srcoffset + x_*PIXSIZE))); + WT data0 = convertToWT(loadpix(srcptr + mad24(y, src_step, mad24(x, TSIZE, src_offset)))); + WT data1 = convertToWT(loadpix(srcptr + mad24(y, src_step, mad24(x_, TSIZE, src_offset)))); + WT data2 = convertToWT(loadpix(srcptr + mad24(y_, src_step, mad24(x, TSIZE, src_offset)))); + WT data3 = convertToWT(loadpix(srcptr + mad24(y_, src_step, mad24(x_, TSIZE, src_offset)))); - PIXTYPE uval = u1 * v1 * data0 + u * v1 * data1 + u1 * v *data2 + u * v *data3; + WT val = mul24((WT)mul24(U1, V1), data0) + mul24((WT)mul24(U, V1), data1) + + mul24((WT)mul24(U1, V), data2) + mul24((WT)mul24(U, V), data3); + T uval = convertToDT((val + (1<<(CAST_BITS-1)))>>CAST_BITS); +#else + float u1 = 1.f - u; + float v1 = 1.f - v; + WT data0 = convertToWT(loadpix(srcptr + mad24(y, src_step, mad24(x, TSIZE, src_offset)))); + WT data1 = convertToWT(loadpix(srcptr + mad24(y, src_step, mad24(x_, TSIZE, src_offset)))); + WT data2 = convertToWT(loadpix(srcptr + mad24(y_, src_step, mad24(x, TSIZE, src_offset)))); + WT data3 = convertToWT(loadpix(srcptr + mad24(y_, src_step, mad24(x_, TSIZE, src_offset)))); + + T uval = u1 * v1 * data0 + u * v1 * data1 + u1 * v *data2 + u * v *data3; #endif - - if(dx < dstcols && dy < dstrows) - { - storepix(uval, dstptr + mad24(dy, dststep, dstoffset + dx*PIXSIZE)); + storepix(uval, dstptr + mad24(dy, dst_step, mad24(dx, TSIZE, dst_offset))); } } #elif defined INTER_NEAREST -__kernel void resizeNN(__global const uchar* srcptr, int srcstep, int srcoffset, - int srcrows, int srccols, - __global uchar* dstptr, int dststep, int dstoffset, - int dstrows, int dstcols, +__kernel void resizeNN(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols, + __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols, float ifx, float ify) { int dx = get_global_id(0); int dy = get_global_id(1); - if( dx < dstcols && dy < dstrows ) + if (dx < dst_cols && dy < dst_rows) { - float 
s1 = dx*ifx; - float s2 = dy*ify; - int sx = min(convert_int_rtz(s1), srccols-1); - int sy = min(convert_int_rtz(s2), srcrows-1); + float s1 = dx * ifx; + float s2 = dy * ify; + int sx = min(convert_int_rtz(s1), src_cols - 1); + int sy = min(convert_int_rtz(s2), src_rows - 1); - storepix(loadpix(srcptr + mad24(sy, srcstep, srcoffset + sx*PIXSIZE)), - dstptr + mad24(dy, dststep, dstoffset + dx*PIXSIZE)); + storepix(loadpix(srcptr + mad24(sy, src_step, mad24(sx, TSIZE, src_offset))), + dstptr + mad24(dy, dst_step, mad24(dx, TSIZE, dst_offset))); } } @@ -179,10 +209,10 @@ __kernel void resizeAREA_FAST(__global const uchar * src, int src_step, int src_ int src_index = mad24(symap_tab[y + sy], src_step, src_offset); #pragma unroll for (int x = 0; x < XSCALE; ++x) - sum += convertToWTV(loadpix(src + src_index + sxmap_tab[sx + x]*PIXSIZE)); + sum += convertToWTV(loadpix(src + mad24(sxmap_tab[sx + x], TSIZE, src_index))); } - storepix(convertToPIXTYPE(convertToWT2V(sum) * (WT2V)(SCALE)), dst + dst_index + dx*PIXSIZE); + storepix(convertToT(convertToWT2V(sum) * (WT2V)(SCALE)), dst + mad24(dx, TSIZE, dst_index)); } } @@ -224,12 +254,12 @@ __kernel void resizeAREA(__global const uchar * src, int src_step, int src_offse for (int sx = sx0, xk = xk0; sx <= sx1; ++sx, ++xk) { WTV alpha = (WTV)(xalpha_tab[xk]); - buf += convertToWTV(loadpix(src + src_index + sx*PIXSIZE)) * alpha; + buf += convertToWTV(loadpix(src + mad24(sx, TSIZE, src_index))) * alpha; } sum += buf * beta; } - storepix(convertToPIXTYPE(sum), dst + dst_index + dx*PIXSIZE); + storepix(convertToT(sum), dst + mad24(dx, TSIZE, dst_index)); } } diff --git a/modules/imgproc/test/ocl/test_warp.cpp b/modules/imgproc/test/ocl/test_warp.cpp index 8c82d82..f9ccef8 100644 --- a/modules/imgproc/test/ocl/test_warp.cpp +++ b/modules/imgproc/test/ocl/test_warp.cpp @@ -210,12 +210,15 @@ OCL_TEST_P(Resize, Mat) { for (int j = 0; j < test_loop_times; j++) { + int depth = CV_MAT_DEPTH(type); + double eps = depth <= CV_32S ? 
1 : 1e-2; + random_roi(); OCL_OFF(cv::resize(src_roi, dst_roi, Size(), fx, fy, interpolation)); OCL_ON(cv::resize(usrc_roi, udst_roi, Size(), fx, fy, interpolation)); - Near(1.0); + Near(eps); } } @@ -328,8 +331,8 @@ OCL_INSTANTIATE_TEST_CASE_P(ImgprocWarp, WarpPerspective, Combine( OCL_INSTANTIATE_TEST_CASE_P(ImgprocWarp, Resize, Combine( Values(CV_8UC1, CV_8UC4, CV_16UC2, CV_32FC1, CV_32FC4), - Values(0.5, 1.5, 2.0), - Values(0.5, 1.5, 2.0), + Values(0.5, 1.5, 2.0, 0.2), + Values(0.5, 1.5, 2.0, 0.2), Values((Interpolation)INTER_NEAREST, (Interpolation)INTER_LINEAR), Bool())); -- 2.7.4