From: Ilya Lavrenov Date: Wed, 27 Nov 2013 14:30:07 +0000 (+0400) Subject: added cv::resize INTER_AREA to T-API X-Git-Tag: accepted/tizen/6.0/unified/20201030.111113~3670^2 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=09795e3265f7f8af264a1a7f3452ffcfe8cfa2d9;p=platform%2Fupstream%2Fopencv.git added cv::resize INTER_AREA to T-API --- diff --git a/modules/core/include/opencv2/core/ocl.hpp b/modules/core/include/opencv2/core/ocl.hpp index 971e4de..f50ed37 100644 --- a/modules/core/include/opencv2/core/ocl.hpp +++ b/modules/core/include/opencv2/core/ocl.hpp @@ -250,8 +250,12 @@ public: KernelArg(); static KernelArg Local() { return KernelArg(LOCAL, 0); } - static KernelArg PtrOnly(const UMat & m) - { return KernelArg(PTR_ONLY, (UMat*)&m); } + static KernelArg PtrWriteOnly(const UMat& m) + { return KernelArg(PTR_ONLY+WRITE_ONLY, (UMat*)&m); } + static KernelArg PtrReadOnly(const UMat& m) + { return KernelArg(PTR_ONLY+READ_ONLY, (UMat*)&m); } + static KernelArg PtrReadWrite(const UMat& m) + { return KernelArg(PTR_ONLY+READ_WRITE, (UMat*)&m); } static KernelArg ReadWrite(const UMat& m, int wscale=1) { return KernelArg(READ_WRITE, (UMat*)&m, wscale); } static KernelArg ReadWriteNoSize(const UMat& m, int wscale=1) diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp index 48f44a6..1d4c419 100644 --- a/modules/core/src/ocl.cpp +++ b/modules/core/src/ocl.cpp @@ -2197,10 +2197,10 @@ int Kernel::set(int i, const UMat& m) int Kernel::set(int i, const KernelArg& arg) { CV_Assert( i >= 0 ); - if( i == 0 ) - p->cleanupUMats(); if( !p || !p->handle ) return -1; + if( i == 0 ) + p->cleanupUMats(); if( arg.m ) { int accessFlags = ((arg.flags & KernelArg::READ_ONLY) ? ACCESS_READ : 0) + @@ -2222,7 +2222,7 @@ int Kernel::set(int i, const KernelArg& arg) { int cols = u2d.cols*arg.wscale; clSetKernelArg(p->handle, (cl_uint)i, sizeof(u2d.rows), &u2d.rows); - clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u2d.cols), &cols); + clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(cols), &cols); i += 2; } } @@ -2256,10 +2256,17 @@ bool Kernel::run(int dims, size_t globalsize[], size_t localsize[], { if(!p || !p->handle || p->e != 0) return false; + + AutoBuffer _globalSize(dims); + size_t * globalSizePtr = (size_t *)_globalSize; + for (int i = 0; i < dims; ++i) + globalSizePtr[i] = localsize == NULL ? globalsize[i] : + ((globalsize[i] + localsize[i] - 1) / localsize[i]) * localsize[i]; + cl_command_queue qq = getQueue(q); size_t offset[CV_MAX_DIM] = {0}; cl_int retval = clEnqueueNDRangeKernel(qq, p->handle, (cl_uint)dims, - offset, globalsize, localsize, 0, 0, + offset, globalSizePtr, localsize, 0, 0, sync ? 0 : &p->e); if( sync || retval < 0 ) { @@ -2350,6 +2357,7 @@ struct Program::Impl void** deviceList = deviceListBuf; for( i = 0; i < n; i++ ) deviceList[i] = ctx.device(i).ptr(); + retval = clBuildProgram(handle, n, (const cl_device_id*)deviceList, buildflags.c_str(), 0, 0); diff --git a/modules/core/test/test_umat.cpp b/modules/core/test/test_umat.cpp index 0b01120..d7efaa0 100644 --- a/modules/core/test/test_umat.cpp +++ b/modules/core/test/test_umat.cpp @@ -107,8 +107,8 @@ bool CV_UMatTest::TestUMat() ra += Scalar::all(1.f); { - Mat temp = ura.getMat(ACCESS_RW); - temp += Scalar::all(1.f); + Mat temp = ura.getMat(ACCESS_RW); + temp += Scalar::all(1.f); } ra.copyTo(rb); CHECK_DIFF(ra, rb); @@ -146,8 +146,8 @@ bool CV_UMatTest::TestUMat() CHECK_DIFF(rc0, rc); { - UMat tmp = rc0.getUMat(ACCESS_WRITE); - cv::max(ura, urb, tmp); + UMat tmp = rc0.getUMat(ACCESS_WRITE); + cv::max(ura, urb, tmp); } CHECK_DIFF(rc0, rc); diff --git a/modules/imgproc/src/color.cpp b/modules/imgproc/src/color.cpp index 13f474d..fb2627a 100644 --- a/modules/imgproc/src/color.cpp +++ b/modules/imgproc/src/color.cpp @@ -2875,7 +2875,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) k.create("RGB2XYZ", ocl::imgproc::cvtcolor_oclsrc, format("-D depth=%d -D scn=%d -D dcn=3 -D bidx=%d", depth, scn, bidx)); - k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst), ocl::KernelArg::PtrOnly(c)); + k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst), ocl::KernelArg::PtrReadOnly(c)); return k.run(2, globalsize, 0, false); } case COLOR_XYZ2BGR: case COLOR_XYZ2RGB: @@ -2924,7 +2924,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) k.create("XYZ2RGB", ocl::imgproc::cvtcolor_oclsrc, format("-D depth=%d -D scn=3 -D dcn=%d -D bidx=%d", depth, dcn, bidx)); - k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst), ocl::KernelArg::PtrOnly(c)); + k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst), ocl::KernelArg::PtrReadOnly(c)); return k.run(2, globalsize, 0, false); } case COLOR_BGR2HSV: case COLOR_RGB2HSV: case COLOR_BGR2HSV_FULL: case COLOR_RGB2HSV_FULL: @@ -2980,8 +2980,8 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) depth, hrange, bidx, scn)); k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst), - ocl::KernelArg::PtrOnly(sdiv_data), hrange == 256 ? ocl::KernelArg::PtrOnly(hdiv_data256) : - ocl::KernelArg::PtrOnly(hdiv_data180)); + ocl::KernelArg::PtrReadOnly(sdiv_data), hrange == 256 ? ocl::KernelArg::PtrReadOnly(hdiv_data256) : + ocl::KernelArg::PtrReadOnly(hdiv_data180)); return k.run(2, globalsize, NULL, false); } diff --git a/modules/imgproc/src/imgwarp.cpp b/modules/imgproc/src/imgwarp.cpp index b62d872..15d7c6a 100644 --- a/modules/imgproc/src/imgwarp.cpp +++ b/modules/imgproc/src/imgwarp.cpp @@ -48,8 +48,6 @@ #include "precomp.hpp" #include "opencl_kernels.hpp" -#include -#include #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) static IppStatus sts = ippInit(); @@ -1902,18 +1900,72 @@ private: }; #endif +static void ocl_computeResizeAreaTabs(int ssize, int dsize, double scale, int * const map_tab, + float * const alpha_tab, int * const ofs_tab) +{ + int k = 0, dx = 0; + for ( ; dx < dsize; dx++) + { + ofs_tab[dx] = k; + + double fsx1 = dx * scale; + double fsx2 = fsx1 + scale; + double cellWidth = std::min(scale, ssize - fsx1); + + int sx1 = cvCeil(fsx1), sx2 = cvFloor(fsx2); + + sx2 = std::min(sx2, ssize - 1); + sx1 = std::min(sx1, sx2); + + if (sx1 - fsx1 > 1e-3) + { + map_tab[k] = sx1 - 1; + alpha_tab[k++] = (float)((sx1 - fsx1) / cellWidth); + } + + for (int sx = sx1; sx < sx2; sx++) + { + map_tab[k] = sx; + alpha_tab[k++] = float(1.0 / cellWidth); + } + + if (fsx2 - sx2 > 1e-3) + { + map_tab[k] = sx2; + alpha_tab[k++] = (float)(std::min(std::min(fsx2 - sx2, 1.), cellWidth) / cellWidth); + } + } + ofs_tab[dx] = k; +} + +static void ocl_computeResizeAreaFastTabs(int * dmap_tab, int * smap_tab, int scale, int dcols, int scol) +{ + for (int i = 0; i < dcols; ++i) + dmap_tab[i] = scale * i; + + for (int i = 0, size = dcols * scale; i < size; ++i) + smap_tab[i] = std::min(scol - 1, i); +} + static bool ocl_resize( InputArray _src, OutputArray _dst, Size dsize, double fx, double fy, int interpolation) { int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); - if( !(cn <= 4 && - (interpolation == INTER_NEAREST || - (interpolation == INTER_LINEAR))) ) + + double inv_fx = 1. / fx, inv_fy = 1. / fy; + float inv_fxf = (float)inv_fx, inv_fyf = (float)inv_fy; + + if( cn == 3 || !(cn <= 4 && + (interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || + (interpolation == INTER_AREA && inv_fx >= 1 && inv_fy >= 1) )) ) return false; + UMat src = _src.getUMat(); _dst.create(dsize, type); UMat dst = _dst.getUMat(); + ocl::Kernel k; + size_t globalsize[] = { dst.cols, dst.rows }; if (interpolation == INTER_LINEAR) { @@ -1929,14 +1981,86 @@ static bool ocl_resize( InputArray _src, OutputArray _dst, Size dsize, else if (interpolation == INTER_NEAREST) { k.create("resizeNN", ocl::imgproc::resize_oclsrc, - format("-D INTER_NEAREST -D PIXTYPE=%s", ocl::memopTypeToStr(type) )); + format("-D INTER_NEAREST -D PIXTYPE=%s -D cn", ocl::memopTypeToStr(type), cn)); + } + else if (interpolation == INTER_AREA) + { + int iscale_x = saturate_cast(inv_fx); + int iscale_y = saturate_cast(inv_fy); + bool is_area_fast = std::abs(inv_fx - iscale_x) < DBL_EPSILON && + std::abs(inv_fy - iscale_y) < DBL_EPSILON; + int wdepth = std::max(depth, is_area_fast ? CV_32S : CV_32F); + int wtype = CV_MAKE_TYPE(wdepth, cn); + + char cvt[2][40]; + String buildOption = format("-D INTER_AREA -D T=%s -D WTV=%s -D convertToWTV=%s", + ocl::typeToStr(type), ocl::typeToStr(wtype), + ocl::convertTypeStr(depth, wdepth, cn, cvt[0])); + + UMat alphaOcl, tabofsOcl, mapOcl; + UMat dmap, smap; + + if (is_area_fast) + { + int wdepth2 = std::max(CV_32F, depth), wtype2 = CV_MAKE_TYPE(wdepth2, cn); + buildOption = buildOption + format(" -D convertToT=%s -D WT2V=%s -D convertToWT2V=%s -D INTER_AREA_FAST" + " -D XSCALE=%d -D YSCALE=%d -D SCALE=%f", + ocl::convertTypeStr(wdepth2, depth, cn, cvt[0]), + ocl::typeToStr(wtype2), ocl::convertTypeStr(wdepth, wdepth2, cn, cvt[1]), + iscale_x, iscale_y, 1.0f / (iscale_x * iscale_y)); + + k.create("resizeAREA_FAST", ocl::imgproc::resize_oclsrc, buildOption); + + int smap_tab_size = dst.cols * iscale_x + dst.rows * iscale_y; + AutoBuffer dmap_tab(dst.cols + dst.rows), smap_tab(smap_tab_size); + int * dxmap_tab = dmap_tab, * dymap_tab = dxmap_tab + dst.cols; + int * sxmap_tab = smap_tab, * symap_tab = smap_tab + dst.cols * iscale_y; + + ocl_computeResizeAreaFastTabs(dxmap_tab, sxmap_tab, iscale_x, dst.cols, src.cols); + ocl_computeResizeAreaFastTabs(dymap_tab, symap_tab, iscale_y, dst.rows, src.rows); + + Mat(1, dst.cols + dst.rows, CV_32SC1, (void *)dmap_tab).copyTo(dmap); + Mat(1, smap_tab_size, CV_32SC1, (void *)smap_tab).copyTo(smap); + } + else + { + buildOption = buildOption + format(" -D convertToT=%s", ocl::convertTypeStr(wdepth, depth, cn, cvt[0])); + k.create("resizeAREA", ocl::imgproc::resize_oclsrc, buildOption); + + Size ssize = src.size(); + int xytab_size = (ssize.width + ssize.height) << 1; + int tabofs_size = dsize.height + dsize.width + 2; + + AutoBuffer _xymap_tab(xytab_size), _xyofs_tab(tabofs_size); + AutoBuffer _xyalpha_tab(xytab_size); + int * xmap_tab = _xymap_tab, * ymap_tab = _xymap_tab + (ssize.width << 1); + float * xalpha_tab = _xyalpha_tab, * yalpha_tab = _xyalpha_tab + (ssize.width << 1); + int * xofs_tab = _xyofs_tab, * yofs_tab = _xyofs_tab + dsize.width + 1; + + ocl_computeResizeAreaTabs(ssize.width, dsize.width, inv_fx, xmap_tab, xalpha_tab, xofs_tab); + ocl_computeResizeAreaTabs(ssize.height, dsize.height, inv_fy, ymap_tab, yalpha_tab, yofs_tab); + + // loading precomputed arrays to GPU + Mat(1, xytab_size, CV_32FC1, (void *)_xyalpha_tab).copyTo(alphaOcl); + Mat(1, xytab_size, CV_32SC1, (void *)_xymap_tab).copyTo(mapOcl); + Mat(1, tabofs_size, CV_32SC1, (void *)_xyofs_tab).copyTo(tabofsOcl); + } + + ocl::KernelArg srcarg = ocl::KernelArg::ReadOnly(src), dstarg = ocl::KernelArg::WriteOnly(dst); + + if (is_area_fast) + k.args(srcarg, dstarg, ocl::KernelArg::PtrReadOnly(dmap), ocl::KernelArg::PtrReadOnly(smap)); + else + k.args(srcarg, dstarg, inv_fxf, inv_fyf, ocl::KernelArg::PtrReadOnly(tabofsOcl), + ocl::KernelArg::PtrReadOnly(mapOcl), ocl::KernelArg::PtrReadOnly(alphaOcl)); + + return k.run(2, globalsize, NULL, false); } if( k.empty() ) return false; k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst), - (float)(1./fx), (float)(1./fy)); - size_t globalsize[] = { dst.cols, dst.rows }; + (float)inv_fx, (float)inv_fy); return k.run(2, globalsize, 0, false); } @@ -2069,7 +2193,7 @@ void cv::resize( InputArray _src, OutputArray _dst, Size dsize, } if( ocl::useOpenCL() && _dst.kind() == _InputArray::UMAT && - ocl_resize(_src, _dst, dsize, inv_scale_x, inv_scale_y, interpolation) ) + ocl_resize(_src, _dst, dsize, inv_scale_x, inv_scale_y, interpolation)) return; Mat src = _src.getMat(); diff --git a/modules/imgproc/src/opencl/resize.cl b/modules/imgproc/src/opencl/resize.cl index f7dea1d..9249161 100644 --- a/modules/imgproc/src/opencl/resize.cl +++ b/modules/imgproc/src/opencl/resize.cl @@ -43,16 +43,8 @@ // //M*/ - -// resize kernel -// Currently, CV_8UC1 CV_8UC4 CV_32FC1 and CV_32FC4are supported. -// We shall support other types later if necessary. - #if defined DOUBLE_SUPPORT #pragma OPENCL EXTENSION cl_khr_fp64:enable -#define F double -#else -#define F float #endif #define INTER_RESIZE_COEF_BITS 11 @@ -141,8 +133,8 @@ __kernel void resizeNN(__global const uchar* srcptr, int srcstep, int srcoffset, if( dx < dstcols && dy < dstrows ) { - F s1 = dx*ifx; - F s2 = dy*ify; + float s1 = dx*ifx; + float s2 = dy*ify; int sx = min(convert_int_rtz(s1), srccols-1); int sy = min(convert_int_rtz(s2), srcrows-1); @@ -153,4 +145,91 @@ __kernel void resizeNN(__global const uchar* srcptr, int srcstep, int srcoffset, } } +#elif defined INTER_AREA + +#define TSIZE ((int)(sizeof(T))) + +#ifdef INTER_AREA_FAST + +__kernel void resizeAREA_FAST(__global const uchar * src, int src_step, int src_offset, int src_rows, int src_cols, + __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols, + __global const int * dmap_tab, __global const int * smap_tab) +{ + int dx = get_global_id(0); + int dy = get_global_id(1); + + if (dx < dst_cols && dy < dst_rows) + { + int dst_index = mad24(dy, dst_step, dst_offset); + + __global const int * xmap_tab = dmap_tab; + __global const int * ymap_tab = dmap_tab + dst_cols; + __global const int * sxmap_tab = smap_tab; + __global const int * symap_tab = smap_tab + XSCALE * dst_cols; + + int sx = xmap_tab[dx], sy = ymap_tab[dy]; + WTV sum = (WTV)(0); + + #pragma unroll + for (int y = 0; y < YSCALE; ++y) + { + int src_index = mad24(symap_tab[y + sy], src_step, src_offset); + #pragma unroll + for (int x = 0; x < XSCALE; ++x) + sum += convertToWTV(((__global const T*)(src + src_index))[sxmap_tab[sx + x]]); + } + + ((__global T*)(dst + dst_index))[dx] = convertToT(convertToWT2V(sum) * (WT2V)(SCALE)); + } +} + +#else + +__kernel void resizeAREA(__global const uchar * src, int src_step, int src_offset, int src_rows, int src_cols, + __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols, + float ifx, float ify, __global const int * ofs_tab, + __global const int * map_tab, __global const float * alpha_tab) +{ + int dx = get_global_id(0); + int dy = get_global_id(1); + + if (dx < dst_cols && dy < dst_rows) + { + int dst_index = mad24(dy, dst_step, dst_offset); + + __global const int * xmap_tab = map_tab; + __global const int * ymap_tab = (__global const int *)(map_tab + (src_cols << 1)); + __global const float * xalpha_tab = alpha_tab; + __global const float * yalpha_tab = (__global const float *)(alpha_tab + (src_cols << 1)); + __global const int * xofs_tab = ofs_tab; + __global const int * yofs_tab = (__global const int *)(ofs_tab + dst_cols + 1); + + int xk0 = xofs_tab[dx], xk1 = xofs_tab[dx + 1]; + int yk0 = yofs_tab[dy], yk1 = yofs_tab[dy + 1]; + + int sy0 = ymap_tab[yk0], sy1 = ymap_tab[yk1 - 1]; + int sx0 = xmap_tab[xk0], sx1 = xmap_tab[xk1 - 1]; + + WTV sum = (WTV)(0), buf; + int src_index = mad24(sy0, src_step, src_offset); + + for (int sy = sy0, yk = yk0; sy <= sy1; ++sy, src_index += src_step, ++yk) + { + WTV beta = (WTV)(yalpha_tab[yk]); + buf = (WTV)(0); + + for (int sx = sx0, xk = xk0; sx <= sx1; ++sx, ++xk) + { + WTV alpha = (WTV)(xalpha_tab[xk]); + buf += convertToWTV(((__global const T*)(src + src_index))[sx]) * alpha; + } + sum += buf * beta; + } + + ((__global T*)(dst + dst_index))[dx] = convertToT(sum); + } +} + +#endif + #endif diff --git a/modules/imgproc/test/ocl/test_warp.cpp b/modules/imgproc/test/ocl/test_warp.cpp index 80a55dc..47971aa 100644 --- a/modules/imgproc/test/ocl/test_warp.cpp +++ b/modules/imgproc/test/ocl/test_warp.cpp @@ -127,13 +127,20 @@ OCL_TEST_P(Resize, Mat) ///////////////////////////////////////////////////////////////////////////////////// -OCL_INSTANTIATE_TEST_CASE_P(ImgprocWarp, Resize, Combine( - Values(CV_8UC1, CV_8UC4, CV_16UC2, CV_32FC1, CV_32FC4), - Values(0.5, 1.5, 2.0), - Values(0.5, 1.5, 2.0), +OCL_INSTANTIATE_TEST_CASE_P(ImgprocWarpResize, Resize, Combine( + Values((MatType)CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), + Values(0.7, 0.4, 2.0), + Values(0.3, 0.6, 2.0), Values((Interpolation)INTER_NEAREST, (Interpolation)INTER_LINEAR), Bool())); +OCL_INSTANTIATE_TEST_CASE_P(ImgprocWarpResizeArea, Resize, Combine( + Values((MatType)CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), + Values(0.7, 0.4, 0.5), + Values(0.3, 0.6, 0.5), + Values((Interpolation)INTER_AREA), + Bool())); + } } // namespace cvtest::ocl #endif // HAVE_OPENCL diff --git a/modules/ts/include/opencv2/ts/ocl_test.hpp b/modules/ts/include/opencv2/ts/ocl_test.hpp index 008ce37..e109695 100644 --- a/modules/ts/include/opencv2/ts/ocl_test.hpp +++ b/modules/ts/include/opencv2/ts/ocl_test.hpp @@ -305,7 +305,7 @@ IMPLEMENT_PARAM_CLASS(Channels, int) #define OCL_ALL_DEPTHS Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F) #define OCL_ALL_CHANNELS Values(1, 2, 3, 4) -CV_ENUM(Interpolation, INTER_NEAREST, INTER_LINEAR, INTER_CUBIC) +CV_ENUM(Interpolation, INTER_NEAREST, INTER_LINEAR, INTER_CUBIC, INTER_AREA) #define OCL_INSTANTIATE_TEST_CASE_P(prefix, test_case_name, generator) \ INSTANTIATE_TEST_CASE_P(OCL_ ## prefix, test_case_name, generator)