From: Aaron Kunze Date: Mon, 14 Apr 2014 23:09:17 +0000 (-0700) Subject: Optimizes OpenCL resize and optical flow to use image extension. X-Git-Tag: submit/tizen_ivi/20141117.190038~2^2~463^2 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=ebbac3c66d6ad6f9bb1eb3932e6aad5a1c42ae98;p=profile%2Fivi%2Fopencv.git Optimizes OpenCL resize and optical flow to use image extension. --- diff --git a/modules/core/include/opencv2/core/ocl.hpp b/modules/core/include/opencv2/core/ocl.hpp index fdb6f9a..63cb926 100644 --- a/modules/core/include/opencv2/core/ocl.hpp +++ b/modules/core/include/opencv2/core/ocl.hpp @@ -151,6 +151,10 @@ public: bool imageSupport() const; + bool imageFromBufferSupport() const; + uint imagePitchAlignment() const; + uint imageBaseAddressAlignment() const; + size_t image2DMaxWidth() const; size_t image2DMaxHeight() const; @@ -602,12 +606,25 @@ class CV_EXPORTS Image2D { public: Image2D(); - explicit Image2D(const UMat &src); + + // src: The UMat from which to get image properties and data + // norm: Flag to enable the use of normalized channel data types + // alias: Flag indicating that the image should alias the src UMat. + // If true, changes to the image or src will be reflected in + // both objects. + explicit Image2D(const UMat &src, bool norm = false, bool alias = false); Image2D(const Image2D & i); ~Image2D(); Image2D & operator = (const Image2D & i); + // Indicates if creating an aliased image should succeed. Depends on the + // underlying platform and the dimensions of the UMat. + static bool canCreateAlias(const UMat &u); + + // Indicates if the image format is supported. + static bool isFormatSupported(int depth, int cn, bool norm); + void* ptr() const; protected: struct Impl; diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp index 712bbd9..c5a6b22 100644 --- a/modules/core/src/ocl.cpp +++ b/modules/core/src/ocl.cpp @@ -882,7 +882,6 @@ OCL_FUNC_P(cl_mem, clCreateImage2D, cl_int *errcode_ret), (context, flags, image_format, image_width, image_height, image_row_pitch, host_ptr, errcode_ret)) -/* OCL_FUNC(cl_int, clGetSupportedImageFormats, (cl_context context, cl_mem_flags flags, @@ -892,6 +891,7 @@ OCL_FUNC(cl_int, clGetSupportedImageFormats, cl_uint * num_image_formats), (context, flags, image_type, num_entries, image_formats, num_image_formats)) +/* OCL_FUNC(cl_int, clGetMemObjectInfo, (cl_mem memobj, cl_mem_info param_name, @@ -1912,6 +1912,38 @@ bool Device::hostUnifiedMemory() const bool Device::imageSupport() const { return p ? p->getBoolProp(CL_DEVICE_IMAGE_SUPPORT) : false; } +bool Device::imageFromBufferSupport() const +{ + bool ret = false; + if (p) + { + size_t pos = p->getStrProp(CL_DEVICE_EXTENSIONS).find("cl_khr_image2d_from_buffer"); + if (pos != String::npos) + { + ret = true; + } + } + return ret; +} + +uint Device::imagePitchAlignment() const +{ +#ifdef CL_DEVICE_IMAGE_PITCH_ALIGNMENT + return p ? p->getProp(CL_DEVICE_IMAGE_PITCH_ALIGNMENT) : 0; +#else + return 0; +#endif +} + +uint Device::imageBaseAddressAlignment() const +{ +#ifdef CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT + return p ? p->getProp(CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT) : 0; +#else + return 0; +#endif +} + size_t Device::image2DMaxWidth() const { return p ? p->getProp(CL_DEVICE_IMAGE2D_MAX_WIDTH) : 0; } @@ -2705,9 +2737,15 @@ struct Kernel::Impl haveTempDstUMats = true; } + void addImage(const Image2D& image) + { + images.push_back(image); + } + void finit() { cleanupUMats(); + images.clear(); if(e) { clReleaseEvent(e); e = 0; } release(); } @@ -2725,6 +2763,7 @@ struct Kernel::Impl enum { MAX_ARRS = 16 }; UMatData* u[MAX_ARRS]; int nu; + std::list images; bool haveTempDstUMats; }; @@ -2838,6 +2877,7 @@ int Kernel::set(int i, const void* value, size_t sz) int Kernel::set(int i, const Image2D& image2D) { + p->addImage(image2D); cl_mem h = (cl_mem)image2D.ptr(); return set(i, &h, sizeof(h)); } @@ -4408,11 +4448,11 @@ int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3, struct Image2D::Impl { - Impl(const UMat &src) + Impl(const UMat &src, bool norm, bool alias) { handle = 0; refcount = 1; - init(src); + init(src, norm, alias); } ~Impl() @@ -4421,24 +4461,55 @@ struct Image2D::Impl clReleaseMemObject(handle); } - void init(const UMat &src) + static cl_image_format getImageFormat(int depth, int cn, bool norm) { - CV_Assert(ocl::Device::getDefault().imageSupport()); - cl_image_format format; - int err, depth = src.depth(), cn = src.channels(); - CV_Assert(cn <= 4); - static const int channelTypes[] = { CL_UNSIGNED_INT8, CL_SIGNED_INT8, CL_UNSIGNED_INT16, CL_SIGNED_INT16, CL_SIGNED_INT32, CL_FLOAT, -1, -1 }; + static const int channelTypesNorm[] = { CL_UNORM_INT8, CL_SNORM_INT8, CL_UNORM_INT16, + CL_SNORM_INT16, -1, -1, -1, -1 }; static const int channelOrders[] = { -1, CL_R, CL_RG, -1, CL_RGBA }; - int channelType = channelTypes[depth], channelOrder = channelOrders[cn]; - if (channelType < 0 || channelOrder < 0) - CV_Error(Error::OpenCLApiCallError, "Image format is not supported"); - + int channelType = norm ? channelTypesNorm[depth] : channelTypes[depth]; + int channelOrder = channelOrders[cn]; format.image_channel_data_type = (cl_channel_type)channelType; format.image_channel_order = (cl_channel_order)channelOrder; + return format; + } + + static bool isFormatSupported(cl_image_format format) + { + cl_context context = (cl_context)Context::getDefault().ptr(); + // Figure out how many formats are supported by this context. + cl_uint numFormats = 0; + cl_int err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, + CL_MEM_OBJECT_IMAGE2D, numFormats, + NULL, &numFormats); + AutoBuffer formats(numFormats); + err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, + CL_MEM_OBJECT_IMAGE2D, numFormats, + formats, NULL); + CV_OclDbgAssert(err == CL_SUCCESS); + for (cl_uint i = 0; i < numFormats; ++i) + { + if (!memcmp(&formats[i], &format, sizeof(format))) + { + return true; + } + } + return false; + } + + void init(const UMat &src, bool norm, bool alias) + { + CV_Assert(ocl::Device::getDefault().imageSupport()); + + int err, depth = src.depth(), cn = src.channels(); + CV_Assert(cn <= 4); + cl_image_format format = getImageFormat(depth, cn, norm); + + if (!isFormatSupported(format)) + CV_Error(Error::OpenCLApiCallError, "Image format is not supported"); cl_context context = (cl_context)Context::getDefault().ptr(); cl_command_queue queue = (cl_command_queue)Queue::getDefault().ptr(); @@ -4448,6 +4519,7 @@ struct Image2D::Impl // run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support const Device & d = ocl::Device::getDefault(); int minor = d.deviceVersionMinor(), major = d.deviceVersionMajor(); + CV_Assert(!alias || canCreateAlias(src)); if (1 < major || (1 == major && 2 <= minor)) { cl_image_desc desc; @@ -4456,9 +4528,9 @@ struct Image2D::Impl desc.image_height = src.rows; desc.image_depth = 0; desc.image_array_size = 1; - desc.image_row_pitch = 0; + desc.image_row_pitch = alias ? src.step[0] : 0; desc.image_slice_pitch = 0; - desc.buffer = NULL; + desc.buffer = alias ? (cl_mem)src.handle(ACCESS_RW) : 0; desc.num_mip_levels = 0; desc.num_samples = 0; handle = clCreateImage(context, CL_MEM_READ_WRITE, &format, &desc, NULL, &err); @@ -4467,6 +4539,7 @@ struct Image2D::Impl #endif { CV_SUPPRESS_DEPRECATED_START + CV_Assert(!alias); // This is an OpenCL 1.2 extension handle = clCreateImage2D(context, CL_MEM_READ_WRITE, &format, src.cols, src.rows, 0, NULL, &err); CV_SUPPRESS_DEPRECATED_END } @@ -4476,7 +4549,7 @@ struct Image2D::Impl size_t region[] = { src.cols, src.rows, 1 }; cl_mem devData; - if (!src.isContinuous()) + if (!alias && !src.isContinuous()) { devData = clCreateBuffer(context, CL_MEM_READ_ONLY, src.cols * src.rows * src.elemSize(), NULL, &err); CV_OclDbgAssert(err == CL_SUCCESS); @@ -4487,14 +4560,19 @@ struct Image2D::Impl CV_OclDbgAssert(clFlush(queue) == CL_SUCCESS); } else + { devData = (cl_mem)src.handle(ACCESS_READ); + } CV_Assert(devData != NULL); - CV_OclDbgAssert(clEnqueueCopyBufferToImage(queue, devData, handle, 0, origin, region, 0, NULL, 0) == CL_SUCCESS); - if (!src.isContinuous()) + if (!alias) { - CV_OclDbgAssert(clFlush(queue) == CL_SUCCESS); - CV_OclDbgAssert(clReleaseMemObject(devData) == CL_SUCCESS); + CV_OclDbgAssert(clEnqueueCopyBufferToImage(queue, devData, handle, 0, origin, region, 0, NULL, 0) == CL_SUCCESS); + if (!src.isContinuous()) + { + CV_OclDbgAssert(clFlush(queue) == CL_SUCCESS); + CV_OclDbgAssert(clReleaseMemObject(devData) == CL_SUCCESS); + } } } @@ -4508,9 +4586,37 @@ Image2D::Image2D() p = NULL; } -Image2D::Image2D(const UMat &src) +Image2D::Image2D(const UMat &src, bool norm, bool alias) { - p = new Impl(src); + p = new Impl(src, norm, alias); +} + +bool Image2D::canCreateAlias(const UMat &m) +{ + bool ret = false; + const Device & d = ocl::Device::getDefault(); + if (d.imageFromBufferSupport()) + { + // This is the required pitch alignment in pixels + uint pitchAlign = d.imagePitchAlignment(); + if (pitchAlign && !(m.step % (pitchAlign * m.elemSize()))) + { + // We don't currently handle the case where the buffer was created + // with CL_MEM_USE_HOST_PTR + if (!m.u->tempUMat()) + { + ret = true; + } + } + } + return ret; +} + +bool Image2D::isFormatSupported(int depth, int cn, bool norm) +{ + cl_image_format format = Impl::getImageFormat(depth, cn, norm); + + return Impl::isFormatSupported(format); } Image2D::Image2D(const Image2D & i) diff --git a/modules/imgproc/src/imgwarp.cpp b/modules/imgproc/src/imgwarp.cpp index 45a66bd..03a9d79 100644 --- a/modules/imgproc/src/imgwarp.cpp +++ b/modules/imgproc/src/imgwarp.cpp @@ -2037,15 +2037,6 @@ static void ocl_computeResizeAreaTabs(int ssize, int dsize, double scale, int * ofs_tab[dx] = k; } -static void ocl_computeResizeAreaFastTabs(int * dmap_tab, int * smap_tab, int scale, int dcols, int scol) -{ - for (int i = 0; i < dcols; ++i) - dmap_tab[i] = scale * i; - - for (int i = 0, size = dcols * scale; i < size; ++i) - smap_tab[i] = std::min(scol - 1, i); -} - static bool ocl_resize( InputArray _src, OutputArray _dst, Size dsize, double fx, double fy, int interpolation) { @@ -2075,7 +2066,39 @@ static bool ocl_resize( InputArray _src, OutputArray _dst, Size dsize, ocl::Kernel k; size_t globalsize[] = { dst.cols, dst.rows }; - if (interpolation == INTER_LINEAR) + ocl::Image2D srcImage; + + // See if this could be done with a sampler. We stick with integer + // datatypes because the observed error is low. + bool useSampler = (interpolation == INTER_LINEAR && ocl::Device::getDefault().imageSupport() && + ocl::Image2D::canCreateAlias(src) && depth <= 4 && + ocl::Image2D::isFormatSupported(depth, cn, true)); + if (useSampler) + { + int wdepth = std::max(depth, CV_32S); + char buf[2][32]; + cv::String compileOpts = format("-D USE_SAMPLER -D depth=%d -D T=%s -D T1=%s " + "-D convertToDT=%s -D cn=%d", + depth, ocl::typeToStr(type), ocl::typeToStr(depth), + ocl::convertTypeStr(wdepth, depth, cn, buf[1]), + cn); + k.create("resizeSampler", ocl::imgproc::resize_oclsrc, compileOpts); + + if(k.empty()) + { + useSampler = false; + } + else + { + // Convert the input into an OpenCL image type, using normalized channel data types + // and aliasing the UMat. + srcImage = ocl::Image2D(src, true, true); + k.args(srcImage, ocl::KernelArg::WriteOnly(dst), + (float)inv_fx, (float)inv_fy); + } + } + + if (interpolation == INTER_LINEAR && !useSampler) { char buf[2][32]; @@ -2180,25 +2203,14 @@ static bool ocl_resize( InputArray _src, OutputArray _dst, Size dsize, { int wdepth2 = std::max(CV_32F, depth), wtype2 = CV_MAKE_TYPE(wdepth2, cn); buildOption = buildOption + format(" -D convertToT=%s -D WT2V=%s -D convertToWT2V=%s -D INTER_AREA_FAST" - " -D XSCALE=%d -D YSCALE=%d -D SCALE=%ff", - ocl::convertTypeStr(wdepth2, depth, cn, cvt[0]), - ocl::typeToStr(wtype2), ocl::convertTypeStr(wdepth, wdepth2, cn, cvt[1]), - iscale_x, iscale_y, 1.0f / (iscale_x * iscale_y)); + " -D XSCALE=%d -D YSCALE=%d -D SCALE=%ff", + ocl::convertTypeStr(wdepth2, depth, cn, cvt[0]), + ocl::typeToStr(wtype2), ocl::convertTypeStr(wdepth, wdepth2, cn, cvt[1]), + iscale_x, iscale_y, 1.0f / (iscale_x * iscale_y)); k.create("resizeAREA_FAST", ocl::imgproc::resize_oclsrc, buildOption); if (k.empty()) return false; - - int smap_tab_size = dst.cols * iscale_x + dst.rows * iscale_y; - AutoBuffer dmap_tab(dst.cols + dst.rows), smap_tab(smap_tab_size); - int * dxmap_tab = dmap_tab, * dymap_tab = dxmap_tab + dst.cols; - int * sxmap_tab = smap_tab, * symap_tab = smap_tab + dst.cols * iscale_y; - - ocl_computeResizeAreaFastTabs(dxmap_tab, sxmap_tab, iscale_x, dst.cols, src.cols); - ocl_computeResizeAreaFastTabs(dymap_tab, symap_tab, iscale_y, dst.rows, src.rows); - - Mat(1, dst.cols + dst.rows, CV_32SC1, (void *)dmap_tab).copyTo(dmap); - Mat(1, smap_tab_size, CV_32SC1, (void *)smap_tab).copyTo(smap); } else { @@ -2228,7 +2240,7 @@ static bool ocl_resize( InputArray _src, OutputArray _dst, Size dsize, ocl::KernelArg srcarg = ocl::KernelArg::ReadOnly(src), dstarg = ocl::KernelArg::WriteOnly(dst); if (is_area_fast) - k.args(srcarg, dstarg, ocl::KernelArg::PtrReadOnly(dmap), ocl::KernelArg::PtrReadOnly(smap)); + k.args(srcarg, dstarg); else k.args(srcarg, dstarg, inv_fxf, inv_fyf, ocl::KernelArg::PtrReadOnly(tabofsOcl), ocl::KernelArg::PtrReadOnly(mapOcl), ocl::KernelArg::PtrReadOnly(alphaOcl)); diff --git a/modules/imgproc/src/opencl/resize.cl b/modules/imgproc/src/opencl/resize.cl index d656bf6..55ef069 100644 --- a/modules/imgproc/src/opencl/resize.cl +++ b/modules/imgproc/src/opencl/resize.cl @@ -67,7 +67,64 @@ #define TSIZE (int)sizeof(T1)*cn #endif -#ifdef INTER_LINEAR_INTEGER +#if defined USE_SAMPLER + +#if cn == 1 +#define READ_IMAGE(X,Y,Z) read_imagef(X,Y,Z).x +#elif cn == 2 +#define READ_IMAGE(X,Y,Z) read_imagef(X,Y,Z).xy +#elif cn == 3 +#define READ_IMAGE(X,Y,Z) read_imagef(X,Y,Z).xyz +#elif cn == 4 +#define READ_IMAGE(X,Y,Z) read_imagef(X,Y,Z) +#endif + +#define __CAT(x, y) x##y +#define CAT(x, y) __CAT(x, y) +#define INTERMEDIATE_TYPE CAT(float, cn) +#define float1 float + +#if depth == 0 +#define RESULT_SCALE 255.0f +#elif depth == 1 +#define RESULT_SCALE 127.0f +#elif depth == 2 +#define RESULT_SCALE 65535.0f +#elif depth == 3 +#define RESULT_SCALE 32767.0f +#else +#define RESULT_SCALE 1.0f +#endif + +__kernel void resizeSampler(__read_only image2d_t srcImage, + __global uchar* dstptr, int dststep, int dstoffset, + int dstrows, int dstcols, + float ifx, float ify) +{ + const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | + CLK_ADDRESS_CLAMP_TO_EDGE | + CLK_FILTER_LINEAR; + + int dx = get_global_id(0); + int dy = get_global_id(1); + + float sx = ((dx+0.5f) * ifx), sy = ((dy+0.5f) * ify); + + INTERMEDIATE_TYPE intermediate = READ_IMAGE(srcImage, sampler, (float2)(sx, sy)); + +#if depth <= 4 + T uval = convertToDT(round(intermediate * RESULT_SCALE)); +#else + T uval = convertToDT(intermediate * RESULT_SCALE); +#endif + + if(dx < dstcols && dy < dstrows) + { + storepix(uval, dstptr + mad24(dy, dststep, dstoffset + dx*TSIZE)); + } +} + +#elif defined INTER_LINEAR_INTEGER __kernel void resizeLN(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols, __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols, @@ -185,8 +242,7 @@ __kernel void resizeNN(__global const uchar * srcptr, int src_step, int src_offs #ifdef INTER_AREA_FAST __kernel void resizeAREA_FAST(__global const uchar * src, int src_step, int src_offset, int src_rows, int src_cols, - __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols, - __global const int * dmap_tab, __global const int * smap_tab) + __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols) { int dx = get_global_id(0); int dy = get_global_id(1); @@ -195,21 +251,21 @@ __kernel void resizeAREA_FAST(__global const uchar * src, int src_step, int src_ { int dst_index = mad24(dy, dst_step, dst_offset); - __global const int * xmap_tab = dmap_tab; - __global const int * ymap_tab = dmap_tab + dst_cols; - __global const int * sxmap_tab = smap_tab; - __global const int * symap_tab = smap_tab + XSCALE * dst_cols; - - int sx = xmap_tab[dx], sy = ymap_tab[dy]; + int sx = XSCALE * dx; + int sy = YSCALE * dy; WTV sum = (WTV)(0); #pragma unroll - for (int y = 0; y < YSCALE; ++y) + for (int py = 0; py < YSCALE; ++py) { - int src_index = mad24(symap_tab[y + sy], src_step, src_offset); + int y = min(sy + py, src_rows - 1); + int src_index = mad24(y, src_step, src_offset); #pragma unroll - for (int x = 0; x < XSCALE; ++x) - sum += convertToWTV(loadpix(src + mad24(sxmap_tab[sx + x], TSIZE, src_index))); + for (int px = 0; px < XSCALE; ++px) + { + int x = min(sx + px, src_cols - 1); + sum += convertToWTV(loadpix(src + src_index + x*TSIZE)); + } } storepix(convertToT(convertToWT2V(sum) * (WT2V)(SCALE)), dst + mad24(dx, TSIZE, dst_index)); diff --git a/modules/imgproc/test/ocl/test_warp.cpp b/modules/imgproc/test/ocl/test_warp.cpp index f9ccef8..d59cf75 100644 --- a/modules/imgproc/test/ocl/test_warp.cpp +++ b/modules/imgproc/test/ocl/test_warp.cpp @@ -158,9 +158,10 @@ OCL_TEST_P(WarpPerspective, Mat) ///////////////////////////////////////////////////////////////////////////////////////////////// //// resize -PARAM_TEST_CASE(Resize, MatType, double, double, Interpolation, bool) +PARAM_TEST_CASE(Resize, MatType, double, double, Interpolation, bool, int) { int type, interpolation; + int widthMultiple; double fx, fy; bool useRoi; @@ -174,6 +175,7 @@ PARAM_TEST_CASE(Resize, MatType, double, double, Interpolation, bool) fy = GET_PARAM(2); interpolation = GET_PARAM(3); useRoi = GET_PARAM(4); + widthMultiple = GET_PARAM(5); } void random_roi() @@ -181,6 +183,9 @@ PARAM_TEST_CASE(Resize, MatType, double, double, Interpolation, bool) CV_Assert(fx > 0 && fy > 0); Size srcRoiSize = randomSize(1, MAX_VALUE), dstRoiSize; + // Make sure the width is a multiple of the requested value, and no more + srcRoiSize.width &= ~((widthMultiple * 2) - 1); + srcRoiSize.width += widthMultiple; dstRoiSize.width = cvRound(srcRoiSize.width * fx); dstRoiSize.height = cvRound(srcRoiSize.height * fy); @@ -334,14 +339,16 @@ OCL_INSTANTIATE_TEST_CASE_P(ImgprocWarp, Resize, Combine( Values(0.5, 1.5, 2.0, 0.2), Values(0.5, 1.5, 2.0, 0.2), Values((Interpolation)INTER_NEAREST, (Interpolation)INTER_LINEAR), - Bool())); + Bool(), + Values(1, 16))); OCL_INSTANTIATE_TEST_CASE_P(ImgprocWarpResizeArea, Resize, Combine( Values((MatType)CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(0.7, 0.4, 0.5), Values(0.3, 0.6, 0.5), Values((Interpolation)INTER_AREA), - Bool())); + Bool(), + Values(1, 16))); OCL_INSTANTIATE_TEST_CASE_P(ImgprocWarp, Remap_INTER_LINEAR, Combine( Values(CV_8U, CV_16U, CV_32F), diff --git a/modules/video/src/lkpyramid.cpp b/modules/video/src/lkpyramid.cpp index 5b653c9..4f0f313 100644 --- a/modules/video/src/lkpyramid.cpp +++ b/modules/video/src/lkpyramid.cpp @@ -890,6 +890,26 @@ namespace cv std::vector prevPyr; prevPyr.resize(maxLevel + 1); std::vector nextPyr; nextPyr.resize(maxLevel + 1); + // allocate buffers with aligned pitch to be able to use cl_khr_image2d_from_buffer extention + // This is the required pitch alignment in pixels + int pitchAlign = (int)ocl::Device::getDefault().imagePitchAlignment(); + if (pitchAlign>0) + { + prevPyr[0] = UMat(prevImg.rows,(prevImg.cols+pitchAlign-1)&(-pitchAlign),prevImg.type()).colRange(0,prevImg.cols); + nextPyr[0] = UMat(nextImg.rows,(nextImg.cols+pitchAlign-1)&(-pitchAlign),nextImg.type()).colRange(0,nextImg.cols); + for (int level = 1; level <= maxLevel; ++level) + { + int cols,rows; + // allocate buffers with aligned pitch to be able to use image on buffer extention + cols = (prevPyr[level - 1].cols+1)/2; + rows = (prevPyr[level - 1].rows+1)/2; + prevPyr[level] = UMat(rows,(cols+pitchAlign-1)&(-pitchAlign),prevPyr[level-1].type()).colRange(0,cols); + cols = (nextPyr[level - 1].cols+1)/2; + rows = (nextPyr[level - 1].rows+1)/2; + nextPyr[level] = UMat(rows,(cols+pitchAlign-1)&(-pitchAlign),nextPyr[level-1].type()).colRange(0,cols); + } + } + prevImg.convertTo(prevPyr[0], CV_32F); nextImg.convertTo(nextPyr[0], CV_32F); @@ -969,8 +989,10 @@ namespace cv if (!kernel.create("lkSparse", cv::ocl::video::pyrlk_oclsrc, build_options)) return false; - ocl::Image2D imageI(I); - ocl::Image2D imageJ(J); + CV_Assert(I.depth() == CV_32F && J.depth() == CV_32F); + ocl::Image2D imageI(I, false, ocl::Image2D::canCreateAlias(I)); + ocl::Image2D imageJ(J, false, ocl::Image2D::canCreateAlias(J)); + int idxArg = 0; idxArg = kernel.set(idxArg, imageI); //image2d_t I idxArg = kernel.set(idxArg, imageJ); //image2d_t J @@ -1070,7 +1092,9 @@ void cv::calcOpticalFlowPyrLK( InputArray _prevImg, InputArray _nextImg, TermCriteria criteria, int flags, double minEigThreshold ) { - bool use_opencl = ocl::useOpenCL() && (_prevImg.isUMat() || _nextImg.isUMat()); + bool use_opencl = ocl::useOpenCL() && + (_prevImg.isUMat() || _nextImg.isUMat()) && + ocl::Image2D::isFormatSupported(CV_32F, 1, false); if ( use_opencl && ocl_calcOpticalFlowPyrLK(_prevImg, _nextImg, _prevPts, _nextPts, _status, _err, winSize, maxLevel, criteria, flags/*, minEigThreshold*/)) return; diff --git a/modules/video/src/tvl1flow.cpp b/modules/video/src/tvl1flow.cpp index fad73ef..914f09c 100644 --- a/modules/video/src/tvl1flow.cpp +++ b/modules/video/src/tvl1flow.cpp @@ -352,7 +352,9 @@ OpticalFlowDual_TVL1::OpticalFlowDual_TVL1() void OpticalFlowDual_TVL1::calc(InputArray _I0, InputArray _I1, InputOutputArray _flow) { - CV_OCL_RUN(_flow.isUMat(), calc_ocl(_I0, _I1, _flow)) + CV_OCL_RUN(_flow.isUMat() && + ocl::Image2D::isFormatSupported(CV_32F, 1, false), + calc_ocl(_I0, _I1, _flow)) Mat I0 = _I0.getMat(); Mat I1 = _I1.getMat();