From f55c85fed38bb117f83c8b50c084d0305b6b4e06 Mon Sep 17 00:00:00 2001 From: Konstantin Matskevich Date: Wed, 18 Dec 2013 09:37:57 +0400 Subject: [PATCH] morphology --- modules/imgproc/src/morph.cpp | 221 ++++++++++++++++++++++++++---- modules/imgproc/src/opencl/morph.cl | 125 +++++++++++++++++ modules/imgproc/test/ocl/test_filters.cpp | 94 +++++++++++++ 3 files changed, 412 insertions(+), 28 deletions(-) create mode 100644 modules/imgproc/src/opencl/morph.cl diff --git a/modules/imgproc/src/morph.cpp b/modules/imgproc/src/morph.cpp index 845e001..6be60dc 100644 --- a/modules/imgproc/src/morph.cpp +++ b/modules/imgproc/src/morph.cpp @@ -43,6 +43,7 @@ #include "precomp.hpp" #include #include +#include "opencl_kernels.hpp" /****************************************************************************************\ Basic Morphological Operations: Erosion & Dilation @@ -1283,11 +1284,124 @@ static bool IPPMorphOp(int op, InputArray _src, OutputArray _dst, } #endif +static const char* op2str[] = {"ERODE", "DILATE"}; + +static bool ocl_morphology_op(InputArray _src, OutputArray _dst, InputArray _kernel, Size &ksize, const Point anchor, int iterations, int op) +{ + bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; + + if (_src.depth() == CV_64F && !doubleSupport) + return false; + + UMat kernel8U; + _kernel.getUMat().convertTo(kernel8U, CV_8U); + UMat kernel = kernel8U.reshape(1, 1); + + bool rectKernel = true; + for(int i = 0; i < kernel.rows * kernel.cols; ++i) + if(kernel.getMat(ACCESS_READ).at(i) != 1) + rectKernel = false; + + UMat src = _src.getUMat(); + +#ifdef ANDROID + size_t localThreads[3] = {16, 8, 1}; +#else + size_t localThreads[3] = {16, 16, 1}; +#endif + size_t globalThreads[3] = {(src.cols + localThreads[0] - 1) / localThreads[0] *localThreads[0], (src.rows + localThreads[1] - 1) / localThreads[1] *localThreads[1], 1}; + + if(localThreads[0]*localThreads[1] * 2 < (localThreads[0] + ksize.width - 1) * (localThreads[1] + ksize.height - 1)) + return false; + + char s[64]; + + switch (src.type()) + { + case CV_8UC1: + sprintf(s, "-D VAL=%s -D GENTYPE=uchar", (op==MORPH_ERODE) ? "255" : "0"); + break; + case CV_8UC4: + sprintf(s, "-D VAL=%s -D GENTYPE=uchar4", (op==MORPH_ERODE) ? "255" : "0"); + break; + case CV_32FC1: + sprintf(s, "-D VAL=%s -D GENTYPE=float", (op==MORPH_ERODE) ? "FLT_MAX" : "-FLT_MAX"); + break; + case CV_32FC4: + sprintf(s, "-D VAL=%s -D GENTYPE=float4", (op==MORPH_ERODE) ? "FLT_MAX" : "-FLT_MAX"); + break; + case CV_64FC1: + sprintf(s, "-D VAL=%s -D GENTYPE=double", (op==MORPH_ERODE) ? "DBL_MAX" : "-DBL_MAX"); + break; + case CV_64FC4: + sprintf(s, "-D VAL=%s -D GENTYPE=double4", (op==MORPH_ERODE) ? "DBL_MAX" : "-DBL_MAX"); + break; + default: + CV_Error(Error::StsUnsupportedFormat, "unsupported type"); + } + + char compile_option[128]; + sprintf(compile_option, "-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D %s %s %s %s", + anchor.x, anchor.y, (int)localThreads[0], (int)localThreads[1], op2str[op], doubleSupport?"-D DOUBLE_SUPPORT" :"", rectKernel?"-D RECTKERNEL":"", s); + + ocl::Kernel k( "morph", ocl::imgproc::morph_oclsrc, compile_option); + if (k.empty()) + return false; + + _dst.create(src.size(), src.type()); + UMat dst = _dst.getUMat(); + + for(int i = 0; i< iterations; i++) + { + UMat source; + Size wholesize; + Point ofs; + if( i == 0) + source = src; + else + { + int cols = dst.cols, rows = dst.rows; + dst.locateROI(wholesize,ofs); + dst.adjustROI(ofs.y, wholesize.height - rows - ofs.y, ofs.x, wholesize.width - cols - ofs.x); + dst.copyTo(source); + dst.adjustROI(-ofs.y, -wholesize.height + rows + ofs.y, -ofs.x, -wholesize.width + cols + ofs.x); + source.adjustROI(-ofs.y, -wholesize.height + rows + ofs.y, -ofs.x, -wholesize.width + cols + ofs.x); + } + + source.locateROI(wholesize, ofs); + int wholecols = wholesize.width, wholerows = wholesize.height; + + int idxArg = 0; + idxArg = k.set(idxArg, ocl::KernelArg::PtrReadOnly(source)); + idxArg = k.set(idxArg, ocl::KernelArg::PtrWriteOnly(dst)); + idxArg = k.set(idxArg, (int)( (source.offset / source.elemSize())%(source.step / source.elemSize()) ) ); + idxArg = k.set(idxArg, (int)( (source.offset / source.elemSize())/(source.step / source.elemSize()) ) ); + idxArg = k.set(idxArg, source.cols); + idxArg = k.set(idxArg, source.rows); + idxArg = k.set(idxArg, (int)(source.step / source.elemSize())); + idxArg = k.set(idxArg, (int)(dst.step / dst.elemSize())); + idxArg = k.set(idxArg, ocl::KernelArg::PtrReadOnly(kernel)); + idxArg = k.set(idxArg, wholecols); + idxArg = k.set(idxArg, wholerows); + idxArg = k.set(idxArg, (int)( dst.offset / dst.elemSize() ) ); + + if (!k.run(2, globalThreads, localThreads, true)) + return false; + } + return true; +} + static void morphOp( int op, InputArray _src, OutputArray _dst, InputArray _kernel, Point anchor, int iterations, int borderType, const Scalar& borderValue ) { + bool useOpenCL = cv::ocl::useOpenCL() && _src.isUMat() && _src.size() == _dst.size() && _src.channels() == _dst.channels() && + _src.dims()<=2 && (_src.channels() == 1 || _src.channels() == 4) && (anchor.x == -1) && (anchor.y == -1) && + (_src.depth() == CV_8U || _src.depth() == CV_32F || _src.depth() == CV_64F ) && + (borderType == cv::BORDER_CONSTANT) && (borderValue == morphologyDefaultBorderValue()) && + (op == MORPH_ERODE || op == MORPH_DILATE); + Mat kernel = _kernel.getMat(); Size ksize = kernel.data ? kernel.size() : Size(3,3); anchor = normalizeAnchor(anchor, ksize); @@ -1299,13 +1413,11 @@ static void morphOp( int op, InputArray _src, OutputArray _dst, return; #endif - Mat src = _src.getMat(); - - _dst.create( src.size(), src.type() ); - Mat dst = _dst.getMat(); - if( iterations == 0 || kernel.rows*kernel.cols == 1 ) { + Mat src = _src.getMat(); + _dst.create( src.size(), src.type() ); + Mat dst = _dst.getMat(); src.copyTo(dst); return; } @@ -1326,6 +1438,14 @@ static void morphOp( int op, InputArray _src, OutputArray _dst, iterations = 1; } + if (useOpenCL && ocl_morphology_op(_src, _dst, kernel, ksize, anchor, iterations, op) ) + return; + + Mat src = _src.getMat(); + + _dst.create( src.size(), src.type() ); + Mat dst = _dst.getMat(); + int nStripes = 1; #if defined HAVE_TEGRA_OPTIMIZATION if (src.data != dst.data && iterations == 1 && //NOTE: threads are not used for inplace processing @@ -1362,49 +1482,94 @@ void cv::dilate( InputArray src, OutputArray dst, InputArray kernel, morphOp( MORPH_DILATE, src, dst, kernel, anchor, iterations, borderType, borderValue ); } - void cv::morphologyEx( InputArray _src, OutputArray _dst, int op, InputArray kernel, Point anchor, int iterations, int borderType, const Scalar& borderValue ) { - Mat src = _src.getMat(), temp; - _dst.create(src.size(), src.type()); - Mat dst = _dst.getMat(); + bool use_opencl = cv::ocl::useOpenCL() && _src.isUMat() && _src.size() == _dst.size() && _src.channels() == _dst.channels() && + _src.dims()<=2 && (_src.channels() == 1 || _src.channels() == 4) && (anchor.x == -1) && (anchor.y == -1) && + (_src.depth() == CV_8U || _src.depth() == CV_32F || _src.depth() == CV_64F ) && + (borderType == cv::BORDER_CONSTANT) && (borderValue == morphologyDefaultBorderValue()); + + _dst.create(_src.size(), _src.type()); + Mat src, dst, temp; + UMat usrc, udst, utemp; switch( op ) { case MORPH_ERODE: - erode( src, dst, kernel, anchor, iterations, borderType, borderValue ); + erode( _src, _dst, kernel, anchor, iterations, borderType, borderValue ); break; case MORPH_DILATE: - dilate( src, dst, kernel, anchor, iterations, borderType, borderValue ); + dilate( _src, _dst, kernel, anchor, iterations, borderType, borderValue ); break; case MORPH_OPEN: - erode( src, dst, kernel, anchor, iterations, borderType, borderValue ); - dilate( dst, dst, kernel, anchor, iterations, borderType, borderValue ); + erode( _src, _dst, kernel, anchor, iterations, borderType, borderValue ); + dilate( _dst, _dst, kernel, anchor, iterations, borderType, borderValue ); break; case CV_MOP_CLOSE: - dilate( src, dst, kernel, anchor, iterations, borderType, borderValue ); - erode( dst, dst, kernel, anchor, iterations, borderType, borderValue ); + dilate( _src, _dst, kernel, anchor, iterations, borderType, borderValue ); + erode( _dst, _dst, kernel, anchor, iterations, borderType, borderValue ); break; case CV_MOP_GRADIENT: - erode( src, temp, kernel, anchor, iterations, borderType, borderValue ); - dilate( src, dst, kernel, anchor, iterations, borderType, borderValue ); - dst -= temp; + erode( _src, use_opencl ? (cv::OutputArray)utemp : (cv::OutputArray)temp, kernel, anchor, iterations, borderType, borderValue ); + dilate( _src, _dst, kernel, anchor, iterations, borderType, borderValue ); + if(use_opencl) + { + udst = _dst.getUMat(); + subtract(udst, utemp, udst); + } + else + { + dst = _dst.getMat(); + dst -= temp; + } break; case CV_MOP_TOPHAT: - if( src.data != dst.data ) - temp = dst; - erode( src, temp, kernel, anchor, iterations, borderType, borderValue ); - dilate( temp, temp, kernel, anchor, iterations, borderType, borderValue ); - dst = src - temp; + if(use_opencl) + { + usrc = _src.getUMat(); + udst = _dst.getUMat(); + if( usrc.u != udst.u ) + utemp = udst; + } + else + { + src = _src.getMat(); + dst = _dst.getMat(); + if( src.data != dst.data ) + temp = dst; + } + erode( _src, use_opencl ? (cv::OutputArray)utemp : (cv::OutputArray)temp, kernel, anchor, iterations, borderType, borderValue ); + dilate( use_opencl ? (cv::OutputArray)utemp : (cv::OutputArray)temp, use_opencl ? (cv::OutputArray)utemp : (cv::OutputArray)temp, kernel, + anchor, iterations, borderType, borderValue ); + if(use_opencl) + subtract(usrc, utemp, udst); + else + dst = src - temp; break; case CV_MOP_BLACKHAT: - if( src.data != dst.data ) - temp = dst; - dilate( src, temp, kernel, anchor, iterations, borderType, borderValue ); - erode( temp, temp, kernel, anchor, iterations, borderType, borderValue ); - dst = temp - src; + if(use_opencl) + { + usrc = _src.getUMat(); + udst = _dst.getUMat(); + if( usrc.u != udst.u ) + utemp = udst; + } + else + { + src = _src.getMat(); + dst = _dst.getMat(); + if( src.data != dst.data ) + temp = dst; + } + dilate( _src, use_opencl ? (cv::OutputArray)utemp : (cv::OutputArray)temp, kernel, anchor, iterations, borderType, borderValue ); + erode( use_opencl ? (cv::OutputArray)utemp : (cv::OutputArray)temp, use_opencl ? (cv::OutputArray)utemp : (cv::OutputArray)temp, kernel, + anchor, iterations, borderType, borderValue ); + if(use_opencl) + subtract(utemp, usrc, udst); + else + dst = temp - src; break; default: CV_Error( CV_StsBadArg, "unknown morphological operation" ); diff --git a/modules/imgproc/src/opencl/morph.cl b/modules/imgproc/src/opencl/morph.cl new file mode 100644 index 0000000..69257ac --- /dev/null +++ b/modules/imgproc/src/opencl/morph.cl @@ -0,0 +1,125 @@ +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Niko Li, newlife20080214@gmail.com +// Zero Lin, zero.lin@amd.com +// Yao Wang, bitwangyaoyao@gmail.com +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +// + +#ifdef DOUBLE_SUPPORT +#ifdef cl_amd_fp64 +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#elif defined (cl_khr_fp64) +#pragma OPENCL EXTENSION cl_khr_fp64:enable +#endif +#endif + +#ifdef ERODE +#define MORPH_OP(A,B) min((A),(B)) +#endif +#ifdef DILATE +#define MORPH_OP(A,B) max((A),(B)) +#endif +//BORDER_CONSTANT: iiiiii|abcdefgh|iiiiiii +#define ELEM(i,l_edge,r_edge,elem1,elem2) (i)<(l_edge) | (i) >= (r_edge) ? (elem1) : (elem2) + +__kernel void morph(__global const GENTYPE * restrict src, + __global GENTYPE *dst, + int src_offset_x, int src_offset_y, + int cols, int rows, + int src_step_in_pixel, int dst_step_in_pixel, + __constant uchar * mat_kernel, + int src_whole_cols, int src_whole_rows, + int dst_offset_in_pixel) +{ + int l_x = get_local_id(0); + int l_y = get_local_id(1); + int x = get_group_id(0)*LSIZE0; + int y = get_group_id(1)*LSIZE1; + int start_x = x+src_offset_x-RADIUSX; + int end_x = x + src_offset_x+LSIZE0+RADIUSX; + int width = end_x -(x+src_offset_x-RADIUSX)+1; + int start_y = y+src_offset_y-RADIUSY; + int point1 = mad24(l_y,LSIZE0,l_x); + int point2 = point1 + LSIZE0*LSIZE1; + int tl_x = point1 % width; + int tl_y = point1 / width; + int tl_x2 = point2 % width; + int tl_y2 = point2 / width; + int cur_x = start_x + tl_x; + int cur_y = start_y + tl_y; + int cur_x2 = start_x + tl_x2; + int cur_y2 = start_y + tl_y2; + int start_addr = mad24(cur_y,src_step_in_pixel,cur_x); + int start_addr2 = mad24(cur_y2,src_step_in_pixel,cur_x2); + GENTYPE temp0,temp1; + __local GENTYPE LDS_DAT[2*LSIZE1*LSIZE0]; + + int end_addr = mad24(src_whole_rows - 1,src_step_in_pixel,src_whole_cols); + //read pixels from src + start_addr = ((start_addr < end_addr) && (start_addr > 0)) ? start_addr : 0; + start_addr2 = ((start_addr2 < end_addr) && (start_addr2 > 0)) ? start_addr2 : 0; + temp0 = src[start_addr]; + temp1 = src[start_addr2]; + //judge if read out of boundary + temp0= ELEM(cur_x,0,src_whole_cols,(GENTYPE)VAL,temp0); + temp0= ELEM(cur_y,0,src_whole_rows,(GENTYPE)VAL,temp0); + + temp1= ELEM(cur_x2,0,src_whole_cols,(GENTYPE)VAL,temp1); + temp1= ELEM(cur_y2,0,src_whole_rows,(GENTYPE)VAL,temp1); + + LDS_DAT[point1] = temp0; + LDS_DAT[point2] = temp1; + barrier(CLK_LOCAL_MEM_FENCE); + GENTYPE res = (GENTYPE)VAL; + for(int i=0; i<2*RADIUSY+1; i++) + for(int j=0; j<2*RADIUSX+1; j++) + { + res = +#ifndef RECTKERNEL + mat_kernel[i*(2*RADIUSX+1)+j] ? +#endif + MORPH_OP(res,LDS_DAT[mad24(l_y+i,width,l_x+j)]) +#ifndef RECTKERNEL + :res +#endif + ; + } + int gidx = get_global_id(0); + int gidy = get_global_id(1); + int out_addr = mad24(gidy,dst_step_in_pixel,gidx+dst_offset_in_pixel); + if(gidx