X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=modules%2Focl%2Fsrc%2Fmatch_template.cpp;h=c95ad84215757dceabdf2737447bec34efa67845;hb=aacf188e837324b5e35dac9d2accaa332a83d346;hp=7c0a7ac5dbdbebcc52db91a7f0e2f4bc7f5e9ad9;hpb=43aec5ad4ad4460ad588d674263920df8c1ecb2e;p=profile%2Fivi%2Fopencv.git diff --git a/modules/ocl/src/match_template.cpp b/modules/ocl/src/match_template.cpp index 7c0a7ac..c95ad84 100644 --- a/modules/ocl/src/match_template.cpp +++ b/modules/ocl/src/match_template.cpp @@ -25,7 +25,7 @@ // // * Redistribution's in binary form must reproduce the above copyright notice, // this list of conditions and the following disclaimer in the documentation -// and/or other oclMaterials provided with the distribution. +// and/or other materials provided with the distribution. // // * The name of the copyright holders may not be used to endorse or promote products // derived from this software without specific prior written permission. @@ -44,22 +44,11 @@ //M*/ -#include #include "precomp.hpp" +#include "opencl_kernels.hpp" using namespace cv; using namespace cv::ocl; -using namespace std; - -//helper routines -namespace cv -{ - namespace ocl - { - ///////////////////////////OpenCL kernel strings/////////////////////////// - extern const char *match_template; - } -} namespace cv { @@ -99,11 +88,25 @@ namespace cv // Evaluates optimal template's area threshold. If // template's area is less than the threshold, we use naive match // template version, otherwise FFT-based (if available) - static bool useNaive(int , int , Size ) + static bool useNaive(int method, int depth, Size size) { - // FIXME! - // always use naive until convolve is imported +#ifdef HAVE_CLAMDFFT + if (method == TM_SQDIFF && (depth == CV_32F || !Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE))) + { + return true; + } + else if(method == TM_CCORR || (method == TM_SQDIFF && depth == CV_8U)) + { + return size.height < 18 && size.width < 18; + } + else + return false; +#else +#define UNUSED(x) (void)(x); + UNUSED(method) UNUSED(depth) UNUSED(size) +#undef UNUSED return true; +#endif } ////////////////////////////////////////////////////////////////////// @@ -112,7 +115,7 @@ namespace cv const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf & buf) { result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F); - if (useNaive(CV_TM_SQDIFF, image.depth(), templ.size())) + if (useNaive(TM_SQDIFF, image.depth(), templ.size())) { matchTemplateNaive_SQDIFF(image, templ, result, image.oclchannels()); return; @@ -132,20 +135,20 @@ namespace cv //port CUDA's matchTemplatePrepared_SQDIFF_8U Context *clCxt = image.clCxt; - string kernelName = "matchTemplate_Prepared_SQDIFF"; - vector< pair > args; - - args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data)); - args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data)); - args.push_back( make_pair( sizeof(cl_ulong), (void *)&templ_sqsum)); - args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows)); - args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols)); - args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows)); - args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols)); - args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset)); - args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step)); - args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset)); - args.push_back( make_pair( sizeof(cl_int), (void *)&result.step)); + String kernelName = "matchTemplate_Prepared_SQDIFF"; + std::vector< std::pair > args; + + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&result.data)); + args.push_back( std::make_pair( sizeof(cl_ulong), (void *)&templ_sqsum)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.rows)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.cols)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.rows)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.cols)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.offset)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.step)); size_t globalThreads[3] = {result.cols, result.rows, 1}; size_t localThreads[3] = {16, 16, 1}; @@ -166,20 +169,20 @@ namespace cv unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0]; Context *clCxt = image.clCxt; - string kernelName = "matchTemplate_Prepared_SQDIFF_NORMED"; - vector< pair > args; - - args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data)); - args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data)); - args.push_back( make_pair( sizeof(cl_ulong), (void *)&templ_sqsum)); - args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows)); - args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols)); - args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows)); - args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols)); - args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset)); - args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step)); - args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset)); - args.push_back( make_pair( sizeof(cl_int), (void *)&result.step)); + String kernelName = "matchTemplate_Prepared_SQDIFF_NORMED"; + std::vector< std::pair > args; + + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&result.data)); + args.push_back( std::make_pair( sizeof(cl_ulong), (void *)&templ_sqsum)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.rows)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.cols)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.rows)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.cols)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.offset)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.step)); size_t globalThreads[3] = {result.cols, result.rows, 1}; size_t localThreads[3] = {16, 16, 1}; @@ -196,25 +199,25 @@ namespace cv CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1); Context *clCxt = image.clCxt; - string kernelName = "matchTemplate_Naive_SQDIFF"; - - vector< pair > args; - - args.push_back( make_pair( sizeof(cl_mem), (void *)&image.data)); - args.push_back( make_pair( sizeof(cl_mem), (void *)&templ.data)); - args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data)); - args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows)); - args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols)); - args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows)); - args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols)); - args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows)); - args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols)); - args.push_back( make_pair( sizeof(cl_int), (void *)&image.offset)); - args.push_back( make_pair( sizeof(cl_int), (void *)&templ.offset)); - args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset)); - args.push_back( make_pair( sizeof(cl_int), (void *)&image.step)); - args.push_back( make_pair( sizeof(cl_int), (void *)&templ.step)); - args.push_back( make_pair( sizeof(cl_int), (void *)&result.step)); + String kernelName = "matchTemplate_Naive_SQDIFF"; + + std::vector< std::pair > args; + + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&image.data)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&templ.data)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&result.data)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.rows)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.cols)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.rows)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.cols)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.rows)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.cols)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.offset)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.offset)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.offset)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.step)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.step)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.step)); size_t globalThreads[3] = {result.cols, result.rows, 1}; size_t localThreads[3] = {16, 16, 1}; @@ -224,16 +227,25 @@ namespace cv ////////////////////////////////////////////////////////////////////// // CCORR void convolve_32F( - const oclMat &, const oclMat &, oclMat &, MatchTemplateBuf &) + const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf) { - CV_Error(-1, "convolve is not fully implemented yet"); + ConvolveBuf convolve_buf; + convolve_buf.user_block_size = buf.user_block_size; + if (image.oclchannels() == 1) + convolve(image, templ, result, true, convolve_buf); + else + { + oclMat result_; + convolve(image.reshape(1), templ.reshape(1), result_, true, convolve_buf); + extractFirstChannel_32F(result_, result); + } } void matchTemplate_CCORR( const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf) { result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F); - if (useNaive(CV_TM_CCORR, image.depth(), templ.size())) + if (useNaive(TM_CCORR, image.depth(), templ.size())) { matchTemplateNaive_CCORR(image, templ, result, image.oclchannels()); return; @@ -256,29 +268,32 @@ namespace cv void matchTemplate_CCORR_NORMED( const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf) { + cv::ocl::oclMat temp; matchTemplate_CCORR(image, templ, result, buf); buf.image_sums.resize(1); buf.image_sqsums.resize(1); - - integral(image.reshape(1), buf.image_sums[0], buf.image_sqsums[0]); - + integral(image.reshape(1), buf.image_sums[0], temp); + if(temp.depth() == CV_64F) + temp.convertTo(buf.image_sqsums[0], CV_32FC1); + else + buf.image_sqsums[0] = temp; unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0]; Context *clCxt = image.clCxt; - string kernelName = "normalizeKernel"; - vector< pair > args; - - args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data)); - args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data)); - args.push_back( make_pair( sizeof(cl_ulong), (void *)&templ_sqsum)); - args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows)); - args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols)); - args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows)); - args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols)); - args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset)); - args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step)); - args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset)); - args.push_back( make_pair( sizeof(cl_int), (void *)&result.step)); + String kernelName = "normalizeKernel"; + std::vector< std::pair > args; + + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&result.data)); + args.push_back( std::make_pair( sizeof(cl_ulong), (void *)&templ_sqsum)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.rows)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.cols)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.rows)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.cols)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.offset)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.step)); size_t globalThreads[3] = {result.cols, result.rows, 1}; size_t localThreads[3] = {16, 16, 1}; @@ -295,25 +310,25 @@ namespace cv CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1); Context *clCxt = image.clCxt; - string kernelName = "matchTemplate_Naive_CCORR"; - - vector< pair > args; - - args.push_back( make_pair( sizeof(cl_mem), (void *)&image.data)); - args.push_back( make_pair( sizeof(cl_mem), (void *)&templ.data)); - args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data)); - args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows)); - args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols)); - args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows)); - args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols)); - args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows)); - args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols)); - args.push_back( make_pair( sizeof(cl_int), (void *)&image.offset)); - args.push_back( make_pair( sizeof(cl_int), (void *)&templ.offset)); - args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset)); - args.push_back( make_pair( sizeof(cl_int), (void *)&image.step)); - args.push_back( make_pair( sizeof(cl_int), (void *)&templ.step)); - args.push_back( make_pair( sizeof(cl_int), (void *)&result.step)); + String kernelName = "matchTemplate_Naive_CCORR"; + + std::vector< std::pair > args; + + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&image.data)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&templ.data)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&result.data)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.rows)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.cols)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.rows)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.cols)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.rows)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.cols)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.offset)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.offset)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.offset)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.step)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.step)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.step)); size_t globalThreads[3] = {result.cols, result.rows, 1}; size_t localThreads[3] = {16, 16, 1}; @@ -329,22 +344,22 @@ namespace cv matchTemplate_CCORR(image, templ, result, buf); Context *clCxt = image.clCxt; - string kernelName; + String kernelName; kernelName = "matchTemplate_Prepared_CCOFF"; size_t globalThreads[3] = {result.cols, result.rows, 1}; size_t localThreads[3] = {16, 16, 1}; - vector< pair > args; - args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data) ); - args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows) ); - args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols) ); - args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows) ); - args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols) ); - args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows) ); - args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols) ); - args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset)); - args.push_back( make_pair( sizeof(cl_int), (void *)&result.step)); + std::vector< std::pair > args; + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&result.data) ); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.rows) ); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.cols) ); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.rows) ); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.cols) ); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.rows) ); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.cols) ); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.offset)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.step)); Vec4f templ_sum = Vec4f::all(0); // to be continued in the following section if(image.oclchannels() == 1) @@ -353,10 +368,10 @@ namespace cv integral(image, buf.image_sums[0]); templ_sum[0] = (float)sum(templ)[0] / templ.size().area(); - args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) ); - args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) ); - args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) ); - args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[0]) ); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) ); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) ); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) ); + args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[0]) ); } else { @@ -373,19 +388,19 @@ namespace cv switch(image.oclchannels()) { case 4: - args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) ); - args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[1].data) ); - args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[2].data) ); - args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[3].data) ); - args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) ); - args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) ); - args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[0]) ); - args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[1]) ); - args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[2]) ); - args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[3]) ); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) ); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sums[1].data) ); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sums[2].data) ); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sums[3].data) ); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) ); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) ); + args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[0]) ); + args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[1]) ); + args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[2]) ); + args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[3]) ); break; default: - CV_Error(CV_StsBadArg, "matchTemplate: unsupported number of channels"); + CV_Error(Error::StsBadArg, "matchTemplate: unsupported number of channels"); break; } } @@ -402,23 +417,23 @@ namespace cv float scale = 1.f / templ.size().area(); Context *clCxt = image.clCxt; - string kernelName; + String kernelName; kernelName = "matchTemplate_Prepared_CCOFF_NORMED"; size_t globalThreads[3] = {result.cols, result.rows, 1}; size_t localThreads[3] = {16, 16, 1}; - vector< pair > args; - args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data) ); - args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows) ); - args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols) ); - args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows) ); - args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols) ); - args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows) ); - args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols) ); - args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset)); - args.push_back( make_pair( sizeof(cl_int), (void *)&result.step)); - args.push_back( make_pair( sizeof(cl_float), (void *)&scale) ); + std::vector< std::pair > args; + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&result.data) ); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.rows) ); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.cols) ); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.rows) ); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.cols) ); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.rows) ); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.cols) ); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.offset)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.step)); + args.push_back( std::make_pair( sizeof(cl_float), (void *)&scale) ); Vec4f templ_sum = Vec4f::all(0); Vec4f templ_sqsum = Vec4f::all(0); @@ -427,7 +442,12 @@ namespace cv { buf.image_sums.resize(1); buf.image_sqsums.resize(1); - integral(image, buf.image_sums[0], buf.image_sqsums[0]); + cv::ocl::oclMat temp; + integral(image, buf.image_sums[0], temp); + if(temp.depth() == CV_64F) + temp.convertTo(buf.image_sqsums[0], CV_32FC1); + else + buf.image_sqsums[0] = temp; templ_sum[0] = (float)sum(templ)[0]; @@ -436,14 +456,14 @@ namespace cv templ_sqsum[0] -= scale * templ_sum[0] * templ_sum[0]; templ_sum[0] *= scale; - args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) ); - args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) ); - args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) ); - args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data) ); - args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset) ); - args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step) ); - args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[0]) ); - args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sqsum[0]) ); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) ); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) ); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) ); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data) ); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset) ); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step) ); + args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[0]) ); + args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sqsum[0]) ); } else { @@ -463,35 +483,39 @@ namespace cv templ_sum *= scale; buf.image_sums.resize(buf.images.size()); buf.image_sqsums.resize(buf.images.size()); - + cv::ocl::oclMat temp; for(int i = 0; i < image.oclchannels(); i ++) { - integral(buf.images[i], buf.image_sums[i], buf.image_sqsums[i]); + integral(buf.images[i], buf.image_sums[i], temp); + if(temp.depth() == CV_64F) + temp.convertTo(buf.image_sqsums[i], CV_32FC1); + else + buf.image_sqsums[i] = temp; } switch(image.oclchannels()) { case 4: - args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) ); - args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[1].data) ); - args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[2].data) ); - args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[3].data) ); - args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) ); - args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) ); - args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data) ); - args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[1].data) ); - args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[2].data) ); - args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[3].data) ); - args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset) ); - args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step) ); - args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[0]) ); - args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[1]) ); - args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[2]) ); - args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[3]) ); - args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sqsum_sum) ); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) ); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sums[1].data) ); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sums[2].data) ); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sums[3].data) ); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) ); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) ); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data) ); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[1].data) ); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[2].data) ); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[3].data) ); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset) ); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step) ); + args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[0]) ); + args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[1]) ); + args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[2]) ); + args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[3]) ); + args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sqsum_sum) ); break; default: - CV_Error(CV_StsBadArg, "matchTemplate: unsupported number of channels"); + CV_Error(Error::StsBadArg, "matchTemplate: unsupported number of channels"); break; } } @@ -500,21 +524,21 @@ namespace cv void extractFirstChannel_32F(const oclMat &image, oclMat &result) { Context *clCxt = image.clCxt; - string kernelName; + String kernelName; kernelName = "extractFirstChannel"; size_t globalThreads[3] = {result.cols, result.rows, 1}; size_t localThreads[3] = {16, 16, 1}; - vector< pair > args; - args.push_back( make_pair( sizeof(cl_mem), (void *)&image.data) ); - args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data) ); - args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows) ); - args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols) ); - args.push_back( make_pair( sizeof(cl_int), (void *)&image.offset)); - args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset)); - args.push_back( make_pair( sizeof(cl_int), (void *)&image.step)); - args.push_back( make_pair( sizeof(cl_int), (void *)&result.step)); + std::vector< std::pair > args; + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&image.data) ); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&result.data) ); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.rows) ); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.cols) ); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.offset)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.offset)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.step)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.step)); openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, -1, -1); }