//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
-// and/or other oclMaterials provided with the distribution.
+// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// Decides whether template matching should use the naive (direct) kernels
// instead of the FFT-based convolve path.
//
// With HAVE_CLAMDFFT defined:
//   * TM_SQDIFF on CV_32F input, or on a context that does not report
//     FEATURE_CL_DOUBLE, always uses the naive version;
//   * TM_CCORR, and TM_SQDIFF on CV_8U input (when the rule above did not
//     apply), use the naive version only when the template is smaller than 18
//     in both height and width;
//   * every other method/depth combination returns false (not naive).
// Without HAVE_CLAMDFFT the naive version is always chosen and the arguments
// are ignored.
- static bool useNaive(int , int , Size )
+ static bool useNaive(int method, int depth, Size size)
{
- // FIXME!
- // always use naive until convolve is imported
+#ifdef HAVE_CLAMDFFT
+ if (method == TM_SQDIFF && (depth == CV_32F || !Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE)))
+ {
+ return true;
+ }
+ else if(method == TM_CCORR || (method == TM_SQDIFF && depth == CV_8U))
+ {
+ return size.height < 18 && size.width < 18;
+ }
+ else
+ return false;
+#else
// UNUSED's expansion already ends in ';', which is why the three invocations
// below are written back to back without separators. The macro is local to
// this branch and is undefined again right after use.
+#define UNUSED(x) (void)(x);
+ UNUSED(method) UNUSED(depth) UNUSED(size)
+#undef UNUSED
return true;
+#endif
}
//////////////////////////////////////////////////////////////////////
const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf & buf)
{
result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
- if (useNaive(CV_TM_SQDIFF, image.depth(), templ.size()))
+ if (useNaive(TM_SQDIFF, image.depth(), templ.size()))
{
matchTemplateNaive_SQDIFF(image, templ, result, image.oclchannels());
return;
//port CUDA's matchTemplatePrepared_SQDIFF_8U
Context *clCxt = image.clCxt;
- string kernelName = "matchTemplate_Prepared_SQDIFF";
- vector< pair<size_t, const void *> > args;
-
- args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
- args.push_back( make_pair( sizeof(cl_ulong), (void *)&templ_sqsum));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
- args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
- args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
- args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset));
- args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
+ String kernelName = "matchTemplate_Prepared_SQDIFF";
+ std::vector< std::pair<size_t, const void *> > args;
+
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data));
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&result.data));
+ args.push_back( std::make_pair( sizeof(cl_ulong), (void *)&templ_sqsum));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.rows));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.cols));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.rows));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.cols));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.offset));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.step));
size_t globalThreads[3] = {result.cols, result.rows, 1};
size_t localThreads[3] = {16, 16, 1};
unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
Context *clCxt = image.clCxt;
- string kernelName = "matchTemplate_Prepared_SQDIFF_NORMED";
- vector< pair<size_t, const void *> > args;
-
- args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
- args.push_back( make_pair( sizeof(cl_ulong), (void *)&templ_sqsum));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
- args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
- args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
- args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset));
- args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
+ String kernelName = "matchTemplate_Prepared_SQDIFF_NORMED";
+ std::vector< std::pair<size_t, const void *> > args;
+
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data));
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&result.data));
+ args.push_back( std::make_pair( sizeof(cl_ulong), (void *)&templ_sqsum));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.rows));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.cols));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.rows));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.cols));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.offset));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.step));
size_t globalThreads[3] = {result.cols, result.rows, 1};
size_t localThreads[3] = {16, 16, 1};
CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1);
Context *clCxt = image.clCxt;
- string kernelName = "matchTemplate_Naive_SQDIFF";
-
- vector< pair<size_t, const void *> > args;
-
- args.push_back( make_pair( sizeof(cl_mem), (void *)&image.data));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&templ.data));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
- args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows));
- args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols));
- args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
- args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
- args.push_back( make_pair( sizeof(cl_int), (void *)&image.offset));
- args.push_back( make_pair( sizeof(cl_int), (void *)&templ.offset));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
- args.push_back( make_pair( sizeof(cl_int), (void *)&image.step));
- args.push_back( make_pair( sizeof(cl_int), (void *)&templ.step));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
+ String kernelName = "matchTemplate_Naive_SQDIFF";
+
+ std::vector< std::pair<size_t, const void *> > args;
+
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&image.data));
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&templ.data));
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&result.data));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.rows));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.cols));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.rows));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.cols));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.rows));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.cols));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.offset));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.offset));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.offset));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.step));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.step));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.step));
size_t globalThreads[3] = {result.cols, result.rows, 1};
size_t localThreads[3] = {16, 16, 1};
//////////////////////////////////////////////////////////////////////
// CCORR
// Runs convolve() on image and templ and leaves the output in result.
//
// A local ConvolveBuf is created for the call; only user_block_size is taken
// over from the caller's MatchTemplateBuf.
// The fourth convolve() argument is passed as true -- presumably the
// cross-correlation flag; confirm against convolve()'s declaration.
void convolve_32F(
- const oclMat &, const oclMat &, oclMat &, MatchTemplateBuf &)
+ const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf)
{
- CV_Error(-1, "convolve is not fully implemented yet");
+ ConvolveBuf convolve_buf;
+ convolve_buf.user_block_size = buf.user_block_size;
// Single-channel input is convolved straight into result.
+ if (image.oclchannels() == 1)
+ convolve(image, templ, result, true, convolve_buf);
+ else
+ {
// Multi-channel input: both matrices are reshaped to one channel, convolved
// into a temporary, and the extractFirstChannel kernel then fills result
// from that temporary.
+ oclMat result_;
+ convolve(image.reshape(1), templ.reshape(1), result_, true, convolve_buf);
+ extractFirstChannel_32F(result_, result);
+ }
}
void matchTemplate_CCORR(
const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf)
{
result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
- if (useNaive(CV_TM_CCORR, image.depth(), templ.size()))
+ if (useNaive(TM_CCORR, image.depth(), templ.size()))
{
matchTemplateNaive_CCORR(image, templ, result, image.oclchannels());
return;
void matchTemplate_CCORR_NORMED(
const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf)
{
+ cv::ocl::oclMat temp;
matchTemplate_CCORR(image, templ, result, buf);
buf.image_sums.resize(1);
buf.image_sqsums.resize(1);
-
- integral(image.reshape(1), buf.image_sums[0], buf.image_sqsums[0]);
-
+ integral(image.reshape(1), buf.image_sums[0], temp);
+ if(temp.depth() == CV_64F)
+ temp.convertTo(buf.image_sqsums[0], CV_32FC1);
+ else
+ buf.image_sqsums[0] = temp;
unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
Context *clCxt = image.clCxt;
- string kernelName = "normalizeKernel";
- vector< pair<size_t, const void *> > args;
-
- args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
- args.push_back( make_pair( sizeof(cl_ulong), (void *)&templ_sqsum));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
- args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
- args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
- args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset));
- args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
+ String kernelName = "normalizeKernel";
+ std::vector< std::pair<size_t, const void *> > args;
+
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data));
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&result.data));
+ args.push_back( std::make_pair( sizeof(cl_ulong), (void *)&templ_sqsum));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.rows));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.cols));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.rows));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.cols));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.offset));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.step));
size_t globalThreads[3] = {result.cols, result.rows, 1};
size_t localThreads[3] = {16, 16, 1};
CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1);
Context *clCxt = image.clCxt;
- string kernelName = "matchTemplate_Naive_CCORR";
-
- vector< pair<size_t, const void *> > args;
-
- args.push_back( make_pair( sizeof(cl_mem), (void *)&image.data));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&templ.data));
- args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
- args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows));
- args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols));
- args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
- args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
- args.push_back( make_pair( sizeof(cl_int), (void *)&image.offset));
- args.push_back( make_pair( sizeof(cl_int), (void *)&templ.offset));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
- args.push_back( make_pair( sizeof(cl_int), (void *)&image.step));
- args.push_back( make_pair( sizeof(cl_int), (void *)&templ.step));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
+ String kernelName = "matchTemplate_Naive_CCORR";
+
+ std::vector< std::pair<size_t, const void *> > args;
+
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&image.data));
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&templ.data));
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&result.data));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.rows));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.cols));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.rows));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.cols));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.rows));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.cols));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.offset));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.offset));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.offset));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.step));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.step));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.step));
size_t globalThreads[3] = {result.cols, result.rows, 1};
size_t localThreads[3] = {16, 16, 1};
matchTemplate_CCORR(image, templ, result, buf);
Context *clCxt = image.clCxt;
- string kernelName;
+ String kernelName;
kernelName = "matchTemplate_Prepared_CCOFF";
size_t globalThreads[3] = {result.cols, result.rows, 1};
size_t localThreads[3] = {16, 16, 1};
- vector< pair<size_t, const void *> > args;
- args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
+ std::vector< std::pair<size_t, const void *> > args;
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&result.data) );
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.rows) );
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.cols) );
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.rows) );
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.cols) );
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.rows) );
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.cols) );
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.offset));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.step));
Vec4f templ_sum = Vec4f::all(0);
// to be continued in the following section
if(image.oclchannels() == 1)
integral(image, buf.image_sums[0]);
templ_sum[0] = (float)sum(templ)[0] / templ.size().area();
- args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
- args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[0]) );
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
+ args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[0]) );
}
else
{
switch(image.oclchannels())
{
case 4:
- args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
- args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[1].data) );
- args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[2].data) );
- args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[3].data) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
- args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[0]) );
- args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[1]) );
- args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[2]) );
- args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[3]) );
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sums[1].data) );
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sums[2].data) );
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sums[3].data) );
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
+ args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[0]) );
+ args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[1]) );
+ args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[2]) );
+ args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[3]) );
break;
default:
- CV_Error(CV_StsBadArg, "matchTemplate: unsupported number of channels");
+ CV_Error(Error::StsBadArg, "matchTemplate: unsupported number of channels");
break;
}
}
float scale = 1.f / templ.size().area();
Context *clCxt = image.clCxt;
- string kernelName;
+ String kernelName;
kernelName = "matchTemplate_Prepared_CCOFF_NORMED";
size_t globalThreads[3] = {result.cols, result.rows, 1};
size_t localThreads[3] = {16, 16, 1};
- vector< pair<size_t, const void *> > args;
- args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
- args.push_back( make_pair( sizeof(cl_float), (void *)&scale) );
+ std::vector< std::pair<size_t, const void *> > args;
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&result.data) );
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.rows) );
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.cols) );
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.rows) );
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.cols) );
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.rows) );
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.cols) );
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.offset));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.step));
+ args.push_back( std::make_pair( sizeof(cl_float), (void *)&scale) );
Vec4f templ_sum = Vec4f::all(0);
Vec4f templ_sqsum = Vec4f::all(0);
{
buf.image_sums.resize(1);
buf.image_sqsums.resize(1);
- integral(image, buf.image_sums[0], buf.image_sqsums[0]);
+ cv::ocl::oclMat temp;
+ integral(image, buf.image_sums[0], temp);
+ if(temp.depth() == CV_64F)
+ temp.convertTo(buf.image_sqsums[0], CV_32FC1);
+ else
+ buf.image_sqsums[0] = temp;
templ_sum[0] = (float)sum(templ)[0];
templ_sqsum[0] -= scale * templ_sum[0] * templ_sum[0];
templ_sum[0] *= scale;
- args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
- args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step) );
- args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[0]) );
- args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sqsum[0]) );
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data) );
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset) );
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step) );
+ args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[0]) );
+ args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sqsum[0]) );
}
else
{
templ_sum *= scale;
buf.image_sums.resize(buf.images.size());
buf.image_sqsums.resize(buf.images.size());
-
+ cv::ocl::oclMat temp;
for(int i = 0; i < image.oclchannels(); i ++)
{
- integral(buf.images[i], buf.image_sums[i], buf.image_sqsums[i]);
+ integral(buf.images[i], buf.image_sums[i], temp);
+ if(temp.depth() == CV_64F)
+ temp.convertTo(buf.image_sqsums[i], CV_32FC1);
+ else
+ buf.image_sqsums[i] = temp;
}
switch(image.oclchannels())
{
case 4:
- args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
- args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[1].data) );
- args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[2].data) );
- args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[3].data) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
- args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data) );
- args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[1].data) );
- args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[2].data) );
- args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[3].data) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step) );
- args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[0]) );
- args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[1]) );
- args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[2]) );
- args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[3]) );
- args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sqsum_sum) );
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sums[1].data) );
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sums[2].data) );
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sums[3].data) );
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data) );
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[1].data) );
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[2].data) );
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[3].data) );
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset) );
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step) );
+ args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[0]) );
+ args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[1]) );
+ args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[2]) );
+ args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[3]) );
+ args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sqsum_sum) );
break;
default:
- CV_Error(CV_StsBadArg, "matchTemplate: unsupported number of channels");
+ CV_Error(Error::StsBadArg, "matchTemplate: unsupported number of channels");
break;
}
}
// Launches the "extractFirstChannel" kernel of the match_template program to
// fill result from image.
//
// The global work size is result.cols x result.rows with 16x16 work-groups.
// This function never calls result.create(), so result is expected to be
// allocated (with its final size) by the caller before the call.
void extractFirstChannel_32F(const oclMat &image, oclMat &result)
{
Context *clCxt = image.clCxt;
- string kernelName;
+ String kernelName;
kernelName = "extractFirstChannel";
size_t globalThreads[3] = {result.cols, result.rows, 1};
size_t localThreads[3] = {16, 16, 1};
// Kernel arguments, in push order: source buffer, destination buffer,
// destination rows/cols, then offset and step of source and destination.
// Each entry stores the address of a member of image/result, so both
// matrices must stay alive until the kernel has been enqueued below.
- vector< pair<size_t, const void *> > args;
- args.push_back( make_pair( sizeof(cl_mem), (void *)&image.data) );
- args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols) );
- args.push_back( make_pair( sizeof(cl_int), (void *)&image.offset));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
- args.push_back( make_pair( sizeof(cl_int), (void *)&image.step));
- args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
+ std::vector< std::pair<size_t, const void *> > args;
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&image.data) );
+ args.push_back( std::make_pair( sizeof(cl_mem), (void *)&result.data) );
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.rows) );
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.cols) );
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.offset));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.offset));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.step));
+ args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.step));
// NOTE(review): the two trailing -1 arguments are presumably "no specific
// channel count / depth" selectors for the kernel variant -- confirm against
// openCLExecuteKernel's declaration.
openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, -1, -1);
}