X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=modules%2Focl%2Fsrc%2Fmatch_template.cpp;h=c95ad84215757dceabdf2737447bec34efa67845;hb=aacf188e837324b5e35dac9d2accaa332a83d346;hp=7c0a7ac5dbdbebcc52db91a7f0e2f4bc7f5e9ad9;hpb=43aec5ad4ad4460ad588d674263920df8c1ecb2e;p=profile%2Fivi%2Fopencv.git

diff --git a/modules/ocl/src/match_template.cpp b/modules/ocl/src/match_template.cpp
index 7c0a7ac..c95ad84 100644
--- a/modules/ocl/src/match_template.cpp
+++ b/modules/ocl/src/match_template.cpp
@@ -25,7 +25,7 @@
 //
 //   * Redistribution's in binary form must reproduce the above copyright notice,
 //     this list of conditions and the following disclaimer in the documentation
-//     and/or other oclMaterials provided with the distribution.
+//     and/or other materials provided with the distribution.
 //
 //   * The name of the copyright holders may not be used to endorse or promote products
 //     derived from this software without specific prior written permission.
@@ -44,22 +44,11 @@
 //M*/
 
 
-#include <iomanip>
 #include "precomp.hpp"
+#include "opencl_kernels.hpp"
 
 using namespace cv;
 using namespace cv::ocl;
-using namespace std;
-
-//helper routines
-namespace cv
-{
-    namespace ocl
-    {
-        ///////////////////////////OpenCL kernel strings///////////////////////////
-        extern const char *match_template;
-    }
-}
 
 namespace cv
 {
@@ -99,11 +88,25 @@ namespace cv
         // Evaluates optimal template's area threshold. If
         // template's area is less  than the threshold, we use naive match
         // template version, otherwise FFT-based (if available)
-        static bool useNaive(int , int , Size )
+        static bool useNaive(int method, int depth, Size size)
         {
-            // FIXME!
-            //   always use naive until convolve is imported
+#ifdef HAVE_CLAMDFFT
+            if (method == TM_SQDIFF && (depth == CV_32F || !Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE)))
+            {
+                return true;
+            }
+            else if(method == TM_CCORR || (method == TM_SQDIFF && depth == CV_8U))
+            {
+                return size.height < 18 && size.width < 18;
+            }
+            else
+                return false;
+#else
+#define UNUSED(x) (void)(x);
+            UNUSED(method) UNUSED(depth) UNUSED(size)
+#undef  UNUSED
             return true;
+#endif
         }
 
         //////////////////////////////////////////////////////////////////////
@@ -112,7 +115,7 @@ namespace cv
             const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf & buf)
         {
             result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
-            if (useNaive(CV_TM_SQDIFF, image.depth(), templ.size()))
+            if (useNaive(TM_SQDIFF, image.depth(), templ.size()))
             {
                 matchTemplateNaive_SQDIFF(image, templ, result, image.oclchannels());
                 return;
@@ -132,20 +135,20 @@ namespace cv
 
                 //port CUDA's matchTemplatePrepared_SQDIFF_8U
                 Context *clCxt = image.clCxt;
-                string kernelName = "matchTemplate_Prepared_SQDIFF";
-                vector< pair<size_t, const void *> > args;
-
-                args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data));
-                args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
-                args.push_back( make_pair( sizeof(cl_ulong), (void *)&templ_sqsum));
-                args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
-                args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
-                args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
-                args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
-                args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset));
-                args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step));
-                args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
-                args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
+                String kernelName = "matchTemplate_Prepared_SQDIFF";
+                std::vector< std::pair<size_t, const void *> > args;
+
+                args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data));
+                args.push_back( std::make_pair( sizeof(cl_mem), (void *)&result.data));
+                args.push_back( std::make_pair( sizeof(cl_ulong), (void *)&templ_sqsum));
+                args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.rows));
+                args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.cols));
+                args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.rows));
+                args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.cols));
+                args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset));
+                args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step));
+                args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.offset));
+                args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.step));
 
                 size_t globalThreads[3] = {result.cols, result.rows, 1};
                 size_t localThreads[3]  = {16, 16, 1};
@@ -166,20 +169,20 @@ namespace cv
             unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
 
             Context *clCxt = image.clCxt;
-            string kernelName = "matchTemplate_Prepared_SQDIFF_NORMED";
-            vector< pair<size_t, const void *> > args;
-
-            args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data));
-            args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
-            args.push_back( make_pair( sizeof(cl_ulong), (void *)&templ_sqsum));
-            args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
-            args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
-            args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
-            args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
-            args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset));
-            args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step));
-            args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
-            args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
+            String kernelName = "matchTemplate_Prepared_SQDIFF_NORMED";
+            std::vector< std::pair<size_t, const void *> > args;
+
+            args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data));
+            args.push_back( std::make_pair( sizeof(cl_mem), (void *)&result.data));
+            args.push_back( std::make_pair( sizeof(cl_ulong), (void *)&templ_sqsum));
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.rows));
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.cols));
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.rows));
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.cols));
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset));
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step));
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.offset));
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.step));
 
             size_t globalThreads[3] = {result.cols, result.rows, 1};
             size_t localThreads[3]  = {16, 16, 1};
@@ -196,25 +199,25 @@ namespace cv
             CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1);
 
             Context *clCxt = image.clCxt;
-            string kernelName = "matchTemplate_Naive_SQDIFF";
-
-            vector< pair<size_t, const void *> > args;
-
-            args.push_back( make_pair( sizeof(cl_mem), (void *)&image.data));
-            args.push_back( make_pair( sizeof(cl_mem), (void *)&templ.data));
-            args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
-            args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows));
-            args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols));
-            args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
-            args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
-            args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
-            args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
-            args.push_back( make_pair( sizeof(cl_int), (void *)&image.offset));
-            args.push_back( make_pair( sizeof(cl_int), (void *)&templ.offset));
-            args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
-            args.push_back( make_pair( sizeof(cl_int), (void *)&image.step));
-            args.push_back( make_pair( sizeof(cl_int), (void *)&templ.step));
-            args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
+            String kernelName = "matchTemplate_Naive_SQDIFF";
+
+            std::vector< std::pair<size_t, const void *> > args;
+
+            args.push_back( std::make_pair( sizeof(cl_mem), (void *)&image.data));
+            args.push_back( std::make_pair( sizeof(cl_mem), (void *)&templ.data));
+            args.push_back( std::make_pair( sizeof(cl_mem), (void *)&result.data));
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.rows));
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.cols));
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.rows));
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.cols));
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.rows));
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.cols));
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.offset));
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.offset));
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.offset));
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.step));
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.step));
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.step));
 
             size_t globalThreads[3] = {result.cols, result.rows, 1};
             size_t localThreads[3]  = {16, 16, 1};
@@ -224,16 +227,25 @@ namespace cv
         //////////////////////////////////////////////////////////////////////
         // CCORR
         void convolve_32F(
-            const oclMat &, const oclMat &, oclMat &, MatchTemplateBuf &)
+            const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf)
         {
-            CV_Error(-1, "convolve is not fully implemented yet");
+            ConvolveBuf convolve_buf;
+            convolve_buf.user_block_size = buf.user_block_size;
+            if (image.oclchannels() == 1)
+                convolve(image, templ, result, true, convolve_buf);
+            else
+            {
+                oclMat result_;
+                convolve(image.reshape(1), templ.reshape(1), result_, true, convolve_buf);
+                extractFirstChannel_32F(result_, result);
+            }
         }
 
         void matchTemplate_CCORR(
             const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf)
         {
             result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
-            if (useNaive(CV_TM_CCORR, image.depth(), templ.size()))
+            if (useNaive(TM_CCORR, image.depth(), templ.size()))
             {
                 matchTemplateNaive_CCORR(image, templ, result, image.oclchannels());
                 return;
@@ -256,29 +268,32 @@ namespace cv
         void matchTemplate_CCORR_NORMED(
             const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf)
         {
+            cv::ocl::oclMat temp;
             matchTemplate_CCORR(image, templ, result, buf);
             buf.image_sums.resize(1);
             buf.image_sqsums.resize(1);
-
-            integral(image.reshape(1), buf.image_sums[0], buf.image_sqsums[0]);
-
+            integral(image.reshape(1), buf.image_sums[0], temp);
+            if(temp.depth() == CV_64F)
+                temp.convertTo(buf.image_sqsums[0], CV_32FC1);
+            else
+                buf.image_sqsums[0] = temp;
             unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
 
             Context *clCxt = image.clCxt;
-            string kernelName = "normalizeKernel";
-            vector< pair<size_t, const void *> > args;
-
-            args.push_back( make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data));
-            args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
-            args.push_back( make_pair( sizeof(cl_ulong), (void *)&templ_sqsum));
-            args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
-            args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
-            args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
-            args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
-            args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset));
-            args.push_back( make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step));
-            args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
-            args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
+            String kernelName = "normalizeKernel";
+            std::vector< std::pair<size_t, const void *> > args;
+
+            args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data));
+            args.push_back( std::make_pair( sizeof(cl_mem), (void *)&result.data));
+            args.push_back( std::make_pair( sizeof(cl_ulong), (void *)&templ_sqsum));
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.rows));
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.cols));
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.rows));
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.cols));
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset));
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step));
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.offset));
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.step));
 
             size_t globalThreads[3] = {result.cols, result.rows, 1};
             size_t localThreads[3]  = {16, 16, 1};
@@ -295,25 +310,25 @@ namespace cv
             CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1);
 
             Context *clCxt = image.clCxt;
-            string kernelName = "matchTemplate_Naive_CCORR";
-
-            vector< pair<size_t, const void *> > args;
-
-            args.push_back( make_pair( sizeof(cl_mem), (void *)&image.data));
-            args.push_back( make_pair( sizeof(cl_mem), (void *)&templ.data));
-            args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data));
-            args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows));
-            args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols));
-            args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows));
-            args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols));
-            args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows));
-            args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols));
-            args.push_back( make_pair( sizeof(cl_int), (void *)&image.offset));
-            args.push_back( make_pair( sizeof(cl_int), (void *)&templ.offset));
-            args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
-            args.push_back( make_pair( sizeof(cl_int), (void *)&image.step));
-            args.push_back( make_pair( sizeof(cl_int), (void *)&templ.step));
-            args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
+            String kernelName = "matchTemplate_Naive_CCORR";
+
+            std::vector< std::pair<size_t, const void *> > args;
+
+            args.push_back( std::make_pair( sizeof(cl_mem), (void *)&image.data));
+            args.push_back( std::make_pair( sizeof(cl_mem), (void *)&templ.data));
+            args.push_back( std::make_pair( sizeof(cl_mem), (void *)&result.data));
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.rows));
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.cols));
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.rows));
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.cols));
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.rows));
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.cols));
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.offset));
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.offset));
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.offset));
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.step));
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.step));
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.step));
 
             size_t globalThreads[3] = {result.cols, result.rows, 1};
             size_t localThreads[3]  = {16, 16, 1};
@@ -329,22 +344,22 @@ namespace cv
             matchTemplate_CCORR(image, templ, result, buf);
 
             Context *clCxt = image.clCxt;
-            string kernelName;
+            String kernelName;
 
             kernelName = "matchTemplate_Prepared_CCOFF";
             size_t globalThreads[3] = {result.cols, result.rows, 1};
             size_t localThreads[3]  = {16, 16, 1};
 
-            vector< pair<size_t, const void *> > args;
-            args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data) );
-            args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows) );
-            args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols) );
-            args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows) );
-            args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols) );
-            args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows) );
-            args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols) );
-            args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
-            args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
+            std::vector< std::pair<size_t, const void *> > args;
+            args.push_back( std::make_pair( sizeof(cl_mem), (void *)&result.data) );
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.rows) );
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.cols) );
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.rows) );
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.cols) );
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.rows) );
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.cols) );
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.offset));
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.step));
             Vec4f templ_sum = Vec4f::all(0);
             // to be continued in the following section
             if(image.oclchannels() == 1)
@@ -353,10 +368,10 @@ namespace cv
                 integral(image, buf.image_sums[0]);
 
                 templ_sum[0] = (float)sum(templ)[0] / templ.size().area();
-                args.push_back( make_pair( sizeof(cl_mem),  (void *)&buf.image_sums[0].data) );
-                args.push_back( make_pair( sizeof(cl_int),  (void *)&buf.image_sums[0].offset) );
-                args.push_back( make_pair( sizeof(cl_int),  (void *)&buf.image_sums[0].step) );
-                args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[0]) );
+                args.push_back( std::make_pair( sizeof(cl_mem),  (void *)&buf.image_sums[0].data) );
+                args.push_back( std::make_pair( sizeof(cl_int),  (void *)&buf.image_sums[0].offset) );
+                args.push_back( std::make_pair( sizeof(cl_int),  (void *)&buf.image_sums[0].step) );
+                args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[0]) );
             }
             else
             {
@@ -373,19 +388,19 @@ namespace cv
                 switch(image.oclchannels())
                 {
                 case 4:
-                    args.push_back( make_pair( sizeof(cl_mem),  (void *)&buf.image_sums[0].data) );
-                    args.push_back( make_pair( sizeof(cl_mem),  (void *)&buf.image_sums[1].data) );
-                    args.push_back( make_pair( sizeof(cl_mem),  (void *)&buf.image_sums[2].data) );
-                    args.push_back( make_pair( sizeof(cl_mem),  (void *)&buf.image_sums[3].data) );
-                    args.push_back( make_pair( sizeof(cl_int),  (void *)&buf.image_sums[0].offset) );
-                    args.push_back( make_pair( sizeof(cl_int),  (void *)&buf.image_sums[0].step) );
-                    args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[0]) );
-                    args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[1]) );
-                    args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[2]) );
-                    args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[3]) );
+                    args.push_back( std::make_pair( sizeof(cl_mem),  (void *)&buf.image_sums[0].data) );
+                    args.push_back( std::make_pair( sizeof(cl_mem),  (void *)&buf.image_sums[1].data) );
+                    args.push_back( std::make_pair( sizeof(cl_mem),  (void *)&buf.image_sums[2].data) );
+                    args.push_back( std::make_pair( sizeof(cl_mem),  (void *)&buf.image_sums[3].data) );
+                    args.push_back( std::make_pair( sizeof(cl_int),  (void *)&buf.image_sums[0].offset) );
+                    args.push_back( std::make_pair( sizeof(cl_int),  (void *)&buf.image_sums[0].step) );
+                    args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[0]) );
+                    args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[1]) );
+                    args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[2]) );
+                    args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[3]) );
                     break;
                 default:
-                    CV_Error(CV_StsBadArg, "matchTemplate: unsupported number of channels");
+                    CV_Error(Error::StsBadArg, "matchTemplate: unsupported number of channels");
                     break;
                 }
             }
@@ -402,23 +417,23 @@ namespace cv
             float scale = 1.f / templ.size().area();
 
             Context *clCxt = image.clCxt;
-            string kernelName;
+            String kernelName;
 
             kernelName = "matchTemplate_Prepared_CCOFF_NORMED";
             size_t globalThreads[3] = {result.cols, result.rows, 1};
             size_t localThreads[3]  = {16, 16, 1};
 
-            vector< pair<size_t, const void *> > args;
-            args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data) );
-            args.push_back( make_pair( sizeof(cl_int), (void *)&image.rows) );
-            args.push_back( make_pair( sizeof(cl_int), (void *)&image.cols) );
-            args.push_back( make_pair( sizeof(cl_int), (void *)&templ.rows) );
-            args.push_back( make_pair( sizeof(cl_int), (void *)&templ.cols) );
-            args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows) );
-            args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols) );
-            args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
-            args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
-            args.push_back( make_pair( sizeof(cl_float), (void *)&scale) );
+            std::vector< std::pair<size_t, const void *> > args;
+            args.push_back( std::make_pair( sizeof(cl_mem), (void *)&result.data) );
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.rows) );
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.cols) );
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.rows) );
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.cols) );
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.rows) );
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.cols) );
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.offset));
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.step));
+            args.push_back( std::make_pair( sizeof(cl_float), (void *)&scale) );
 
             Vec4f templ_sum   = Vec4f::all(0);
             Vec4f templ_sqsum = Vec4f::all(0);
@@ -427,7 +442,12 @@ namespace cv
             {
                 buf.image_sums.resize(1);
                 buf.image_sqsums.resize(1);
-                integral(image, buf.image_sums[0], buf.image_sqsums[0]);
+                cv::ocl::oclMat temp;
+                integral(image, buf.image_sums[0], temp);
+                if(temp.depth() == CV_64F)
+                    temp.convertTo(buf.image_sqsums[0], CV_32FC1);
+                else
+                    buf.image_sqsums[0] = temp;
 
                 templ_sum[0]   = (float)sum(templ)[0];
 
@@ -436,14 +456,14 @@ namespace cv
                 templ_sqsum[0] -= scale * templ_sum[0] * templ_sum[0];
                 templ_sum[0]   *= scale;
 
-                args.push_back( make_pair( sizeof(cl_mem),  (void *)&buf.image_sums[0].data) );
-                args.push_back( make_pair( sizeof(cl_int),  (void *)&buf.image_sums[0].offset) );
-                args.push_back( make_pair( sizeof(cl_int),  (void *)&buf.image_sums[0].step) );
-                args.push_back( make_pair( sizeof(cl_mem),  (void *)&buf.image_sqsums[0].data) );
-                args.push_back( make_pair( sizeof(cl_int),  (void *)&buf.image_sqsums[0].offset) );
-                args.push_back( make_pair( sizeof(cl_int),  (void *)&buf.image_sqsums[0].step) );
-                args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[0]) );
-                args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sqsum[0]) );
+                args.push_back( std::make_pair( sizeof(cl_mem),  (void *)&buf.image_sums[0].data) );
+                args.push_back( std::make_pair( sizeof(cl_int),  (void *)&buf.image_sums[0].offset) );
+                args.push_back( std::make_pair( sizeof(cl_int),  (void *)&buf.image_sums[0].step) );
+                args.push_back( std::make_pair( sizeof(cl_mem),  (void *)&buf.image_sqsums[0].data) );
+                args.push_back( std::make_pair( sizeof(cl_int),  (void *)&buf.image_sqsums[0].offset) );
+                args.push_back( std::make_pair( sizeof(cl_int),  (void *)&buf.image_sqsums[0].step) );
+                args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[0]) );
+                args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sqsum[0]) );
             }
             else
             {
@@ -463,35 +483,39 @@ namespace cv
                 templ_sum   *= scale;
                 buf.image_sums.resize(buf.images.size());
                 buf.image_sqsums.resize(buf.images.size());
-
+                cv::ocl::oclMat temp;
                 for(int i = 0; i < image.oclchannels(); i ++)
                 {
-                    integral(buf.images[i], buf.image_sums[i], buf.image_sqsums[i]);
+                    integral(buf.images[i], buf.image_sums[i], temp);
+                    if(temp.depth() == CV_64F)
+                        temp.convertTo(buf.image_sqsums[i], CV_32FC1);
+                    else
+                        buf.image_sqsums[i] = temp;
                 }
 
                 switch(image.oclchannels())
                 {
                 case 4:
-                    args.push_back( make_pair( sizeof(cl_mem),  (void *)&buf.image_sums[0].data) );
-                    args.push_back( make_pair( sizeof(cl_mem),  (void *)&buf.image_sums[1].data) );
-                    args.push_back( make_pair( sizeof(cl_mem),  (void *)&buf.image_sums[2].data) );
-                    args.push_back( make_pair( sizeof(cl_mem),  (void *)&buf.image_sums[3].data) );
-                    args.push_back( make_pair( sizeof(cl_int),  (void *)&buf.image_sums[0].offset) );
-                    args.push_back( make_pair( sizeof(cl_int),  (void *)&buf.image_sums[0].step) );
-                    args.push_back( make_pair( sizeof(cl_mem),  (void *)&buf.image_sqsums[0].data) );
-                    args.push_back( make_pair( sizeof(cl_mem),  (void *)&buf.image_sqsums[1].data) );
-                    args.push_back( make_pair( sizeof(cl_mem),  (void *)&buf.image_sqsums[2].data) );
-                    args.push_back( make_pair( sizeof(cl_mem),  (void *)&buf.image_sqsums[3].data) );
-                    args.push_back( make_pair( sizeof(cl_int),  (void *)&buf.image_sqsums[0].offset) );
-                    args.push_back( make_pair( sizeof(cl_int),  (void *)&buf.image_sqsums[0].step) );
-                    args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[0]) );
-                    args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[1]) );
-                    args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[2]) );
-                    args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sum[3]) );
-                    args.push_back( make_pair( sizeof(cl_float), (void *)&templ_sqsum_sum) );
+                    args.push_back( std::make_pair( sizeof(cl_mem),  (void *)&buf.image_sums[0].data) );
+                    args.push_back( std::make_pair( sizeof(cl_mem),  (void *)&buf.image_sums[1].data) );
+                    args.push_back( std::make_pair( sizeof(cl_mem),  (void *)&buf.image_sums[2].data) );
+                    args.push_back( std::make_pair( sizeof(cl_mem),  (void *)&buf.image_sums[3].data) );
+                    args.push_back( std::make_pair( sizeof(cl_int),  (void *)&buf.image_sums[0].offset) );
+                    args.push_back( std::make_pair( sizeof(cl_int),  (void *)&buf.image_sums[0].step) );
+                    args.push_back( std::make_pair( sizeof(cl_mem),  (void *)&buf.image_sqsums[0].data) );
+                    args.push_back( std::make_pair( sizeof(cl_mem),  (void *)&buf.image_sqsums[1].data) );
+                    args.push_back( std::make_pair( sizeof(cl_mem),  (void *)&buf.image_sqsums[2].data) );
+                    args.push_back( std::make_pair( sizeof(cl_mem),  (void *)&buf.image_sqsums[3].data) );
+                    args.push_back( std::make_pair( sizeof(cl_int),  (void *)&buf.image_sqsums[0].offset) );
+                    args.push_back( std::make_pair( sizeof(cl_int),  (void *)&buf.image_sqsums[0].step) );
+                    args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[0]) );
+                    args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[1]) );
+                    args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[2]) );
+                    args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[3]) );
+                    args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sqsum_sum) );
                     break;
                 default:
-                    CV_Error(CV_StsBadArg, "matchTemplate: unsupported number of channels");
+                    CV_Error(Error::StsBadArg, "matchTemplate: unsupported number of channels");
                     break;
                 }
             }
@@ -500,21 +524,21 @@ namespace cv
         void extractFirstChannel_32F(const oclMat &image, oclMat &result)
         {
             Context *clCxt = image.clCxt;
-            string kernelName;
+            String kernelName;
 
             kernelName = "extractFirstChannel";
             size_t globalThreads[3] = {result.cols, result.rows, 1};
             size_t localThreads[3]  = {16, 16, 1};
 
-            vector< pair<size_t, const void *> > args;
-            args.push_back( make_pair( sizeof(cl_mem), (void *)&image.data) );
-            args.push_back( make_pair( sizeof(cl_mem), (void *)&result.data) );
-            args.push_back( make_pair( sizeof(cl_int), (void *)&result.rows) );
-            args.push_back( make_pair( sizeof(cl_int), (void *)&result.cols) );
-            args.push_back( make_pair( sizeof(cl_int), (void *)&image.offset));
-            args.push_back( make_pair( sizeof(cl_int), (void *)&result.offset));
-            args.push_back( make_pair( sizeof(cl_int), (void *)&image.step));
-            args.push_back( make_pair( sizeof(cl_int), (void *)&result.step));
+            std::vector< std::pair<size_t, const void *> > args;
+            args.push_back( std::make_pair( sizeof(cl_mem), (void *)&image.data) );
+            args.push_back( std::make_pair( sizeof(cl_mem), (void *)&result.data) );
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.rows) );
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.cols) );
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.offset));
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.offset));
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.step));
+            args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.step));
 
             openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, -1, -1);
         }