ocl: fix compilation warnings, update openCLExecuteKernelInterop

author Alexander Alekhin <alexander.alekhin@itseez.com>

Wed, 9 Oct 2013 12:57:52 +0000 (16:57 +0400)

committer Alexander Alekhin <alexander.alekhin@itseez.com>

Wed, 9 Oct 2013 20:15:02 +0000 (00:15 +0400)
author Alexander Alekhin <alexander.alekhin@itseez.com>
Wed, 9 Oct 2013 12:57:52 +0000 (16:57 +0400)
committer Alexander Alekhin <alexander.alekhin@itseez.com>
Wed, 9 Oct 2013 20:15:02 +0000 (00:15 +0400)
diff --git a/cmake/cl2cpp.cmake b/cmake/cl2cpp.cmake

index 825172b..1916c3e 100644 (file)
--- a/cmake/cl2cpp.cmake
+++ b/cmake/cl2cpp.cmake
@@ -6,6 +6,7 @@ get_filename_component(OUTPUT_HPP_NAME "${OUTPUT_HPP}" NAME)
  
  set(STR_CPP "// This file is auto-generated. Do not edit!
  
+#include \"precomp.hpp\"
  #include \"${OUTPUT_HPP_NAME}\"
  
  namespace cv
@@ -16,6 +17,8 @@ namespace ocl
  
  set(STR_HPP "// This file is auto-generated. Do not edit!
  
+#include \"opencv2/ocl/private/util.hpp\"
+
  namespace cv
  {
  namespace ocl
diff --git a/modules/ocl/include/opencv2/ocl/ocl.hpp b/modules/ocl/include/opencv2/ocl/ocl.hpp

index 58719f1..b90f454 100644 (file)
--- a/modules/ocl/include/opencv2/ocl/ocl.hpp
+++ b/modules/ocl/include/opencv2/ocl/ocl.hpp
@@ -221,6 +221,33 @@ namespace cv
          //! set where binary cache to be saved to
          CV_EXPORTS void setBinaryPath(const char *path);
  
+        struct ProgramSource
+        {
+            const char* name;
+            const char* programStr;
+            const char* programHash;
+
+            // Cache in memory by name (should be unique). Caching on disk disabled.
+            inline ProgramSource(const char* _name, const char* _programStr)
+                : name(_name), programStr(_programStr), programHash(NULL)
+            {
+            }
+
+            // Cache in memory by name (should be unique). Caching on disk uses programHash mark.
+            inline ProgramSource(const char* _name, const char* _programStr, const char* _programHash)
+                : name(_name), programStr(_programStr), programHash(_programHash)
+            {
+            }
+        };
+
+        //! Calls OpenCL kernel. globalThreads must not be NULL (localThreads may be NULL); waits for completion and releases the kernel.
+        //! Deprecated, will be replaced
+        CV_EXPORTS void openCLExecuteKernelInterop(Context *clCxt,
+                const cv::ocl::ProgramSource& source, string kernelName,
+                size_t globalThreads[3], size_t localThreads[3],
+                std::vector< std::pair<size_t, const void *> > &args,
+                int channels, int depth, const char *build_options);
+
          class CV_EXPORTS oclMatExpr;
          //////////////////////////////// oclMat ////////////////////////////////
          class CV_EXPORTS oclMat
diff --git a/modules/ocl/include/opencv2/ocl/private/util.hpp b/modules/ocl/include/opencv2/ocl/private/util.hpp

index 30288a6..bb1a5ee 100644 (file)
--- a/modules/ocl/include/opencv2/ocl/private/util.hpp
+++ b/modules/ocl/include/opencv2/ocl/private/util.hpp
@@ -189,24 +189,6 @@ inline size_t roundUp(size_t sz, size_t n)
      return result;
  }
  
-//! Calls a kernel, by string. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing.
-CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt,
-        const cv::ocl::ProgramEntry* source, string kernelName,
-        size_t globalThreads[3], size_t localThreads[3],
-        std::vector< std::pair<size_t, const void *> > &args,
-        int channels, int depth, const char *build_options,
-        bool finish = true, bool measureKernelTime = false,
-        bool cleanUp = true);
-
-//! Calls a kernel, by file. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing.
-CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt,
-        const cv::ocl::ProgramEntry* source, const int numFiles, string kernelName,
-        size_t globalThreads[3], size_t localThreads[3],
-        std::vector< std::pair<size_t, const void *> > &args,
-        int channels, int depth, const char *build_options,
-        bool finish = true, bool measureKernelTime = false,
-        bool cleanUp = true);
-
  }//namespace ocl
  }//namespace cv
  
diff --git a/modules/ocl/src/cl_operations.cpp b/modules/ocl/src/cl_operations.cpp

index 25d7454..ed13be5 100644 (file)
--- a/modules/ocl/src/cl_operations.cpp
+++ b/modules/ocl/src/cl_operations.cpp
@@ -302,28 +302,27 @@ void openCLExecuteKernel(Context *ctx, const cv::ocl::ProgramEntry* source, stri
      total_kernel_time = 0;
      cout << "-------------------------------------" << endl;
  
-    cout << setiosflags(ios::left) << setw(15) << "excute time";
-    cout << setiosflags(ios::left) << setw(15) << "lauch time";
+    cout << setiosflags(ios::left) << setw(15) << "execute time";
+    cout << setiosflags(ios::left) << setw(15) << "launch time";
      cout << setiosflags(ios::left) << setw(15) << "kernel time" << endl;
      int i = 0;
      for(i = 0; i < RUN_TIMES; i++)
          openCLExecuteKernel_(ctx, source, kernelName, globalThreads, localThreads, args, channels, depth,
                               build_options);
  
-    cout << "average kernel excute time: " << total_execute_time / RUN_TIMES << endl; // "ms" << endl;
+    cout << "average kernel execute time: " << total_execute_time / RUN_TIMES << endl; // "ms" << endl;
      cout << "average kernel total time:  " << total_kernel_time / RUN_TIMES << endl; // "ms" << endl;
  #endif
  }
  
-double openCLExecuteKernelInterop(Context *ctx, const cv::ocl::ProgramEntry* source, string kernelName,
+void openCLExecuteKernelInterop(Context *ctx, const cv::ocl::ProgramSource& source, string kernelName,
                           size_t globalThreads[3], size_t localThreads[3],
-                         vector< pair<size_t, const void *> > &args, int channels, int depth, const char *build_options,
-                         bool finish, bool measureKernelTime, bool cleanUp)
+                         vector< pair<size_t, const void *> > &args, int channels, int depth, const char *build_options)
  
  {
      //construct kernel name
      //The rule is functionName_Cn_Dn, C represent Channels, D Represent DataType Depth, n represent an integer number
-    //for exmaple split_C2_D2, represent the split kernel with channels =2 and dataType Depth = 2(Data type is char)
+    //for example split_C2_D2, represent the split kernel with channels = 2 and dataType Depth = 2 (Data type is char)
      stringstream idxStr;
      if(channels != -1)
          idxStr << "_C" << channels;
@@ -331,63 +330,27 @@ double openCLExecuteKernelInterop(Context *ctx, const cv::ocl::ProgramEntry* sou
          idxStr << "_D" << depth;
      kernelName += idxStr.str();
  
-    cl_kernel kernel;
-    kernel = openCLGetKernelFromSource(ctx, source, kernelName, build_options);
-
-    double kernelTime = 0.0;
+    std::string name = std::string("custom_") + source.name;
+    ProgramEntry program = { name.c_str(), source.programStr, source.programHash };
+    cl_kernel kernel = openCLGetKernelFromSource(ctx, &program, kernelName, build_options);
  
-    if( globalThreads != NULL)
+    CV_Assert(globalThreads != NULL);
+    if ( localThreads != NULL)
      {
-        if ( localThreads != NULL)
-        {
-            globalThreads[0] = divUp(globalThreads[0], localThreads[0]) * localThreads[0];
-            globalThreads[1] = divUp(globalThreads[1], localThreads[1]) * localThreads[1];
-            globalThreads[2] = divUp(globalThreads[2], localThreads[2]) * localThreads[2];
-
-            //size_t blockSize = localThreads[0] * localThreads[1] * localThreads[2];
-            cv::ocl::openCLVerifyKernel(ctx, kernel, localThreads);
-        }
-        for(size_t i = 0; i < args.size(); i ++)
-            openCLSafeCall(clSetKernelArg(kernel, i, args[i].first, args[i].second));
-
-        if(measureKernelTime == false)
-        {
-            openCLSafeCall(clEnqueueNDRangeKernel(getClCommandQueue(ctx), kernel, 3, NULL, globalThreads,
-                            localThreads, 0, NULL, NULL));
-        }
-        else
-        {
-            cl_event event = NULL;
-            openCLSafeCall(clEnqueueNDRangeKernel(getClCommandQueue(ctx), kernel, 3, NULL, globalThreads,
-                            localThreads, 0, NULL, &event));
-
-            cl_ulong end_time, queue_time;
-
-            openCLSafeCall(clWaitForEvents(1, &event));
-
-            openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END,
-                            sizeof(cl_ulong), &end_time, 0));
-
-            openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_QUEUED,
-                            sizeof(cl_ulong), &queue_time, 0));
-
-            kernelTime = (double)(end_time - queue_time) / (1000 * 1000);
-
-            clReleaseEvent(event);
-        }
-    }
+        globalThreads[0] = roundUp(globalThreads[0], localThreads[0]);
+        globalThreads[1] = roundUp(globalThreads[1], localThreads[1]);
+        globalThreads[2] = roundUp(globalThreads[2], localThreads[2]);
  
-    if(finish)
-    {
-        clFinish(getClCommandQueue(ctx));
+        cv::ocl::openCLVerifyKernel(ctx, kernel, localThreads);
      }
+    for(size_t i = 0; i < args.size(); i ++)
+        openCLSafeCall(clSetKernelArg(kernel, i, args[i].first, args[i].second));
  
-    if(cleanUp)
-    {
-        openCLSafeCall(clReleaseKernel(kernel));
-    }
+    openCLSafeCall(clEnqueueNDRangeKernel(getClCommandQueue(ctx), kernel, 3, NULL, globalThreads,
+                    localThreads, 0, NULL, NULL));
  
-    return kernelTime;
+    clFinish(getClCommandQueue(ctx));
+    openCLSafeCall(clReleaseKernel(kernel));
  }
  
  cl_mem load_constant(cl_context context, cl_command_queue command_queue, const void *value,
diff --git a/modules/ocl/src/cl_programcache.cpp b/modules/ocl/src/cl_programcache.cpp

index a34f828..741df87 100644 (file)
--- a/modules/ocl/src/cl_programcache.cpp
+++ b/modules/ocl/src/cl_programcache.cpp
@@ -67,7 +67,6 @@
  
  namespace cv { namespace ocl {
  
-#define MAX_PROG_CACHE_SIZE 1024
  /*
   * The binary caching system to eliminate redundant program source compilation.
   * Strictly, this is not a cache because we do not implement evictions right now.
@@ -291,7 +290,7 @@ struct ProgramFileCache
      bool writeConfigurationToFile(const string& options, std::vector<char>& buf)
      {
          if (hash_ == NULL)
-            return true; // don't save dynamic kernels
+            return true; // don't save programs without hash
  
          if (!f.is_open())
          {
@@ -469,7 +468,7 @@ cl_program ProgramCache::getProgram(const Context *ctx, const cv::ocl::ProgramEn
  {
      stringstream src_sign;
  
-    src_sign << (int64)(source->programStr);
+    src_sign << source->name;
      src_sign << getClContext(ctx);
      if (NULL != build_options)
      {
@@ -514,15 +513,10 @@ cl_program ProgramCache::getProgram(const Context *ctx, const cv::ocl::ProgramEn
      cl_program program = programFileCache.getOrBuildProgram(ctx, source, all_build_options);
  
      //Cache the binary for future use if build_options is null
-    if( (this->cacheSize += 1) < MAX_PROG_CACHE_SIZE)
      {
          cv::AutoLock lockCache(mutexCache);
          this->addProgram(src_sign.str(), program);
      }
-    else
-    {
-        cout << "Warning: code cache has been full.\n";
-    }
      return program;
  }
  
diff --git a/modules/ocl/test/test_api.cpp b/modules/ocl/test/test_api.cpp

new file mode 100644 (file)

index 0000000..0b59fc6
--- /dev/null
+++ b/modules/ocl/test/test_api.cpp
@@ -0,0 +1,80 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "test_precomp.hpp"
+#include "opencv2/ocl/cl_runtime/cl_runtime.hpp" // for OpenCL types: cl_mem
+
+TEST(TestAPI, openCLExecuteKernelInterop)
+{
+    cv::RNG rng;
+    Size sz(10000, 1);
+    cv::Mat cpuMat = cvtest::randomMat(rng, sz, CV_32FC4, -10, 10, false);
+
+    cv::ocl::oclMat gpuMat(cpuMat);
+    cv::ocl::oclMat gpuMatDst(sz, CV_32FC4);
+
+    const char* kernelStr =
+"__kernel void test_kernel(__global float4* src, __global float4* dst) {\n"
+"    int x = get_global_id(0);\n"
+"    dst[x] = src[x];\n"
+"}\n";
+
+    cv::ocl::ProgramSource program("test_interop", kernelStr);
+
+    using namespace std;
+    vector<pair<size_t , const void *> > args;
+    args.push_back( make_pair( sizeof(cl_mem), (void *) &gpuMat.data ));
+    args.push_back( make_pair( sizeof(cl_mem), (void *) &gpuMatDst.data ));
+
+    size_t globalThreads[3] = { sz.width, 1, 1 };
+    cv::ocl::openCLExecuteKernelInterop(
+        gpuMat.clCxt,
+        program,
+        "test_kernel",
+        globalThreads, NULL, args,
+        -1, -1,
+        "");
+
+    cv::Mat dst;
+    gpuMatDst.download(dst);
+
+    EXPECT_LE(checkNorm(cpuMat, dst), 1e-3);
+}
author	Alexander Alekhin <alexander.alekhin@itseez.com>
	Wed, 9 Oct 2013 12:57:52 +0000 (16:57 +0400)
committer	Alexander Alekhin <alexander.alekhin@itseez.com>
	Wed, 9 Oct 2013 20:15:02 +0000 (00:15 +0400)
cmake/cl2cpp.cmake		patch \| blob \| history
modules/ocl/include/opencv2/ocl/ocl.hpp		patch \| blob \| history
modules/ocl/include/opencv2/ocl/private/util.hpp		patch \| blob \| history
modules/ocl/src/cl_operations.cpp		patch \| blob \| history
modules/ocl/src/cl_programcache.cpp		patch \| blob \| history
modules/ocl/test/test_api.cpp	[new file with mode: 0644]	patch \| blob