refactored and extended ocl::exp and ocl::log

author Ilya Lavrenov <ilya.lavrenov@itseez.com>

Tue, 24 Sep 2013 10:02:01 +0000 (14:02 +0400)

committer Ilya Lavrenov <ilya.lavrenov@itseez.com>

Tue, 24 Sep 2013 10:02:01 +0000 (14:02 +0400)
author Ilya Lavrenov <ilya.lavrenov@itseez.com>
Tue, 24 Sep 2013 10:02:01 +0000 (14:02 +0400)
committer Ilya Lavrenov <ilya.lavrenov@itseez.com>
Tue, 24 Sep 2013 10:02:01 +0000 (14:02 +0400)
diff --git a/modules/ocl/src/arithm.cpp b/modules/ocl/src/arithm.cpp

index 1effac2..97da8c0 100644 (file)
--- a/modules/ocl/src/arithm.cpp
+++ b/modules/ocl/src/arithm.cpp
@@ -817,39 +817,44 @@ void cv::ocl::LUT(const oclMat &src, const oclMat &lut, oclMat &dst)
  //////////////////////////////////////////////////////////////////////////////
  //////////////////////////////// exp log /////////////////////////////////////
  //////////////////////////////////////////////////////////////////////////////
+
  static void arithmetic_exp_log_run(const oclMat &src, oclMat &dst, string kernelName, const char **kernelString)
  {
-    dst.create(src.size(), src.type());
-    CV_Assert(src.cols == dst.cols &&
-              src.rows == dst.rows );
-
-    CV_Assert(src.type() == dst.type());
-    CV_Assert( src.type() == CV_32F || src.type() == CV_64F);
-
      Context  *clCxt = src.clCxt;
-    if(!clCxt->supportsFeature(Context::CL_DOUBLE) && src.type() == CV_64F)
+    if (!clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
      {
-        CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
+        CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
          return;
      }
-    //int channels = dst.oclchannels();
-    int depth = dst.depth();
+
+    CV_Assert( src.depth() == CV_32F || src.depth() == CV_64F);
+    dst.create(src.size(), src.type());
+
+    int ddepth = dst.depth();
+    int cols1 = src.cols * src.oclchannels();
+    int srcoffset1 = src.offset / src.elemSize1(), dstoffset1 = dst.offset / dst.elemSize1();
+    int srcstep1 = src.step1(), dststep1 = dst.step1();
  
      size_t localThreads[3]  = { 64, 4, 1 };
      size_t globalThreads[3] = { dst.cols, dst.rows, 1 };
  
+    std::string buildOptions = format("-D srcT=%s",
+                                      ddepth == CV_32F ? "float" : "double");
+
      vector<pair<size_t , const void *> > args;
-    args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows ));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&src.cols ));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&src.step ));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step ));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&src.offset ));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&dst.offset ));
      args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data ));
      args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data ));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&cols1 ));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&src.rows ));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&srcoffset1 ));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&dstoffset1 ));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&srcstep1 ));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&dststep1 ));
  
-    openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, depth);
+    openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads,
+                        args, src.oclchannels(), -1, buildOptions.c_str());
  }
+
  void cv::ocl::exp(const oclMat &src, oclMat &dst)
  {
      arithmetic_exp_log_run(src, dst, "arithm_exp", &arithm_exp);
diff --git a/modules/ocl/src/opencl/arithm_exp.cl b/modules/ocl/src/opencl/arithm_exp.cl

index 6f537a2..b2143ba 100644 (file)
--- a/modules/ocl/src/opencl/arithm_exp.cl
+++ b/modules/ocl/src/opencl/arithm_exp.cl
@@ -42,52 +42,70 @@
  // the use of this software, even if advised of the possibility of such damage.
  //
  //M*/
+
  #if defined (DOUBLE_SUPPORT)
+#ifdef cl_khr_fp64
  #pragma OPENCL EXTENSION cl_khr_fp64:enable
+#elif defined (cl_amd_fp64)
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#endif
  #endif
-
  
  //////////////////////////////////////////////////////////////////////////////////////////////////////
  /////////////////////////////////////////////EXP//////////////////////////////////////////////////////
  ///////////////////////////////////////////////////////////////////////////////////////////////////////
  
-__kernel void arithm_exp_D5(int rows, int cols, int srcStep, int dstStep, int srcOffset, int dstOffset, __global float *src, __global float *dst)
+__kernel void arithm_exp_C1(__global srcT *src, __global srcT *dst,
+    int cols1, int rows,
+    int srcOffset1, int dstOffset1,
+    int srcStep1, int dstStep1)
  {
-
      int x = get_global_id(0);
      int y = get_global_id(1);
  
-    if(x < cols && y < rows)
+    if(x < cols1 && y < rows)
      {
-      x = x << 2;
-      int srcIdx = mad24( y, srcStep, x + srcOffset);
-      int dstIdx = mad24( y, dstStep, x + dstOffset);
+        int srcIdx = mad24(y, srcStep1, x + srcOffset1);
+        int dstIdx = mad24(y, dstStep1, x + dstOffset1);
  
-      float src_data = *((__global float *)((__global char *)src + srcIdx));
-      float dst_data = exp(src_data);
+        dst[dstIdx] = exp(src[srcIdx]);
+    }
+}
+
+__kernel void arithm_exp_C2(__global srcT *src, __global srcT *dst,
+    int cols1, int rows,
+    int srcOffset1, int dstOffset1,
+    int srcStep1, int dstStep1)
+{
+    int x1 = get_global_id(0) << 1;
+    int y = get_global_id(1);
  
-      *((__global float *)((__global char *)dst + dstIdx)) = dst_data;
+    if(x1 < cols1 && y < rows)
+    {
+        int srcIdx = mad24(y, srcStep1, x1 + srcOffset1);
+        int dstIdx = mad24(y, dstStep1, x1 + dstOffset1);
  
+        dst[dstIdx] =                      exp(src[srcIdx]);
+        dst[dstIdx + 1] = x1 + 1 < cols1 ? exp(src[srcIdx + 1]) : dst[dstIdx + 1];
      }
  }
  
-#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_exp_D6(int rows, int cols, int srcStep, int dstStep, int srcOffset, int dstOffset, __global double *src, __global double *dst)
+__kernel void arithm_exp_C4(__global srcT *src, __global srcT *dst,
+    int cols1, int rows,
+    int srcOffset1, int dstOffset1,
+    int srcStep1, int dstStep1)
  {
-  int x = get_global_id(0);
-  int y = get_global_id(1);
-  if(x < cols && y < rows )
-  {
-      x = x << 3;
-      int srcIdx = mad24( y, srcStep, x + srcOffset);
-      int dstIdx = mad24( y, dstStep, x + dstOffset);
+    int x1 = get_global_id(0) << 2;
+    int y = get_global_id(1);
  
-      double src_data = *((__global double *)((__global char *)src + srcIdx));
-      double dst_data = exp(src_data);
+    if(x1 < cols1 && y < rows)
+    {
+        int srcIdx = mad24(y, srcStep1, x1 + srcOffset1);
+        int dstIdx = mad24(y, dstStep1, x1 + dstOffset1);
  
-      *((__global double *)((__global char *)dst + dstIdx )) = dst_data;
-     // dst[dstIdx] = exp(src[srcIdx]);
-  }
+        dst[dstIdx] =                      exp(src[srcIdx]);
+        dst[dstIdx + 1] = x1 + 1 < cols1 ? exp(src[srcIdx + 1]) : dst[dstIdx + 1];
+        dst[dstIdx + 2] = x1 + 2 < cols1 ? exp(src[srcIdx + 2]) : dst[dstIdx + 2];
+        dst[dstIdx + 3] = x1 + 3 < cols1 ? exp(src[srcIdx + 3]) : dst[dstIdx + 3];
+    }
  }
-
-#endif
diff --git a/modules/ocl/src/opencl/arithm_log.cl b/modules/ocl/src/opencl/arithm_log.cl

index ea19c9d..ef8c4dd 100644 (file)
--- a/modules/ocl/src/opencl/arithm_log.cl
+++ b/modules/ocl/src/opencl/arithm_log.cl
@@ -1,4 +1,3 @@
-
  /*M///////////////////////////////////////////////////////////////////////////////////////
  //
  //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
@@ -43,52 +42,66 @@
  // the use of this software, even if advised of the possibility of such damage.
  //
  //M*/
+
  #if defined (DOUBLE_SUPPORT)
  #pragma OPENCL EXTENSION cl_khr_fp64:enable
  #endif
  
-#define INF_FLOAT -88.029694
-#define INF_DOUBLE -709.0895657128241
-
-
  //////////////////////////////////////////////////////////////////////////////////////////////////////
  /////////////////////////////////////////////LOG/////////////////////////////////////////////////////
  ///////////////////////////////////////////////////////////////////////////////////////////////////////
  
-__kernel void arithm_log_D5(int rows, int cols, int srcStep, int dstStep, int srcOffset, int dstOffset, __global float *src, __global float *dst)
+__kernel void arithm_log_C1(__global srcT *src, __global srcT *dst,
+    int cols1, int rows,
+    int srcOffset1, int dstOffset1,
+    int srcStep1, int dstStep1)
  {
      int x = get_global_id(0);
      int y = get_global_id(1);
  
-    if(x < cols && y < rows )
+    if(x < cols1 && y < rows)
      {
-      x = x << 2;
-      int srcIdx = mad24( y, srcStep, x + srcOffset);
-      int dstIdx = mad24( y, dstStep, x + dstOffset);
+        int srcIdx = mad24(y, srcStep1, x + srcOffset1);
+        int dstIdx = mad24(y, dstStep1, x + dstOffset1);
  
-      float src_data = *((__global float *)((__global char *)src + srcIdx));
-      float dst_data = (src_data == 0) ? INF_FLOAT : log(fabs(src_data));
-
-      *((__global float *)((__global char *)dst + dstIdx)) = dst_data;
+        dst[dstIdx] = log(src[srcIdx]);
      }
  }
  
-#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_log_D6(int rows, int cols, int srcStep, int dstStep, int srcOffset, int dstOffset, __global double *src, __global double *dst)
+__kernel void arithm_log_C2(__global srcT *src, __global srcT *dst,
+    int cols1, int rows,
+    int srcOffset1, int dstOffset1,
+    int srcStep1, int dstStep1)
  {
-    int x = get_global_id(0);
+    int x1 = get_global_id(0) << 1;
      int y = get_global_id(1);
  
-    if(x < cols && y < rows )
+    if(x1 < cols1 && y < rows)
      {
-      x = x << 3;
-      int srcIdx = mad24( y, srcStep, x + srcOffset);
-      int dstIdx = mad24( y, dstStep, x + dstOffset);
+        int srcIdx = mad24(y, srcStep1, x1 + srcOffset1);
+        int dstIdx = mad24(y, dstStep1, x1 + dstOffset1);
  
-      double src_data = *((__global double *)((__global char *)src + srcIdx));
-      double dst_data = (src_data == 0) ? INF_DOUBLE : log(fabs(src_data));
-      *((__global double *)((__global char *)dst + dstIdx)) = dst_data;
+        dst[dstIdx] =                      log(src[srcIdx]);
+        dst[dstIdx + 1] = x1 + 1 < cols1 ? log(src[srcIdx + 1]) : dst[dstIdx + 1];
+    }
+}
+
+__kernel void arithm_log_C4(__global srcT *src, __global srcT *dst,
+    int cols1, int rows,
+    int srcOffset1, int dstOffset1,
+    int srcStep1, int dstStep1)
+{
+    int x1 = get_global_id(0) << 2;
+    int y = get_global_id(1);
  
+    if(x1 < cols1 && y < rows)
+    {
+        int srcIdx = mad24(y, srcStep1, x1 + srcOffset1);
+        int dstIdx = mad24(y, dstStep1, x1 + dstOffset1);
+
+        dst[dstIdx] =                      log(src[srcIdx]);
+        dst[dstIdx + 1] = x1 + 1 < cols1 ? log(src[srcIdx + 1]) : dst[dstIdx + 1];
+        dst[dstIdx + 2] = x1 + 2 < cols1 ? log(src[srcIdx + 2]) : dst[dstIdx + 2];
+        dst[dstIdx + 3] = x1 + 3 < cols1 ? log(src[srcIdx + 3]) : dst[dstIdx + 3];
      }
  }
-#endif
author	Ilya Lavrenov <ilya.lavrenov@itseez.com>
	Tue, 24 Sep 2013 10:02:01 +0000 (14:02 +0400)
committer	Ilya Lavrenov <ilya.lavrenov@itseez.com>
	Tue, 24 Sep 2013 10:02:01 +0000 (14:02 +0400)
modules/ocl/src/arithm.cpp		patch \| blob \| history
modules/ocl/src/opencl/arithm_exp.cl		patch \| blob \| history
modules/ocl/src/opencl/arithm_log.cl		patch \| blob \| history