Fix bugs for the CPU device on the Intel platform

author Konstantin Matskevich <konstantin.matskevich@itseez.com>

Fri, 15 Nov 2013 13:26:18 +0000 (17:26 +0400)

committer Konstantin Matskevich <konstantin.matskevich@itseez.com>

Mon, 18 Nov 2013 09:20:54 +0000 (13:20 +0400)
author Konstantin Matskevich <konstantin.matskevich@itseez.com>
Fri, 15 Nov 2013 13:26:18 +0000 (17:26 +0400)
committer Konstantin Matskevich <konstantin.matskevich@itseez.com>
Mon, 18 Nov 2013 09:20:54 +0000 (13:20 +0400)
diff --git a/modules/ocl/src/arithm.cpp b/modules/ocl/src/arithm.cpp

index 997b201..68c5269 100644 (file)
--- a/modules/ocl/src/arithm.cpp
+++ b/modules/ocl/src/arithm.cpp
@@ -1638,8 +1638,9 @@ static void arithmetic_pow_run(const oclMat &src, double p, oclMat &dst, string
      size_t localThreads[3]  = { 64, 4, 1 };
      size_t globalThreads[3] = { dst.cols, dst.rows, 1 };
  
+    const char * const typeStr = depth == CV_32F ? "float" : "double";
      const char * const channelMap[] = { "", "", "2", "4", "4" };
-    std::string buildOptions = format("-D T=%s%s", depth == CV_32F ? "float" : "double", channelMap[channels]);
+    std::string buildOptions = format("-D VT=%s%s -D T=%s", typeStr, channelMap[channels], typeStr);
  
      int src_step = src.step / src.elemSize(), src_offset = src.offset / src.elemSize();
      int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize();
@@ -1655,7 +1656,7 @@ static void arithmetic_pow_run(const oclMat &src, double p, oclMat &dst, string
      args.push_back( make_pair( sizeof(cl_int), (void *)&dst.cols ));
  
      float pf = static_cast<float>(p);
-    if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE))
+    if(src.depth() == CV_32F)
          args.push_back( make_pair( sizeof(cl_float), (void *)&pf ));
      else
          args.push_back( make_pair( sizeof(cl_double), (void *)&p ));
diff --git a/modules/ocl/src/opencl/arithm_absdiff_nonsaturate.cl b/modules/ocl/src/opencl/arithm_absdiff_nonsaturate.cl

index c09560a..e03fa69 100644 (file)
--- a/modules/ocl/src/opencl/arithm_absdiff_nonsaturate.cl
+++ b/modules/ocl/src/opencl/arithm_absdiff_nonsaturate.cl
@@ -65,12 +65,16 @@ __kernel void arithm_absdiff_nonsaturate_binary(__global srcT *src1, int src1_st
          int src1_index = mad24(y, src1_step, x + src1_offset);
          int src2_index = mad24(y, src2_step, x + src2_offset);
          int dst_index  = mad24(y, dst_step, x + dst_offset);
+#ifdef INTEL_DEVICE //workaround for intel compiler bug
+        if(src1_index >= 0 && src2_index >= 0)
+#endif
+        {
+            dstT t0 = convertToDstT(src1[src1_index]);
+            dstT t1 = convertToDstT(src2[src2_index]);
+            dstT t2 = t0 - t1;
  
-        dstT t0 = convertToDstT(src1[src1_index]);
-        dstT t1 = convertToDstT(src2[src2_index]);
-        dstT t2 = t0 - t1;
-
-        dst[dst_index] = t2 >= (dstT)(0) ? t2 : -t2;
+            dst[dst_index] = t2 >= (dstT)(0) ? t2 : -t2;
+        }
      }
  }
  
@@ -85,9 +89,13 @@ __kernel void arithm_absdiff_nonsaturate(__global srcT *src1, int src1_step, int
      {
          int src1_index = mad24(y, src1_step, x + src1_offset);
          int dst_index  = mad24(y, dst_step, x + dst_offset);
+#ifdef INTEL_DEVICE //workaround for intel compiler bug
+        if(src1_index >= 0)
+#endif
+        {
+            dstT t0 = convertToDstT(src1[src1_index]);
  
-        dstT t0 = convertToDstT(src1[src1_index]);
-
-        dst[dst_index] = t0 >= (dstT)(0) ? t0 : -t0;
+            dst[dst_index] = t0 >= (dstT)(0) ? t0 : -t0;
+        }
      }
  }
diff --git a/modules/ocl/src/opencl/arithm_pow.cl b/modules/ocl/src/opencl/arithm_pow.cl

index bb0673d..385e4cc 100644 (file)
--- a/modules/ocl/src/opencl/arithm_pow.cl
+++ b/modules/ocl/src/opencl/arithm_pow.cl
@@ -49,16 +49,13 @@
  #elif defined (cl_khr_fp64)
  #pragma OPENCL EXTENSION cl_khr_fp64:enable
  #endif
-#define F double
-#else
-#define F float
  #endif
  
  /************************************** pow **************************************/
  
-__kernel void arithm_pow(__global T * src, int src_step, int src_offset,
-                         __global T * dst, int dst_step, int dst_offset,
-                         int rows, int cols, F p)
+__kernel void arithm_pow(__global VT * src, int src_step, int src_offset,
+                         __global VT * dst, int dst_step, int dst_offset,
+                         int rows, int cols, T p)
  {
      int x = get_global_id(0);
      int y = get_global_id(1);
@@ -68,8 +65,8 @@ __kernel void arithm_pow(__global T * src, int src_step, int src_offset,
          int src_index = mad24(y, src_step, x + src_offset);
          int dst_index = mad24(y, dst_step, x + dst_offset);
  
-        T src_data = src[src_index];
-        T tmp = src_data > 0 ? exp(p * log(src_data)) : (src_data == 0 ? 0 : exp(p * log(fabs(src_data))));
+        VT src_data = src[src_index];
+        VT tmp = src_data > 0 ? exp(p * log(src_data)) : (src_data == 0 ? 0 : exp(p * log(fabs(src_data))));
  
          dst[dst_index] = tmp;
      }
author	Konstantin Matskevich <konstantin.matskevich@itseez.com>
	Fri, 15 Nov 2013 13:26:18 +0000 (17:26 +0400)
committer	Konstantin Matskevich <konstantin.matskevich@itseez.com>
	Mon, 18 Nov 2013 09:20:54 +0000 (13:20 +0400)
modules/ocl/src/arithm.cpp		patch | blob | history
modules/ocl/src/opencl/arithm_absdiff_nonsaturate.cl		patch | blob | history
modules/ocl/src/opencl/arithm_pow.cl		patch | blob | history