kernel warnings on AMD

author Ilya Lavrenov <ilya.lavrenov@itseez.com>
Mon, 11 Nov 2013 16:08:40 +0000 (20:08 +0400)
committer Ilya Lavrenov <ilya.lavrenov@itseez.com>
Tue, 12 Nov 2013 11:06:46 +0000 (15:06 +0400)

diff --git a/modules/ocl/src/opencl/arithm_LUT.cl b/modules/ocl/src/opencl/arithm_LUT.cl

index 658e1f4..30407bb 100644 (file)
--- a/modules/ocl/src/opencl/arithm_LUT.cl
+++ b/modules/ocl/src/opencl/arithm_LUT.cl
@@ -34,9 +34,13 @@
  //
  //
  
-#if defined (DOUBLE_SUPPORT)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
  #pragma OPENCL EXTENSION cl_khr_fp64:enable
  #endif
+#endif
  
  __kernel void LUT_C1( __global const srcT * src, __global const dstT *lut,
        __global dstT *dst,
diff --git a/modules/ocl/src/opencl/arithm_absdiff_nonsaturate.cl b/modules/ocl/src/opencl/arithm_absdiff_nonsaturate.cl

index fcf3874..c09560a 100644 (file)
--- a/modules/ocl/src/opencl/arithm_absdiff_nonsaturate.cl
+++ b/modules/ocl/src/opencl/arithm_absdiff_nonsaturate.cl
@@ -44,11 +44,11 @@
  //
  //M*/
  
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
  #pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
  #endif
  #endif
  
diff --git a/modules/ocl/src/opencl/arithm_add.cl b/modules/ocl/src/opencl/arithm_add.cl

index a73b65d..04262b8 100644 (file)
--- a/modules/ocl/src/opencl/arithm_add.cl
+++ b/modules/ocl/src/opencl/arithm_add.cl
@@ -44,11 +44,11 @@
  //
  //M*/
  
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
  #pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
  #endif
  #endif
  
diff --git a/modules/ocl/src/opencl/arithm_addWeighted.cl b/modules/ocl/src/opencl/arithm_addWeighted.cl

index 8272806..872ee85 100644 (file)
--- a/modules/ocl/src/opencl/arithm_addWeighted.cl
+++ b/modules/ocl/src/opencl/arithm_addWeighted.cl
@@ -43,11 +43,11 @@
  //
  //M*/
  
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
  #pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
  #endif
  #endif
  
diff --git a/modules/ocl/src/opencl/arithm_add_mask.cl b/modules/ocl/src/opencl/arithm_add_mask.cl

index ea96d8a..b115d9b 100644 (file)
--- a/modules/ocl/src/opencl/arithm_add_mask.cl
+++ b/modules/ocl/src/opencl/arithm_add_mask.cl
@@ -43,11 +43,11 @@
  //
  //M*/
  
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
  #pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
  #endif
  #endif
  
diff --git a/modules/ocl/src/opencl/arithm_add_scalar.cl b/modules/ocl/src/opencl/arithm_add_scalar.cl

index b82eff5..05ea48d 100644 (file)
--- a/modules/ocl/src/opencl/arithm_add_scalar.cl
+++ b/modules/ocl/src/opencl/arithm_add_scalar.cl
@@ -43,11 +43,11 @@
  //
  //M*/
  
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
  #pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
  #endif
  #endif
  
diff --git a/modules/ocl/src/opencl/arithm_add_scalar_mask.cl b/modules/ocl/src/opencl/arithm_add_scalar_mask.cl

index 0762b19..a8b9657 100644 (file)
--- a/modules/ocl/src/opencl/arithm_add_scalar_mask.cl
+++ b/modules/ocl/src/opencl/arithm_add_scalar_mask.cl
@@ -43,11 +43,11 @@
  //
  //M*/
  
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
  #pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
  #endif
  #endif
  
diff --git a/modules/ocl/src/opencl/arithm_bitwise_binary_scalar_mask.cl b/modules/ocl/src/opencl/arithm_bitwise_binary_scalar_mask.cl

index 03f46cc..756f201 100644 (file)
--- a/modules/ocl/src/opencl/arithm_bitwise_binary_scalar_mask.cl
+++ b/modules/ocl/src/opencl/arithm_bitwise_binary_scalar_mask.cl
@@ -43,14 +43,6 @@
  //
  //M*/
  
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#endif
-#endif
-
  //////////////////////////////////////////////////////////////////////////////////////////////////////
  ////////////////////////////////////////////bitwise_binary////////////////////////////////////////////
  //////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/modules/ocl/src/opencl/arithm_bitwise_not.cl b/modules/ocl/src/opencl/arithm_bitwise_not.cl

index 5bc1839..b6f76d6 100644 (file)
--- a/modules/ocl/src/opencl/arithm_bitwise_not.cl
+++ b/modules/ocl/src/opencl/arithm_bitwise_not.cl
@@ -43,11 +43,11 @@
  //
  //M*/
  
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
  #pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
  #endif
  #endif
  
diff --git a/modules/ocl/src/opencl/arithm_cartToPolar.cl b/modules/ocl/src/opencl/arithm_cartToPolar.cl

index e37818c..f634f2d 100644 (file)
--- a/modules/ocl/src/opencl/arithm_cartToPolar.cl
+++ b/modules/ocl/src/opencl/arithm_cartToPolar.cl
@@ -43,24 +43,21 @@
  //
  //M*/
  
-#if defined (DOUBLE_SUPPORT)
-    #pragma OPENCL EXTENSION cl_khr_fp64:enable
-    #define CV_PI   3.1415926535897932384626433832795
-    #ifndef DBL_EPSILON
-        #define DBL_EPSILON 0x1.0p-52
-    #endif
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
+#endif
+#define CV_PI M_PI
  #else
-    #define CV_PI   3.1415926535897932384626433832795f
-    #ifndef DBL_EPSILON
-        #define DBL_EPSILON 0x1.0p-52f
-    #endif
+#define CV_PI M_PI_F
  #endif
  
-
  __kernel void arithm_cartToPolar_D5 (__global float *src1, int src1_step, int src1_offset,
                                       __global float *src2, int src2_step, int src2_offset,
-                                     __global float *dst1, int dst1_step, int dst1_offset, //magnitude
-                                     __global float *dst2, int dst2_step, int dst2_offset, //cartToPolar
+                                     __global float *dst1, int dst1_step, int dst1_offset, // magnitude
+                                     __global float *dst2, int dst2_step, int dst2_offset, // cartToPolar
                                       int rows, int cols, int angInDegree)
  {
      int x = get_global_id(0);
@@ -81,16 +78,15 @@ __kernel void arithm_cartToPolar_D5 (__global float *src1, int src1_step, int sr
          float y2 = y * y;
  
          float magnitude = sqrt(x2 + y2);
-        float cartToPolar;
  
          float tmp = y >= 0 ? 0 : CV_PI*2;
          tmp = x < 0 ? CV_PI : tmp;
  
          float tmp1 = y >= 0 ? CV_PI*0.5f : CV_PI*1.5f;
-        cartToPolar = y2 <= x2 ? x*y/(x2 + 0.28f*y2 + DBL_EPSILON)  + tmp :
-                                 tmp1 - x*y/(y2 + 0.28f*x2 + DBL_EPSILON);
+        float cartToPolar = y2 <= x2 ? x*y/(x2 + 0.28f*y2 + FLT_EPSILON) + tmp :
+                                 tmp1 - x*y/(y2 + 0.28f*x2 + FLT_EPSILON);
  
-        cartToPolar = angInDegree == 0 ? cartToPolar : cartToPolar * (float)(180/CV_PI);
+        cartToPolar = angInDegree == 0 ? cartToPolar : cartToPolar * (180/CV_PI);
  
          *((__global float *)((__global char *)dst1 + dst1_index)) = magnitude;
          *((__global float *)((__global char *)dst2 + dst2_index)) = cartToPolar;
@@ -98,6 +94,7 @@ __kernel void arithm_cartToPolar_D5 (__global float *src1, int src1_step, int sr
  }
  
  #if defined (DOUBLE_SUPPORT)
+
  __kernel void arithm_cartToPolar_D6 (__global double *src1, int src1_step, int src1_offset,
                                       __global double *src2, int src2_step, int src2_offset,
                                       __global double *dst1, int dst1_step, int dst1_offset,
@@ -122,19 +119,19 @@ __kernel void arithm_cartToPolar_D6 (__global double *src1, int src1_step, int s
          double y2 = y * y;
  
          double magnitude = sqrt(x2 + y2);
-        double cartToPolar;
  
          float tmp = y >= 0 ? 0 : CV_PI*2;
          tmp = x < 0 ? CV_PI : tmp;
  
          float tmp1 = y >= 0 ? CV_PI*0.5 : CV_PI*1.5;
-        cartToPolar = y2 <= x2 ? x*y/(x2 + 0.28f*y2 + (float)DBL_EPSILON)  + tmp :
-                                 tmp1 - x*y/(y2 + 0.28f*x2 + (float)DBL_EPSILON);
+        double cartToPolar = y2 <= x2 ? x*y/(x2 + 0.28f*y2 + DBL_EPSILON)  + tmp :
+                                 tmp1 - x*y/(y2 + 0.28f*x2 + DBL_EPSILON);
  
-        cartToPolar = angInDegree == 0 ? cartToPolar : cartToPolar * (float)(180/CV_PI);
+        cartToPolar = angInDegree == 0 ? cartToPolar : cartToPolar * (180/CV_PI);
  
          *((__global double *)((__global char *)dst1 + dst1_index)) = magnitude;
          *((__global double *)((__global char *)dst2 + dst2_index)) = cartToPolar;
      }
  }
+
  #endif
diff --git a/modules/ocl/src/opencl/arithm_compare.cl b/modules/ocl/src/opencl/arithm_compare.cl

index 005d3c7..73e6299 100644 (file)
--- a/modules/ocl/src/opencl/arithm_compare.cl
+++ b/modules/ocl/src/opencl/arithm_compare.cl
@@ -43,11 +43,11 @@
  //
  //M*/
  
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
  #pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
  #endif
  #endif
  
diff --git a/modules/ocl/src/opencl/arithm_exp.cl b/modules/ocl/src/opencl/arithm_exp.cl

index 835bc95..f0a1893 100644 (file)
--- a/modules/ocl/src/opencl/arithm_exp.cl
+++ b/modules/ocl/src/opencl/arithm_exp.cl
@@ -43,11 +43,11 @@
  //
  //M*/
  
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
  #pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
  #endif
  #endif
  
diff --git a/modules/ocl/src/opencl/arithm_flip.cl b/modules/ocl/src/opencl/arithm_flip.cl

index 416240b..b9bacd3 100644 (file)
--- a/modules/ocl/src/opencl/arithm_flip.cl
+++ b/modules/ocl/src/opencl/arithm_flip.cl
@@ -43,11 +43,11 @@
  //
  //M*/
  
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
  #pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
  #endif
  #endif
  
diff --git a/modules/ocl/src/opencl/arithm_log.cl b/modules/ocl/src/opencl/arithm_log.cl

index fe1b304..ba5f32d 100644 (file)
--- a/modules/ocl/src/opencl/arithm_log.cl
+++ b/modules/ocl/src/opencl/arithm_log.cl
@@ -43,9 +43,13 @@
  //
  //M*/
  
-#if defined (DOUBLE_SUPPORT)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
  #pragma OPENCL EXTENSION cl_khr_fp64:enable
  #endif
+#endif
  
  //////////////////////////////////////////////////////////////////////////////////////////////////////
  /////////////////////////////////////////////LOG/////////////////////////////////////////////////////
diff --git a/modules/ocl/src/opencl/arithm_magnitude.cl b/modules/ocl/src/opencl/arithm_magnitude.cl

index 7c8cc18..6fd2ac3 100644 (file)
--- a/modules/ocl/src/opencl/arithm_magnitude.cl
+++ b/modules/ocl/src/opencl/arithm_magnitude.cl
@@ -43,9 +43,13 @@
  //
  //M*/
  
-#if defined (DOUBLE_SUPPORT)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
  #pragma OPENCL EXTENSION cl_khr_fp64:enable
  #endif
+#endif
  
  __kernel void arithm_magnitude_D5 (__global float *src1, int src1_step, int src1_offset,
                                     __global float *src2, int src2_step, int src2_offset,
diff --git a/modules/ocl/src/opencl/arithm_minMax.cl b/modules/ocl/src/opencl/arithm_minMax.cl

index 33a39d8..01db7d0 100644 (file)
--- a/modules/ocl/src/opencl/arithm_minMax.cl
+++ b/modules/ocl/src/opencl/arithm_minMax.cl
@@ -45,7 +45,7 @@
  
  /**************************************PUBLICFUNC*************************************/
  
-#if defined (DOUBLE_SUPPORT)
+#ifdef DOUBLE_SUPPORT
  #ifdef cl_amd_fp64
  #pragma OPENCL EXTENSION cl_amd_fp64:enable
  #elif defined (cl_khr_fp64)
diff --git a/modules/ocl/src/opencl/arithm_minMaxLoc.cl b/modules/ocl/src/opencl/arithm_minMaxLoc.cl

index 076fb06..b80ce2b 100644 (file)
--- a/modules/ocl/src/opencl/arithm_minMaxLoc.cl
+++ b/modules/ocl/src/opencl/arithm_minMaxLoc.cl
@@ -44,8 +44,13 @@
  //M*/
  
  /**************************************PUBLICFUNC*************************************/
-#if defined (DOUBLE_SUPPORT)
+
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
  #pragma OPENCL EXTENSION cl_khr_fp64:enable
+#endif
  #define RES_TYPE double4
  #define CONVERT_RES_TYPE convert_double4
  #else
diff --git a/modules/ocl/src/opencl/arithm_minMaxLoc_mask.cl b/modules/ocl/src/opencl/arithm_minMaxLoc_mask.cl

index 4d73be9..fbde684 100644 (file)
--- a/modules/ocl/src/opencl/arithm_minMaxLoc_mask.cl
+++ b/modules/ocl/src/opencl/arithm_minMaxLoc_mask.cl
@@ -44,8 +44,13 @@
  //M*/
  
  /**************************************PUBLICFUNC*************************************/
-#if defined (DOUBLE_SUPPORT)
+
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
  #pragma OPENCL EXTENSION cl_khr_fp64:enable
+#endif
  #define RES_TYPE double4
  #define CONVERT_RES_TYPE convert_double4
  #else
diff --git a/modules/ocl/src/opencl/arithm_nonzero.cl b/modules/ocl/src/opencl/arithm_nonzero.cl

index fc98257..3180c26 100644 (file)
--- a/modules/ocl/src/opencl/arithm_nonzero.cl
+++ b/modules/ocl/src/opencl/arithm_nonzero.cl
@@ -42,7 +42,7 @@
  // the use of this software, even if advised of the possibility of such damage.
  //
  
-#if defined (DOUBLE_SUPPORT)
+#ifdef DOUBLE_SUPPORT
  #ifdef cl_amd_fp64
  #pragma OPENCL EXTENSION cl_amd_fp64:enable
  #elif defined (cl_khr_fp64)
diff --git a/modules/ocl/src/opencl/arithm_phase.cl b/modules/ocl/src/opencl/arithm_phase.cl

index 978fd3b..40346b2 100644 (file)
--- a/modules/ocl/src/opencl/arithm_phase.cl
+++ b/modules/ocl/src/opencl/arithm_phase.cl
@@ -44,17 +44,17 @@
  //
  //
  
-#if defined (DOUBLE_SUPPORT)
-    #ifdef cl_amd_fp64
-        #pragma OPENCL EXTENSION cl_amd_fp64:enable
-    #elif defined (cl_khr_fp64)
-        #pragma OPENCL EXTENSION cl_khr_fp64:enable
-    #endif
-    #define CV_PI M_PI
-    #define CV_2PI (2 * CV_PI)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
+#endif
+#define CV_PI M_PI
+#define CV_2PI (2 * CV_PI)
  #else
-    #define CV_PI M_PI_F
-    #define CV_2PI (2 * CV_PI)
+#define CV_PI M_PI_F
+#define CV_2PI (2 * CV_PI)
  #endif
  
  /**************************************phase inradians**************************************/
@@ -159,7 +159,7 @@ __kernel void arithm_phase_indegrees_D6 (__global double *src1, int src1_step1,
  
          double data1 = src1[src1_index];
          double data2 = src2[src2_index];
-        double tmp = atan2(src2[src2_index], src1[src1_index]);
+        double tmp = atan2(data2, data1);
  
          tmp = 180 * tmp / CV_PI;
          if (tmp < 0)
diff --git a/modules/ocl/src/opencl/arithm_polarToCart.cl b/modules/ocl/src/opencl/arithm_polarToCart.cl

index 9e2e860..f3ec311 100644 (file)
--- a/modules/ocl/src/opencl/arithm_polarToCart.cl
+++ b/modules/ocl/src/opencl/arithm_polarToCart.cl
@@ -44,14 +44,14 @@
  //M*/
  
  #ifdef DOUBLE_SUPPORT
-    #ifdef cl_amd_fp64
-        #pragma OPENCL EXTENSION cl_amd_fp64:enable
-    #elif defined (cl_khr_fp64)
-        #pragma OPENCL EXTENSION cl_khr_fp64:enable
-    #endif
-    #define CV_PI M_PI
+#ifdef cl_amd_fp64
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
+#endif
+#define CV_PI M_PI
  #else
-    #define CV_PI M_PI_F
+#define CV_PI M_PI_F
  #endif
  
  /////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/modules/ocl/src/opencl/arithm_pow.cl b/modules/ocl/src/opencl/arithm_pow.cl

index 1704f6b..36a22b6 100644 (file)
--- a/modules/ocl/src/opencl/arithm_pow.cl
+++ b/modules/ocl/src/opencl/arithm_pow.cl
@@ -43,21 +43,22 @@
  //
  //M*/
  
-#if defined (DOUBLE_SUPPORT)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
  #pragma OPENCL EXTENSION cl_khr_fp64:enable
-typedef double F;
-typedef double4 F4;
-#define convert_F4 convert_double4;
+#endif
+#define F double
  #else
-typedef float F;
-typedef float4 F4;
-#define convert_F4 convert_float4;
+#define F float
  #endif
+
  /************************************** pow **************************************/
+
  __kernel void arithm_pow_D5 (__global float *src1, int src1_step, int src1_offset,
                               __global float *dst,  int dst_step,  int dst_offset,
-                             int rows, int cols, int dst_step1,
-                             F p)
+                             int rows, int cols, int dst_step1, F p)
  {
  
      int x = get_global_id(0);
@@ -73,14 +74,13 @@ __kernel void arithm_pow_D5 (__global float *src1, int src1_step, int src1_offse
  
          *((__global float *)((__global char *)dst + dst_index)) = tmp;
      }
-
  }
  
  #if defined (DOUBLE_SUPPORT)
+
  __kernel void arithm_pow_D6 (__global double *src1, int src1_step, int src1_offset,
                               __global double *dst,  int dst_step,  int dst_offset,
-                             int rows, int cols, int dst_step1,
-                             F p)
+                             int rows, int cols, int dst_step1, F p)
  {
  
      int x = get_global_id(0);
@@ -95,6 +95,6 @@ __kernel void arithm_pow_D6 (__global double *src1, int src1_step, int src1_offs
          double tmp = src1_data > 0 ? exp(p * log(src1_data)) : (src1_data == 0 ? 0 : exp(p * log(fabs(src1_data))));
          *((__global double *)((__global char *)dst + dst_index)) = tmp;
      }
-
  }
+
  #endif
diff --git a/modules/ocl/src/opencl/arithm_setidentity.cl b/modules/ocl/src/opencl/arithm_setidentity.cl

index fb684c3..0ead5b0 100644 (file)
--- a/modules/ocl/src/opencl/arithm_setidentity.cl
+++ b/modules/ocl/src/opencl/arithm_setidentity.cl
@@ -43,11 +43,11 @@
  //
  //M*/
  
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
  #pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
  #endif
  #endif
  
diff --git a/modules/ocl/src/opencl/arithm_sum.cl b/modules/ocl/src/opencl/arithm_sum.cl

index 7ada5be..514cf2a 100644 (file)
--- a/modules/ocl/src/opencl/arithm_sum.cl
+++ b/modules/ocl/src/opencl/arithm_sum.cl
@@ -43,11 +43,11 @@
  //
  //M*/
  
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
  #pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
  #endif
  #endif
  
diff --git a/modules/ocl/src/opencl/arithm_transpose.cl b/modules/ocl/src/opencl/arithm_transpose.cl

index bd06a52..8cde654 100644 (file)
--- a/modules/ocl/src/opencl/arithm_transpose.cl
+++ b/modules/ocl/src/opencl/arithm_transpose.cl
@@ -43,7 +43,7 @@
  //
  //M*/
  
-#if defined (DOUBLE_SUPPORT)
+#ifdef DOUBLE_SUPPORT
  #ifdef cl_amd_fp64
  #pragma OPENCL EXTENSION cl_amd_fp64:enable
  #elif defined (cl_khr_fp64)
diff --git a/modules/ocl/src/opencl/bgfg_mog.cl b/modules/ocl/src/opencl/bgfg_mog.cl

index a13a30e..06e18c2 100644 (file)
--- a/modules/ocl/src/opencl/bgfg_mog.cl
+++ b/modules/ocl/src/opencl/bgfg_mog.cl
@@ -67,11 +67,14 @@ static float clamp1(float var, float learningRate, float diff, float minVar)
  {
      return fmax(var + learningRate * (diff * diff - var), minVar);
  }
+
  #else
+
  #define T_FRAME uchar4
  #define T_MEAN_VAR float4
  #define CONVERT_TYPE convert_uchar4_sat
  #define F_ZERO (0.0f, 0.0f, 0.0f, 0.0f)
+
  inline float4 cvt(const uchar4 val)
  {
      float4 result;
@@ -93,6 +96,14 @@ inline float sum(const float4 val)
      return (val.x + val.y + val.z);
  }
  
+static void swap4(__global float4* ptr, int x, int y, int k, int rows, int ptr_step)
+{
+    float4 val = ptr[(k * rows + y) * ptr_step + x];
+    ptr[(k * rows + y) * ptr_step + x] = ptr[((k + 1) * rows + y) * ptr_step + x];
+    ptr[((k + 1) * rows + y) * ptr_step + x] = val;
+}
+
+
  static float4 clamp1(const float4 var, float learningRate, const float4 diff, float minVar)
  {
      float4 result;
@@ -102,6 +113,7 @@ static float4 clamp1(const float4 var, float learningRate, const float4 diff, fl
      result.w = 0.0f;
      return result;
  }
+
  #endif
  
  typedef struct
@@ -114,7 +126,7 @@ typedef struct
      float c_varMax;
      float c_tau;
      uchar c_shadowVal;
-}con_srtuct_t;
+} con_srtuct_t;
  
  static void swap(__global float* ptr, int x, int y, int k, int rows, int ptr_step)
  {
@@ -123,13 +135,6 @@ static void swap(__global float* ptr, int x, int y, int k, int rows, int ptr_ste
      ptr[((k + 1) * rows + y) * ptr_step + x] = val;
  }
  
-static void swap4(__global float4* ptr, int x, int y, int k, int rows, int ptr_step)
-{
-    float4 val = ptr[(k * rows + y) * ptr_step + x];
-    ptr[(k * rows + y) * ptr_step + x] = ptr[((k + 1) * rows + y) * ptr_step + x];
-    ptr[((k + 1) * rows + y) * ptr_step + x] = val;
-}
-
  __kernel void mog_withoutLearning_kernel(__global T_FRAME* frame, __global uchar* fgmask,
      __global float* weight, __global T_MEAN_VAR* mean, __global T_MEAN_VAR* var,
      int frame_row, int frame_col, int frame_step, int fgmask_step,
diff --git a/modules/ocl/src/opencl/blend_linear.cl b/modules/ocl/src/opencl/blend_linear.cl

index 06a51f2..bc7aa46 100644 (file)
--- a/modules/ocl/src/opencl/blend_linear.cl
+++ b/modules/ocl/src/opencl/blend_linear.cl
@@ -43,7 +43,7 @@
  //
  //M*/
  
-#if defined (DOUBLE_SUPPORT)
+#ifdef DOUBLE_SUPPORT
  #ifdef cl_amd_fp64
  #pragma OPENCL EXTENSION cl_amd_fp64:enable
  #elif defined (cl_khr_fp64)
diff --git a/modules/ocl/src/opencl/brute_force_match.cl b/modules/ocl/src/opencl/brute_force_match.cl

index ce0d86e..a005284 100644 (file)
--- a/modules/ocl/src/opencl/brute_force_match.cl
+++ b/modules/ocl/src/opencl/brute_force_match.cl
@@ -63,14 +63,6 @@
  #define DIST_TYPE 0
  #endif
  
-//http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
-static int bit1Count(int v)
-{
-    v = v - ((v >> 1) & 0x55555555);                    // reuse input as temporary
-    v = (v & 0x33333333) + ((v >> 2) & 0x33333333);     // temp
-    return ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24; // count
-}
-
  // dirty fix for non-template support
  #if   (DIST_TYPE == 0) // L1Dist
  #   ifdef T_FLOAT
@@ -89,6 +81,13 @@ typedef float value_type;
  typedef float result_type;
  #define DIST_RES(x) sqrt(x)
  #elif (DIST_TYPE == 2) // Hamming
+//http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
+static int bit1Count(int v)
+{
+    v = v - ((v >> 1) & 0x55555555);                    // reuse input as temporary
+    v = (v & 0x33333333) + ((v >> 2) & 0x33333333);     // temp
+    return ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24; // count
+}
  #define DIST(x, y) bit1Count( (x) ^ (y) )
  typedef int value_type;
  typedef int result_type;
diff --git a/modules/ocl/src/opencl/convertC3C4.cl b/modules/ocl/src/opencl/convertC3C4.cl

index b3e699d..4c519fd 100644 (file)
--- a/modules/ocl/src/opencl/convertC3C4.cl
+++ b/modules/ocl/src/opencl/convertC3C4.cl
@@ -33,12 +33,17 @@
  //
  //
  
-#if defined (DOUBLE_SUPPORT)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
  #pragma OPENCL EXTENSION cl_khr_fp64:enable
  #endif
+#endif
  
-__kernel void convertC3C4(__global const GENTYPE4 * restrict src, __global GENTYPE4 *dst, int cols, int rows,
-                    int dstStep_in_piexl,int pixel_end)
+__kernel void convertC3C4(__global const GENTYPE4 * restrict src, __global GENTYPE4 *dst,
+                         int cols, int rows,
+                         int dstStep_in_piexl, int pixel_end)
  {
      int id = get_global_id(0);
      int3 pixelid = (int3)(mul24(id,3),mad24(id,3,1),mad24(id,3,2));
@@ -88,13 +93,12 @@ __kernel void convertC3C4(__global const GENTYPE4 * restrict src, __global GENTY
          dst[addr.y] = outpix1;
      }
      else if(outx.x<cols && outy.x<rows)
-    {
          dst[addr.x] = outpix0;
-    }
  }
  
-__kernel void convertC4C3(__global const GENTYPE4 * restrict src, __global GENTYPE4 *dst, int cols, int rows,
-                    int srcStep_in_pixel,int pixel_end)
+__kernel void convertC4C3(__global const GENTYPE4 * restrict src, __global GENTYPE4 *dst,
+                          int cols, int rows,
+                          int srcStep_in_pixel, int pixel_end)
  {
      int id = get_global_id(0)<<2;
      int y = id / cols;
@@ -145,7 +149,5 @@ __kernel void convertC4C3(__global const GENTYPE4 * restrict src, __global GENTY
          dst[outaddr.y] = outpixel1;
      }
      else if(outaddr.x <= pixel_end)
-    {
          dst[outaddr.x] = pixel0;
-    }
  }
diff --git a/modules/ocl/src/opencl/filtering_boxFilter.cl b/modules/ocl/src/opencl/filtering_boxFilter.cl

index 7f7fd01..96091ce 100644 (file)
--- a/modules/ocl/src/opencl/filtering_boxFilter.cl
+++ b/modules/ocl/src/opencl/filtering_boxFilter.cl
@@ -146,7 +146,11 @@
  #endif
  
  #if USE_DOUBLE
+#ifdef cl_amd_fp64
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
  #pragma OPENCL EXTENSION cl_khr_fp64:enable
+#endif
  #define FPTYPE double
  #define CONVERT_TO_FPTYPE CAT(convert_double, VEC_SIZE)
  #else
diff --git a/modules/ocl/src/opencl/filtering_filter2D.cl b/modules/ocl/src/opencl/filtering_filter2D.cl

index f966766..fb7dca5 100644 (file)
--- a/modules/ocl/src/opencl/filtering_filter2D.cl
+++ b/modules/ocl/src/opencl/filtering_filter2D.cl
@@ -143,7 +143,11 @@
  #endif
  
  #if USE_DOUBLE
+#ifdef cl_amd_fp64
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
  #pragma OPENCL EXTENSION cl_khr_fp64:enable
+#endif
  #define FPTYPE double
  #define CONVERT_TO_FPTYPE CAT(convert_double, VEC_SIZE)
  #else
diff --git a/modules/ocl/src/opencl/haarobjectdetect_scaled2.cl b/modules/ocl/src/opencl/haarobjectdetect_scaled2.cl

index 72b9403..a2feb82 100644 (file)
--- a/modules/ocl/src/opencl/haarobjectdetect_scaled2.cl
+++ b/modules/ocl/src/opencl/haarobjectdetect_scaled2.cl
@@ -45,8 +45,6 @@
  //
  //M*/
  
-// Enter your kernel in this window
-//#pragma OPENCL EXTENSION cl_amd_printf:enable
  #define CV_HAAR_FEATURE_MAX           3
  typedef int   sumtype;
  typedef float sqsumtype;
@@ -288,8 +286,8 @@ __kernel void gpuscaleclassifier(global GpuHidHaarTreeNode *orinode, global GpuH
      int counter = get_global_id(0);
      int tr_x[3], tr_y[3], tr_h[3], tr_w[3], i = 0;
      GpuHidHaarTreeNode t1 = *(orinode + counter);
-#pragma unroll
  
+    #pragma unroll
      for (i = 0; i < 3; i++)
      {
          tr_x[i] = (int)(t1.p[i][0] * scale + 0.5f);
@@ -300,8 +298,8 @@ __kernel void gpuscaleclassifier(global GpuHidHaarTreeNode *orinode, global GpuH
  
      t1.weight[0] = -(t1.weight[1] * tr_h[1] * tr_w[1] + t1.weight[2] * tr_h[2] * tr_w[2]) / (tr_h[0] * tr_w[0]);
      counter += nodenum;
-#pragma unroll
  
+    #pragma unroll
      for (i = 0; i < 3; i++)
      {
          newnode[counter].p[i][0] = tr_x[i];
diff --git a/modules/ocl/src/opencl/imgproc_convolve.cl b/modules/ocl/src/opencl/imgproc_convolve.cl

index fb9596e..b8f9742 100644 (file)
--- a/modules/ocl/src/opencl/imgproc_convolve.cl
+++ b/modules/ocl/src/opencl/imgproc_convolve.cl
@@ -43,11 +43,13 @@
  //
  //M*/
  
-#if defined (__ATI__)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
  #pragma OPENCL EXTENSION cl_amd_fp64:enable
-#elif defined (__NVIDIA__)
+#elif defined (cl_khr_fp64)
  #pragma OPENCL EXTENSION cl_khr_fp64:enable
  #endif
+#endif
  
  /************************************** convolve **************************************/
  
diff --git a/modules/ocl/src/opencl/imgproc_copymakeboder.cl b/modules/ocl/src/opencl/imgproc_copymakeboder.cl

index d97f660..ac149a4 100644 (file)
--- a/modules/ocl/src/opencl/imgproc_copymakeboder.cl
+++ b/modules/ocl/src/opencl/imgproc_copymakeboder.cl
@@ -34,7 +34,7 @@
  //
  //
  
-#if defined (DOUBLE_SUPPORT)
+#ifdef DOUBLE_SUPPORT
  #ifdef cl_amd_fp64
  #pragma OPENCL EXTENSION cl_amd_fp64:enable
  #elif defined (cl_khr_fp64)
diff --git a/modules/ocl/src/opencl/imgproc_integral.cl b/modules/ocl/src/opencl/imgproc_integral.cl

index 05e76f9..a8102e5 100644 (file)
--- a/modules/ocl/src/opencl/imgproc_integral.cl
+++ b/modules/ocl/src/opencl/imgproc_integral.cl
@@ -43,13 +43,14 @@
  //
  //M*/
  
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
  #pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
  #endif
  #endif
+
  #define LSIZE 256
  #define LSIZE_1 255
  #define LSIZE_2 254
diff --git a/modules/ocl/src/opencl/imgproc_integral_sum.cl b/modules/ocl/src/opencl/imgproc_integral_sum.cl

index a6f73c7..6624061 100644 (file)
--- a/modules/ocl/src/opencl/imgproc_integral_sum.cl
+++ b/modules/ocl/src/opencl/imgproc_integral_sum.cl
@@ -43,11 +43,11 @@
  //
  //M*/
  
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
  #pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
  #endif
  #endif
  
diff --git a/modules/ocl/src/opencl/imgproc_remap.cl b/modules/ocl/src/opencl/imgproc_remap.cl

index 340e741..e1e3ca8 100644 (file)
--- a/modules/ocl/src/opencl/imgproc_remap.cl
+++ b/modules/ocl/src/opencl/imgproc_remap.cl
@@ -43,11 +43,11 @@
  //
  //M*/
  
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
  #pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
  #endif
  #endif
  
diff --git a/modules/ocl/src/opencl/imgproc_resize.cl b/modules/ocl/src/opencl/imgproc_resize.cl

index 2bb75b9..0d4cbed 100644 (file)
--- a/modules/ocl/src/opencl/imgproc_resize.cl
+++ b/modules/ocl/src/opencl/imgproc_resize.cl
@@ -48,8 +48,12 @@
  // Currently, CV_8UC1  CV_8UC4  CV_32FC1 and CV_32FC4are supported.
  // We shall support other types later if necessary.
  
-#if defined DOUBLE_SUPPORT
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
  #pragma OPENCL EXTENSION cl_khr_fp64:enable
+#endif
  #define F double
  #else
  #define F float
diff --git a/modules/ocl/src/opencl/imgproc_threshold.cl b/modules/ocl/src/opencl/imgproc_threshold.cl

index 6f97c04..63e4102 100644 (file)
--- a/modules/ocl/src/opencl/imgproc_threshold.cl
+++ b/modules/ocl/src/opencl/imgproc_threshold.cl
@@ -43,7 +43,7 @@
  //
  //M*/
  
-#if defined (DOUBLE_SUPPORT)
+#ifdef DOUBLE_SUPPORT
  #ifdef cl_amd_fp64
  #pragma OPENCL EXTENSION cl_amd_fp64:enable
  #elif defined (cl_khr_fp64)
diff --git a/modules/ocl/src/opencl/imgproc_warpAffine.cl b/modules/ocl/src/opencl/imgproc_warpAffine.cl

index a5050bb..27f99e0 100644 (file)
--- a/modules/ocl/src/opencl/imgproc_warpAffine.cl
+++ b/modules/ocl/src/opencl/imgproc_warpAffine.cl
@@ -47,11 +47,11 @@
  //warpAffine kernel
  //support data types: CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4, and three interpolation methods: NN, Linear, Cubic.
  
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
  #pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
  #endif
  typedef double F;
  typedef double4 F4;
diff --git a/modules/ocl/src/opencl/imgproc_warpPerspective.cl b/modules/ocl/src/opencl/imgproc_warpPerspective.cl

index eee1c81..97f8664 100644 (file)
--- a/modules/ocl/src/opencl/imgproc_warpPerspective.cl
+++ b/modules/ocl/src/opencl/imgproc_warpPerspective.cl
@@ -47,11 +47,11 @@
  //wrapPerspective kernel
  //support data types: CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4, and three interpolation methods: NN, Linear, Cubic.
  
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
  #pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
  #endif
  typedef double F;
  typedef double4 F4;
diff --git a/modules/ocl/src/opencl/kernel_stablesort_by_key.cl b/modules/ocl/src/opencl/kernel_stablesort_by_key.cl

index 2d38fbf..f8cc693 100644 (file)
--- a/modules/ocl/src/opencl/kernel_stablesort_by_key.cl
+++ b/modules/ocl/src/opencl/kernel_stablesort_by_key.cl
@@ -61,35 +61,6 @@
  #define my_comp(x,y) ((x) < (y))
  #endif
  
-///////////// parallel merge sort ///////////////
-// ported from https://github.com/HSA-Libraries/Bolt/blob/master/include/bolt/cl/stablesort_by_key_kernels.cl
-static uint lowerBoundLinear( global K_T* data, uint left, uint right, K_T searchVal)
-{
-    //  The values firstIndex and lastIndex get modified within the loop, narrowing down the potential sequence
-    uint firstIndex = left;
-    uint lastIndex = right;
-
-    //  This loops through [firstIndex, lastIndex)
-    //  Since firstIndex and lastIndex will be different for every thread depending on the nested branch,
-    //  this while loop will be divergent within a wavefront
-    while( firstIndex < lastIndex )
-    {
-        K_T dataVal = data[ firstIndex ];
-
-        //  This branch will create divergent wavefronts
-        if( my_comp( dataVal, searchVal ) )
-        {
-            firstIndex = firstIndex+1;
-        }
-        else
-        {
-            break;
-        }
-    }
-
-    return firstIndex;
-}
-
  //  This implements a binary search routine to look for an 'insertion point' in a sequence, denoted
  //  by a base pointer and left and right index for a particular candidate value.  The comparison operator is
  //  passed as a functor parameter my_comp
diff --git a/modules/ocl/src/opencl/knearest.cl b/modules/ocl/src/opencl/knearest.cl

index bc0ae89..85e2451 100644 (file)
--- a/modules/ocl/src/opencl/knearest.cl
+++ b/modules/ocl/src/opencl/knearest.cl
@@ -42,8 +42,13 @@
  // the use of this software, even if advised of the possibility of such damage.
  //
  //M*/
-#if defined (DOUBLE_SUPPORT)
+
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
  #pragma OPENCL EXTENSION cl_khr_fp64:enable
+#endif
  #define TYPE double
  #else
  #define TYPE float
diff --git a/modules/ocl/src/opencl/match_template.cl b/modules/ocl/src/opencl/match_template.cl

index 8b63c3b..4d46d00 100644 (file)
--- a/modules/ocl/src/opencl/match_template.cl
+++ b/modules/ocl/src/opencl/match_template.cl
@@ -43,14 +43,12 @@
  //
  //M*/
  
-#if defined (DOUBLE_SUPPORT)
-
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
  #pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
  #endif
-
  #define TYPE_IMAGE_SQSUM double
  #else
  #define TYPE_IMAGE_SQSUM float
diff --git a/modules/ocl/src/opencl/merge_mat.cl b/modules/ocl/src/opencl/merge_mat.cl

index 8b445c6..aea05ae 100644 (file)
--- a/modules/ocl/src/opencl/merge_mat.cl
+++ b/modules/ocl/src/opencl/merge_mat.cl
@@ -43,15 +43,19 @@
  //
  //M*/
  
-#if defined (DOUBLE_SUPPORT)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
  #pragma OPENCL EXTENSION cl_khr_fp64:enable
  #endif
-
+#endif
  
  ///////////////////////////////////////////////////////////////////////////////////////////////
  //////////////////////////////////optimized code using vector roi//////////////////////////
  ////////////vector fuction name format: merge_vector_C(channels number)D_(data type depth)//////
  ////////////////////////////////////////////////////////////////////////////////////////////////
+
  __kernel void merge_vector_C2_D0(__global uchar *mat_dst,  int dst_step,  int dst_offset,
                                   __global uchar *mat_src0, int src0_step, int src0_offset,
                                   __global uchar *mat_src1, int src1_step, int src1_offset,
diff --git a/modules/ocl/src/opencl/moments.cl b/modules/ocl/src/opencl/moments.cl

index 31c4c85..09c79c4 100644 (file)
--- a/modules/ocl/src/opencl/moments.cl
+++ b/modules/ocl/src/opencl/moments.cl
@@ -44,11 +44,11 @@
  //
  //M*/
  
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
  #pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
  #endif
  typedef double T;
  #else
diff --git a/modules/ocl/src/opencl/operator_convertTo.cl b/modules/ocl/src/opencl/operator_convertTo.cl

index 85b562d..ca38bd5 100644 (file)
--- a/modules/ocl/src/opencl/operator_convertTo.cl
+++ b/modules/ocl/src/opencl/operator_convertTo.cl
@@ -35,8 +35,12 @@
  //
  
  #ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
  #pragma OPENCL EXTENSION cl_khr_fp64:enable
  #endif
+#endif
  
  __kernel void convert_to(
          __global const srcT* restrict srcMat,
diff --git a/modules/ocl/src/opencl/operator_copyToM.cl b/modules/ocl/src/opencl/operator_copyToM.cl

index dcf5af9..69e1798 100644 (file)
--- a/modules/ocl/src/opencl/operator_copyToM.cl
+++ b/modules/ocl/src/opencl/operator_copyToM.cl
@@ -34,11 +34,11 @@
  //
  //
  
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
  #pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
  #endif
  #endif
  
diff --git a/modules/ocl/src/opencl/operator_setTo.cl b/modules/ocl/src/opencl/operator_setTo.cl

index 8ac4803..20c5cf2 100644 (file)
--- a/modules/ocl/src/opencl/operator_setTo.cl
+++ b/modules/ocl/src/opencl/operator_setTo.cl
@@ -34,11 +34,11 @@
  //
  //
  
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
  #pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
  #endif
  #endif
  
diff --git a/modules/ocl/src/opencl/operator_setToM.cl b/modules/ocl/src/opencl/operator_setToM.cl

index 8a489da..afaa2e6 100644 (file)
--- a/modules/ocl/src/opencl/operator_setToM.cl
+++ b/modules/ocl/src/opencl/operator_setToM.cl
@@ -34,11 +34,11 @@
  //
  //
  
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
  #pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
  #endif
  #endif
  
diff --git a/modules/ocl/src/opencl/pyrlk.cl b/modules/ocl/src/opencl/pyrlk.cl

index a7fc278..303d268 100644 (file)
--- a/modules/ocl/src/opencl/pyrlk.cl
+++ b/modules/ocl/src/opencl/pyrlk.cl
@@ -45,8 +45,6 @@
  //
  //M*/
  
-//#pragma OPENCL EXTENSION cl_amd_printf : enable
-
  #define        BUFFER  64
  #define        BUFFER2 BUFFER>>1
  #ifndef WAVE_SIZE
diff --git a/modules/ocl/src/opencl/split_mat.cl b/modules/ocl/src/opencl/split_mat.cl

index b9aa048..b52b3c2 100644 (file)
--- a/modules/ocl/src/opencl/split_mat.cl
+++ b/modules/ocl/src/opencl/split_mat.cl
@@ -38,9 +38,14 @@
  // the use of this software, even if advised of the possibility of such damage.
  //
  //M*/
-#if defined (DOUBLE_SUPPORT)
+
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
  #pragma OPENCL EXTENSION cl_khr_fp64:enable
  #endif
+#endif
  
  #if DATA_DEPTH == 0
  #define BASE_TYPE uchar
diff --git a/modules/ocl/src/opencl/stereobm.cl b/modules/ocl/src/opencl/stereobm.cl

index 207bf00..0edccdb 100644 (file)
--- a/modules/ocl/src/opencl/stereobm.cl
+++ b/modules/ocl/src/opencl/stereobm.cl
@@ -260,7 +260,6 @@ static float CalcSums(__local float *cols, __local float *cols_cache, int winsz)
  {
      unsigned int cache = cols[0];
  
-#pragma unroll
      for(int i = 1; i <= winsz; i++)
          cache += cols[i];
  
diff --git a/modules/ocl/src/opencl/stereobp.cl b/modules/ocl/src/opencl/stereobp.cl

index ec02f82..4b5864f 100644 (file)
--- a/modules/ocl/src/opencl/stereobp.cl
+++ b/modules/ocl/src/opencl/stereobp.cl
@@ -45,13 +45,11 @@
  //M*/
  
  #if defined (DOUBLE_SUPPORT)
-
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef cl_amd_fp64
  #pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
  #endif
-
  #endif
  
  #ifdef T_FLOAT
diff --git a/modules/ocl/src/opencl/stereocsbp.cl b/modules/ocl/src/opencl/stereocsbp.cl

index 13a201c..72c1707 100644 (file)
--- a/modules/ocl/src/opencl/stereocsbp.cl
+++ b/modules/ocl/src/opencl/stereocsbp.cl
@@ -44,19 +44,10 @@
  //
  //M*/
  
-
-#ifndef FLT_MAX
-#define FLT_MAX  CL_FLT_MAX
-#endif
-
-#ifndef SHRT_MAX
-#define SHRT_MAX  CL_SHORT_MAX
-#endif
-
-
  ///////////////////////////////////////////////////////////////////////////////////////////////
  ////////////////////////////////////////get_first_k_initial_global//////////////////////////////
  //////////////////////////////////////////////////////////////////////////////////////////////
+
  __kernel void get_first_k_initial_global_0(__global short *data_cost_selected_, __global short *selected_disp_pyr,
      __global short *ctemp, int h, int w, int nr_plane,
      int cmsg_step1, int cdisp_step1, int cndisp)
@@ -91,6 +82,7 @@ __kernel void get_first_k_initial_global_0(__global short *data_cost_selected_,
          }
      }
  }
+
  __kernel void get_first_k_initial_global_1(__global  float *data_cost_selected_, __global float *selected_disp_pyr,
      __global  float *ctemp, int h, int w, int nr_plane,
      int cmsg_step1, int cdisp_step1, int cndisp)
@@ -129,6 +121,7 @@ __kernel void get_first_k_initial_global_1(__global  float *data_cost_selected_,
  ////////////////////////////////////////////////////////////////////////////////////////////////////////
  ///////////////////////////////////////////get_first_k_initial_local////////////////////////////////////
  ////////////////////////////////////////////////////////////////////////////////////////////////////////
+
  __kernel void get_first_k_initial_local_0(__global  short *data_cost_selected_, __global short *selected_disp_pyr,
      __global  short *ctemp,int h, int w, int nr_plane,
      int cmsg_step1, int cdisp_step1, int cndisp)
@@ -248,6 +241,7 @@ __kernel void get_first_k_initial_local_1(__global float *data_cost_selected_, _
  ///////////////////////////////////////////////////////////////
  /////////////////////// init data cost ////////////////////////
  ///////////////////////////////////////////////////////////////
+
  inline float compute_3(__global uchar* left, __global uchar* right,
      float cdata_weight,  float cmax_data_term)
  {
@@ -257,6 +251,7 @@ inline float compute_3(__global uchar* left, __global uchar* right,
  
      return fmin(cdata_weight * (tr + tg + tb), cdata_weight * cmax_data_term);
  }
+
  inline float compute_1(__global uchar* left, __global uchar* right,
      float cdata_weight,  float cmax_data_term)
  {
@@ -316,6 +311,7 @@ __kernel void init_data_cost_0(__global short *ctemp, __global uchar *cleft, __g
          }
      }
  }
+
  __kernel void init_data_cost_1(__global float *ctemp, __global uchar *cleft, __global uchar *cright,
      int h, int w, int level, int channels,
      int cmsg_step1, float cdata_weight, float cmax_data_term, int cdisp_step1,
@@ -360,9 +356,11 @@ __kernel void init_data_cost_1(__global float *ctemp, __global uchar *cleft, __g
          }
      }
  }
+
  ////////////////////////////////////////////////////////////////////////////////////////////////////////
  //////////////////////////////////init_data_cost_reduce//////////////////////////////////////////////////
  //////////////////////////////////////////////////////////////////////////////////////////////////////////
+
  __kernel void init_data_cost_reduce_0(__global short *ctemp, __global uchar *cleft, __global uchar *cright,
      __local float *smem, int level, int rows, int cols, int h, int winsz, int channels,
      int cndisp,int cimg_step, float cdata_weight, float cmax_data_term, int cth,
@@ -630,6 +628,7 @@ __kernel void init_data_cost_reduce_1(__global float *ctemp, __global uchar *cle
  ///////////////////////////////////////////////////////////////
  ////////////////////// compute data cost //////////////////////
  ///////////////////////////////////////////////////////////////
+
  __kernel void compute_data_cost_0(__global const short *selected_disp_pyr, __global short *data_cost_,
      __global uchar *cleft, __global uchar *cright,
      int h, int w, int level, int nr_plane, int channels,
@@ -680,6 +679,7 @@ __kernel void compute_data_cost_0(__global const short *selected_disp_pyr, __glo
          }
      }
  }
+
  __kernel void compute_data_cost_1(__global const float *selected_disp_pyr, __global float *data_cost_,
      __global uchar *cleft, __global uchar *cright,
      int h, int w, int level, int nr_plane, int channels,
@@ -729,9 +729,11 @@ __kernel void compute_data_cost_1(__global const float *selected_disp_pyr, __glo
          }
      }
  }
+
  ////////////////////////////////////////////////////////////////////////////////////////////////////////
  ////////////////////////////////////////compute_data_cost_reduce//////////////////////////////////////////
  /////////////////////////////////////////////////////////////////////////////////////////////////////////
+
  __kernel void compute_data_cost_reduce_0(__global const short* selected_disp_pyr, __global short* data_cost_,
      __global uchar *cleft, __global uchar *cright,__local float *smem,
      int level, int rows, int cols, int h, int nr_plane,
@@ -1033,41 +1035,6 @@ static void get_first_k_element_increase_0(__global short* u_new, __global short
      }
  }
  
-static void get_first_k_element_increase_1(__global float *u_new, __global float *d_new, __global float *l_new,
-    __global float *r_new, __global const float *u_cur, __global const float *d_cur,
-    __global const float *l_cur, __global const float *r_cur,
-    __global float *data_cost_selected, __global float *disparity_selected_new,
-    __global float *data_cost_new, __global const float *data_cost_cur,
-    __global const float *disparity_selected_cur,
-    int nr_plane, int nr_plane2,
-    int cdisp_step1, int cdisp_step2)
-{
-    for(int i = 0; i < nr_plane; i++)
-    {
-        float minimum = FLT_MAX;
-        int id = 0;
-
-        for(int j = 0; j < nr_plane2; j++)
-        {
-            float cur = data_cost_new[j * cdisp_step1];
-            if(cur < minimum)
-            {
-                minimum = cur;
-                id = j;
-            }
-        }
-
-        data_cost_selected[i * cdisp_step1] = data_cost_cur[id * cdisp_step1];
-        disparity_selected_new[i * cdisp_step1] = disparity_selected_cur[id * cdisp_step2];
-
-        u_new[i * cdisp_step1] = u_cur[id * cdisp_step2];
-        d_new[i * cdisp_step1] = d_cur[id * cdisp_step2];
-        l_new[i * cdisp_step1] = l_cur[id * cdisp_step2];
-        r_new[i * cdisp_step1] = r_cur[id * cdisp_step2];
-        data_cost_new[id * cdisp_step1] = FLT_MAX;
-
-    }
-}
  __kernel void init_message_0(__global short *u_new_, __global short *d_new_, __global short *l_new_,
      __global short *r_new_, __global  short *u_cur_, __global const short *d_cur_,
      __global const short *l_cur_, __global const short *r_cur_, __global short *ctemp,
@@ -1118,6 +1085,7 @@ __kernel void init_message_0(__global short *u_new_, __global short *d_new_, __g
              cdisp_step1, cdisp_step2);
      }
  }
+
  __kernel void init_message_1(__global float *u_new_, __global float *d_new_, __global float *l_new_,
      __global float *r_new_, __global const float *u_cur_, __global const float *d_cur_,
      __global const float *l_cur_, __global const float *r_cur_, __global float *ctemp,
diff --git a/modules/ocl/src/opencl/svm.cl b/modules/ocl/src/opencl/svm.cl

index 36ae38e..32b8194 100644 (file)
--- a/modules/ocl/src/opencl/svm.cl
+++ b/modules/ocl/src/opencl/svm.cl
@@ -33,11 +33,12 @@
  // the use of this software, even if advised of the possibility of such damage.
  //
  //
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
  #pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
  #endif
  #define TYPE double
  #else
@@ -53,7 +54,6 @@
  #else
  #define POW(X,Y) X
  #endif
-#define FLT_MAX   3.402823466e+38F
  #define MAX_VAL   (FLT_MAX*1e-3)
  
  __kernel void svm_linear(__global float* src, int src_step, __global float* src2, int src2_step, __global TYPE* dst, int dst_step, int src_rows, int src2_cols,
@@ -206,4 +206,4 @@ __kernel void svm_rbf(__global float* src, int src_step, __global float* src2, i
              dst[row * dst_step + col] = temp1;
          }
      }
-}
-\ No newline at end of file
+}
diff --git a/modules/ocl/src/opencl/tvl1flow.cl b/modules/ocl/src/opencl/tvl1flow.cl

index 2787f00..6111a4a 100644 (file)
--- a/modules/ocl/src/opencl/tvl1flow.cl
+++ b/modules/ocl/src/opencl/tvl1flow.cl
@@ -44,7 +44,7 @@
  //M*/
  
  __kernel void centeredGradientKernel(__global const float* src, int src_col, int src_row, int src_step,
-__global float* dx, __global float* dy, int dx_step)
+                                     __global float* dx, __global float* dy, int dx_step)
  {
      int x = get_global_id(0);
      int y = get_global_id(1);
@@ -53,13 +53,6 @@ __global float* dx, __global float* dy, int dx_step)
      {
          int src_x1 = (x + 1) < (src_col -1)? (x + 1) : (src_col - 1);
          int src_x2 = (x - 1) > 0 ? (x -1) : 0;
-
-        //if(src[y * src_step + src_x1] == src[y * src_step+ src_x2])
-        //{
-        //    printf("y = %d\n", y);
-        //    printf("src_x1 = %d\n", src_x1);
-        //    printf("src_x2 = %d\n", src_x2);
-        //}
          dx[y * dx_step+ x] = 0.5f * (src[y * src_step + src_x1] - src[y * src_step+ src_x2]);
  
          int src_y1 = (y+1) < (src_row - 1) ? (y + 1) : (src_row - 1);
@@ -97,24 +90,24 @@ __kernel void warpBackwardKernel(__global const float* I0, int I0_step, int I0_c
      int u2_offset_x,
      int u2_offset_y)
  {
-    const int x = get_global_id(0);
-    const int y = get_global_id(1);
+    int x = get_global_id(0);
+    int y = get_global_id(1);
  
      if(x < I0_col&&y < I0_row)
      {
-        //const float u1Val = u1(y, x);
-        const float u1Val = u1[(y + u1_offset_y) * u1_step + x + u1_offset_x];
-        //const float u2Val = u2(y, x);
-        const float u2Val = u2[(y + u2_offset_y) * u2_step + x + u2_offset_x];
+        //float u1Val = u1(y, x);
+        float u1Val = u1[(y + u1_offset_y) * u1_step + x + u1_offset_x];
+        //float u2Val = u2(y, x);
+        float u2Val = u2[(y + u2_offset_y) * u2_step + x + u2_offset_x];
  
-        const float wx = x + u1Val;
-        const float wy = y + u2Val;
+        float wx = x + u1Val;
+        float wy = y + u2Val;
  
-        const int xmin = ceil(wx - 2.0f);
-        const int xmax = floor(wx + 2.0f);
+        int xmin = ceil(wx - 2.0f);
+        int xmax = floor(wx + 2.0f);
  
-        const int ymin = ceil(wy - 2.0f);
-        const int ymax = floor(wy + 2.0f);
+        int ymin = ceil(wy - 2.0f);
+        int ymax = floor(wy + 2.0f);
  
          float sum  = 0.0f;
          float sumx = 0.0f;
@@ -126,7 +119,7 @@ __kernel void warpBackwardKernel(__global const float* I0, int I0_step, int I0_c
          {
              for (int cx = xmin; cx <= xmax; ++cx)
              {
-                const float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy);
+                float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy);
  
                  //sum  += w * tex2D(tex_I1 , cx, cy);
                  int2 cood = (int2)(cx, cy);
@@ -140,30 +133,30 @@ __kernel void warpBackwardKernel(__global const float* I0, int I0_step, int I0_c
              }
          }
  
-        const float coeff = 1.0f / wsum;
+        float coeff = 1.0f / wsum;
  
-        const float I1wVal  = sum  * coeff;
-        const float I1wxVal = sumx * coeff;
-        const float I1wyVal = sumy * coeff;
+        float I1wVal  = sum  * coeff;
+        float I1wxVal = sumx * coeff;
+        float I1wyVal = sumy * coeff;
  
          I1w[y * I1w_step + x]  = I1wVal;
          I1wx[y * I1w_step + x] = I1wxVal;
          I1wy[y * I1w_step + x] = I1wyVal;
  
-        const float Ix2 = I1wxVal * I1wxVal;
-        const float Iy2 = I1wyVal * I1wyVal;
+        float Ix2 = I1wxVal * I1wxVal;
+        float Iy2 = I1wyVal * I1wyVal;
  
          // store the |Grad(I1)|^2
          grad[y * I1w_step + x] = Ix2 + Iy2;
  
          // compute the constant part of the rho function
-        const float I0Val = I0[y * I0_step + x];
+        float I0Val = I0[y * I0_step + x];
          rho[y * I1w_step + x] = I1wVal - I1wxVal * u1Val - I1wyVal * u2Val - I0Val;
      }
  
  }
  
-static float readImage(__global const float *image,  const int x,  const int y,  const int rows,  const int cols, const int elemCntPerRow)
+static float readImage(__global float *image,  int x,  int y,  int rows,  int cols, int elemCntPerRow)
  {
      int i0 = clamp(x, 0, cols - 1);
      int j0 = clamp(y, 0, rows - 1);
@@ -185,24 +178,24 @@ __kernel void warpBackwardKernelNoImage2d(__global const float* I0, int I0_step,
      int I1_step,
      int I1x_step)
  {
-    const int x = get_global_id(0);
-    const int y = get_global_id(1);
+    int x = get_global_id(0);
+    int y = get_global_id(1);
  
      if(x < I0_col&&y < I0_row)
      {
-        //const float u1Val = u1(y, x);
-        const float u1Val = u1[y * u1_step + x];
-        //const float u2Val = u2(y, x);
-        const float u2Val = u2[y * u2_step + x];
+        //float u1Val = u1(y, x);
+        float u1Val = u1[y * u1_step + x];
+        //float u2Val = u2(y, x);
+        float u2Val = u2[y * u2_step + x];
  
-        const float wx = x + u1Val;
-        const float wy = y + u2Val;
+        float wx = x + u1Val;
+        float wy = y + u2Val;
  
-        const int xmin = ceil(wx - 2.0f);
-        const int xmax = floor(wx + 2.0f);
+        int xmin = ceil(wx - 2.0f);
+        int xmax = floor(wx + 2.0f);
  
-        const int ymin = ceil(wy - 2.0f);
-        const int ymax = floor(wy + 2.0f);
+        int ymin = ceil(wy - 2.0f);
+        int ymax = floor(wy + 2.0f);
  
          float sum  = 0.0f;
          float sumx = 0.0f;
@@ -213,7 +206,7 @@ __kernel void warpBackwardKernelNoImage2d(__global const float* I0, int I0_step,
          {
              for (int cx = xmin; cx <= xmax; ++cx)
              {
-                const float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy);
+                float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy);
  
                  int2 cood = (int2)(cx, cy);
                  sum += w * readImage(tex_I1, cood.x, cood.y, I0_col, I0_row, I1_step);
@@ -223,24 +216,24 @@ __kernel void warpBackwardKernelNoImage2d(__global const float* I0, int I0_step,
              }
          }
  
-        const float coeff = 1.0f / wsum;
+        float coeff = 1.0f / wsum;
  
-        const float I1wVal  = sum  * coeff;
-        const float I1wxVal = sumx * coeff;
-        const float I1wyVal = sumy * coeff;
+        float I1wVal  = sum  * coeff;
+        float I1wxVal = sumx * coeff;
+        float I1wyVal = sumy * coeff;
  
          I1w[y * I1w_step + x]  = I1wVal;
          I1wx[y * I1w_step + x] = I1wxVal;
          I1wy[y * I1w_step + x] = I1wyVal;
  
-        const float Ix2 = I1wxVal * I1wxVal;
-        const float Iy2 = I1wyVal * I1wyVal;
+        float Ix2 = I1wxVal * I1wxVal;
+        float Iy2 = I1wyVal * I1wyVal;
  
          // store the |Grad(I1)|^2
          grad[y * I1w_step + x] = Ix2 + Iy2;
  
          // compute the constant part of the rho function
-        const float I0Val = I0[y * I0_step + x];
+        float I0Val = I0[y * I0_step + x];
          rho[y * I1w_step + x] = I1wVal - I1wxVal * u1Val - I1wyVal * u2Val - I0Val;
      }
  
@@ -253,38 +246,35 @@ __kernel void estimateDualVariablesKernel(__global const float* u1, int u1_col,
      __global float* p12,
      __global float* p21,
      __global float* p22,
-    const float taut,
+    float taut,
      int u2_step,
      int u1_offset_x,
      int u1_offset_y,
      int u2_offset_x,
      int u2_offset_y)
  {
-
-    //const int x = blockIdx.x * blockDim.x + threadIdx.x;
-    //const int y = blockIdx.y * blockDim.y + threadIdx.y;
-    const int x = get_global_id(0);
-    const int y = get_global_id(1);
+    int x = get_global_id(0);
+    int y = get_global_id(1);
  
      if(x < u1_col && y < u1_row)
      {
          int src_x1 = (x + 1) < (u1_col - 1) ? (x + 1) : (u1_col - 1);
-        const float u1x = u1[(y + u1_offset_y) * u1_step + src_x1 + u1_offset_x] - u1[(y + u1_offset_y) * u1_step + x + u1_offset_x];
+        float u1x = u1[(y + u1_offset_y) * u1_step + src_x1 + u1_offset_x] - u1[(y + u1_offset_y) * u1_step + x + u1_offset_x];
  
          int src_y1 = (y + 1) < (u1_row - 1) ? (y + 1) : (u1_row - 1);
-        const float u1y = u1[(src_y1 + u1_offset_y) * u1_step + x + u1_offset_x] - u1[(y + u1_offset_y) * u1_step + x + u1_offset_x];
+        float u1y = u1[(src_y1 + u1_offset_y) * u1_step + x + u1_offset_x] - u1[(y + u1_offset_y) * u1_step + x + u1_offset_x];
  
          int src_x2 = (x + 1) < (u1_col - 1) ? (x + 1) : (u1_col - 1);
-        const float u2x = u2[(y + u2_offset_y) * u2_step + src_x2 + u2_offset_x] - u2[(y + u2_offset_y) * u2_step + x + u2_offset_x];
+        float u2x = u2[(y + u2_offset_y) * u2_step + src_x2 + u2_offset_x] - u2[(y + u2_offset_y) * u2_step + x + u2_offset_x];
  
          int src_y2 = (y + 1) <  (u1_row - 1) ? (y + 1) : (u1_row - 1);
-        const float u2y = u2[(src_y2 + u2_offset_y) * u2_step + x + u2_offset_x] - u2[(y + u2_offset_y) * u2_step + x + u2_offset_x];
+        float u2y = u2[(src_y2 + u2_offset_y) * u2_step + x + u2_offset_x] - u2[(y + u2_offset_y) * u2_step + x + u2_offset_x];
  
-        const float g1 = hypot(u1x, u1y);
-        const float g2 = hypot(u2x, u2y);
+        float g1 = hypot(u1x, u1y);
+        float g2 = hypot(u2x, u2y);
  
-        const float ng1 = 1.0f + taut * g1;
-        const float ng2 = 1.0f + taut * g2;
+        float ng1 = 1.0f + taut * g1;
+        float ng2 = 1.0f + taut * g2;
  
          p11[y * p11_step + x] = (p11[y * p11_step + x] + taut * u1x) / ng1;
          p12[y * p11_step + x] = (p12[y * p11_step + x] + taut * u1y) / ng1;
@@ -299,8 +289,8 @@ static float divergence(__global const float* v1, __global const float* v2, int
  
      if (x > 0 && y > 0)
      {
-        const float v1x = v1[y * v1_step + x] - v1[y * v1_step + x - 1];
-        const float v2y = v2[y * v2_step + x] - v2[(y - 1) * v2_step + x];
+        float v1x = v1[y * v1_step + x] - v1[y * v1_step + x - 1];
+        float v2y = v2[y * v2_step + x] - v2[(y - 1) * v2_step + x];
          return v1x + v2y;
      }
      else
@@ -328,30 +318,25 @@ __kernel void estimateUKernel(__global const float* I1wx, int I1wx_col, int I1wx
      __global const float* p22, /*int p22_step,*/
      __global float* u1, int u1_step,
      __global float* u2,
-    __global float* error, const float l_t, const float theta, int u2_step,
+    __global float* error, float l_t, float theta, int u2_step,
      int u1_offset_x,
      int u1_offset_y,
      int u2_offset_x,
      int u2_offset_y,
      char calc_error)
  {
-
-    //const int x = blockIdx.x * blockDim.x + threadIdx.x;
-    //const int y = blockIdx.y * blockDim.y + threadIdx.y;
-
      int x = get_global_id(0);
      int y = get_global_id(1);
  
-
      if(x < I1wx_col && y < I1wx_row)
      {
-        const float I1wxVal = I1wx[y * I1wx_step + x];
-        const float I1wyVal = I1wy[y * I1wx_step + x];
-        const float gradVal = grad[y * I1wx_step + x];
-        const float u1OldVal = u1[(y + u1_offset_y) * u1_step + x + u1_offset_x];
-        const float u2OldVal = u2[(y + u2_offset_y) * u2_step + x + u2_offset_x];
+        float I1wxVal = I1wx[y * I1wx_step + x];
+        float I1wyVal = I1wy[y * I1wx_step + x];
+        float gradVal = grad[y * I1wx_step + x];
+        float u1OldVal = u1[(y + u1_offset_y) * u1_step + x + u1_offset_x];
+        float u2OldVal = u2[(y + u2_offset_y) * u2_step + x + u2_offset_x];
  
-        const float rho = rho_c[y * I1wx_step + x] + (I1wxVal * u1OldVal + I1wyVal * u2OldVal);
+        float rho = rho_c[y * I1wx_step + x] + (I1wxVal * u1OldVal + I1wyVal * u2OldVal);
  
          // estimate the values of the variable (v1, v2) (thresholding operator TH)
  
@@ -370,31 +355,31 @@ __kernel void estimateUKernel(__global const float* I1wx, int I1wx_col, int I1wx
          }
          else if (gradVal > 1.192092896e-07f)
          {
-            const float fi = -rho / gradVal;
+            float fi = -rho / gradVal;
              d1 = fi * I1wxVal;
              d2 = fi * I1wyVal;
          }
  
-        const float v1 = u1OldVal + d1;
-        const float v2 = u2OldVal + d2;
+        float v1 = u1OldVal + d1;
+        float v2 = u2OldVal + d2;
  
          // compute the divergence of the dual variable (p1, p2)
  
-        const float div_p1 = divergence(p11, p12, y, x, I1wx_step, I1wx_step);
-        const float div_p2 = divergence(p21, p22, y, x, I1wx_step, I1wx_step);
+        float div_p1 = divergence(p11, p12, y, x, I1wx_step, I1wx_step);
+        float div_p2 = divergence(p21, p22, y, x, I1wx_step, I1wx_step);
  
          // estimate the values of the optical flow (u1, u2)
  
-        const float u1NewVal = v1 + theta * div_p1;
-        const float u2NewVal = v2 + theta * div_p2;
+        float u1NewVal = v1 + theta * div_p1;
+        float u2NewVal = v2 + theta * div_p2;
  
          u1[(y + u1_offset_y) * u1_step + x + u1_offset_x] = u1NewVal;
          u2[(y + u2_offset_y) * u2_step + x + u2_offset_x] = u2NewVal;
  
          if(calc_error)
          {
-            const float n1 = (u1OldVal - u1NewVal) * (u1OldVal - u1NewVal);
-            const float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal);
+            float n1 = (u1OldVal - u1NewVal) * (u1OldVal - u1NewVal);
+            float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal);
              error[y * I1wx_step + x] = n1 + n2;
          }
      }
author	Ilya Lavrenov <ilya.lavrenov@itseez.com>
	Mon, 11 Nov 2013 16:08:40 +0000 (20:08 +0400)
committer	Ilya Lavrenov <ilya.lavrenov@itseez.com>
	Tue, 12 Nov 2013 11:06:46 +0000 (15:06 +0400)
modules/ocl/src/opencl/arithm_LUT.cl		patch \| blob \| history
modules/ocl/src/opencl/arithm_absdiff_nonsaturate.cl		patch \| blob \| history
modules/ocl/src/opencl/arithm_add.cl		patch \| blob \| history
modules/ocl/src/opencl/arithm_addWeighted.cl		patch \| blob \| history
modules/ocl/src/opencl/arithm_add_mask.cl		patch \| blob \| history
modules/ocl/src/opencl/arithm_add_scalar.cl		patch \| blob \| history
modules/ocl/src/opencl/arithm_add_scalar_mask.cl		patch \| blob \| history
modules/ocl/src/opencl/arithm_bitwise_binary_scalar_mask.cl		patch \| blob \| history
modules/ocl/src/opencl/arithm_bitwise_not.cl		patch \| blob \| history
modules/ocl/src/opencl/arithm_cartToPolar.cl		patch \| blob \| history
modules/ocl/src/opencl/arithm_compare.cl		patch \| blob \| history
modules/ocl/src/opencl/arithm_exp.cl		patch \| blob \| history
modules/ocl/src/opencl/arithm_flip.cl		patch \| blob \| history
modules/ocl/src/opencl/arithm_log.cl		patch \| blob \| history
modules/ocl/src/opencl/arithm_magnitude.cl		patch \| blob \| history
modules/ocl/src/opencl/arithm_minMax.cl		patch \| blob \| history
modules/ocl/src/opencl/arithm_minMaxLoc.cl		patch \| blob \| history
modules/ocl/src/opencl/arithm_minMaxLoc_mask.cl		patch \| blob \| history
modules/ocl/src/opencl/arithm_nonzero.cl		patch \| blob \| history
modules/ocl/src/opencl/arithm_phase.cl		patch \| blob \| history
modules/ocl/src/opencl/arithm_polarToCart.cl		patch \| blob \| history
modules/ocl/src/opencl/arithm_pow.cl		patch \| blob \| history
modules/ocl/src/opencl/arithm_setidentity.cl		patch \| blob \| history
modules/ocl/src/opencl/arithm_sum.cl		patch \| blob \| history
modules/ocl/src/opencl/arithm_transpose.cl		patch \| blob \| history
modules/ocl/src/opencl/bgfg_mog.cl		patch \| blob \| history
modules/ocl/src/opencl/blend_linear.cl		patch \| blob \| history
modules/ocl/src/opencl/brute_force_match.cl		patch \| blob \| history
modules/ocl/src/opencl/convertC3C4.cl		patch \| blob \| history
modules/ocl/src/opencl/filtering_boxFilter.cl		patch \| blob \| history
modules/ocl/src/opencl/filtering_filter2D.cl		patch \| blob \| history
modules/ocl/src/opencl/haarobjectdetect_scaled2.cl		patch \| blob \| history
modules/ocl/src/opencl/imgproc_convolve.cl		patch \| blob \| history
modules/ocl/src/opencl/imgproc_copymakeboder.cl		patch \| blob \| history
modules/ocl/src/opencl/imgproc_integral.cl		patch \| blob \| history
modules/ocl/src/opencl/imgproc_integral_sum.cl		patch \| blob \| history
modules/ocl/src/opencl/imgproc_remap.cl		patch \| blob \| history
modules/ocl/src/opencl/imgproc_resize.cl		patch \| blob \| history
modules/ocl/src/opencl/imgproc_threshold.cl		patch \| blob \| history
modules/ocl/src/opencl/imgproc_warpAffine.cl		patch \| blob \| history
modules/ocl/src/opencl/imgproc_warpPerspective.cl		patch \| blob \| history
modules/ocl/src/opencl/kernel_stablesort_by_key.cl		patch \| blob \| history
modules/ocl/src/opencl/knearest.cl		patch \| blob \| history
modules/ocl/src/opencl/match_template.cl		patch \| blob \| history
modules/ocl/src/opencl/merge_mat.cl		patch \| blob \| history
modules/ocl/src/opencl/moments.cl		patch \| blob \| history
modules/ocl/src/opencl/operator_convertTo.cl		patch \| blob \| history
modules/ocl/src/opencl/operator_copyToM.cl		patch \| blob \| history
modules/ocl/src/opencl/operator_setTo.cl		patch \| blob \| history
modules/ocl/src/opencl/operator_setToM.cl		patch \| blob \| history
modules/ocl/src/opencl/pyrlk.cl		patch \| blob \| history
modules/ocl/src/opencl/split_mat.cl		patch \| blob \| history
modules/ocl/src/opencl/stereobm.cl		patch \| blob \| history
modules/ocl/src/opencl/stereobp.cl		patch \| blob \| history
modules/ocl/src/opencl/stereocsbp.cl		patch \| blob \| history
modules/ocl/src/opencl/svm.cl		patch \| blob \| history
modules/ocl/src/opencl/tvl1flow.cl		patch \| blob \| history