fixed ocl::phase

author Ilya Lavrenov <ilya.lavrenov@itseez.com>

Mon, 30 Sep 2013 11:08:26 +0000 (15:08 +0400)

committer Ilya Lavrenov <ilya.lavrenov@itseez.com>

Mon, 30 Sep 2013 12:02:14 +0000 (16:02 +0400)
author Ilya Lavrenov <ilya.lavrenov@itseez.com>
Mon, 30 Sep 2013 11:08:26 +0000 (15:08 +0400)
committer Ilya Lavrenov <ilya.lavrenov@itseez.com>
Mon, 30 Sep 2013 12:02:14 +0000 (16:02 +0400)
diff --git a/modules/ocl/src/arithm.cpp b/modules/ocl/src/arithm.cpp

index deb5163..6467040 100644 (file)
--- a/modules/ocl/src/arithm.cpp
+++ b/modules/ocl/src/arithm.cpp
@@ -461,8 +461,8 @@ void cv::ocl::meanStdDev(const oclMat &src, Scalar &mean, Scalar &stddev)
          m2(sz, CV_MAKETYPE(CV_32S, channels), cv::Scalar::all(0));
      oclMat dst1(m1), dst2(m2);
  
-    //arithmetic_sum_run(src, dst1,"arithm_op_sum");
-    //arithmetic_sum_run(src, dst2,"arithm_op_squares_sum");
+//    arithmetic_sum_run(src, dst1, "arithm_op_sum");
+//    arithmetic_sum_run(src, dst2, "arithm_op_squares_sum");
  
      m1 = (Mat)dst1;
      m2 = (Mat)dst2;
@@ -558,7 +558,6 @@ void arithmetic_minMax(const oclMat &src, double *minVal, double *maxVal,
      }
  }
  
-
  void cv::ocl::minMax(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask)
  {
      oclMat buf;
@@ -928,47 +927,38 @@ static void arithmetic_phase_run(const oclMat &src1, const oclMat &src2, oclMat
          return;
      }
  
-    CV_Assert(src1.cols == src2.cols && src2.cols == dst.cols && src1.rows == src2.rows && src2.rows == dst.rows);
-    CV_Assert(src1.type() == src2.type() && src1.type() == dst.type());
-
      Context  *clCxt = src1.clCxt;
-    int channels = dst.oclchannels();
-    int depth = dst.depth();
-
-    size_t vector_length = 1;
-    int offset_cols = ((dst.offset % dst.step) / dst.elemSize1()) & (vector_length - 1);
-    int cols = divUp(dst.cols * channels + offset_cols, vector_length);
+    int depth = dst.depth(), cols1 = src1.cols * src1.oclchannels();
+    int src1step1 = src1.step / src1.elemSize1(), src1offset1 = src1.offset / src1.elemSize1();
+    int src2step1 = src2.step / src2.elemSize1(), src2offset1 = src2.offset / src2.elemSize1();
+    int dststep1 = dst.step / dst.elemSize1(), dstoffset1 = dst.offset / dst.elemSize1();
  
      size_t localThreads[3]  = { 64, 4, 1 };
-    size_t globalThreads[3] = { cols, dst.rows, 1 };
+    size_t globalThreads[3] = { cols1, dst.rows, 1 };
  
-    int dst_step1 = dst.cols * dst.elemSize();
      vector<pair<size_t , const void *> > args;
      args.push_back( make_pair( sizeof(cl_mem), (void *)&src1.data ));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&src1.step ));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&src1.offset ));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&src1step1 ));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&src1offset1 ));
      args.push_back( make_pair( sizeof(cl_mem), (void *)&src2.data ));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&src2.step ));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&src2.offset ));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&src2step1 ));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&src2offset1 ));
      args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data ));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step ));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&dst.offset ));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&dststep1 ));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&dstoffset1 ));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&cols1 ));
      args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows ));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&cols ));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 ));
  
      openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, depth);
  }
  
-void cv::ocl::phase(const oclMat &x, const oclMat &y, oclMat &Angle , bool angleInDegrees)
+void cv::ocl::phase(const oclMat &x, const oclMat &y, oclMat &Angle, bool angleInDegrees)
  {
      CV_Assert(x.type() == y.type() && x.size() == y.size() && (x.depth() == CV_32F || x.depth() == CV_64F));
+    CV_Assert(x.step % x.elemSize() == 0 && y.step % y.elemSize() == 0);
+
      Angle.create(x.size(), x.type());
-    string kernelName = angleInDegrees ? "arithm_phase_indegrees" : "arithm_phase_inradians";
-    if (angleInDegrees)
-        arithmetic_phase_run(x, y, Angle, kernelName, &arithm_phase);
-    else
-        arithmetic_phase_run(x, y, Angle, kernelName, &arithm_phase);
+    arithmetic_phase_run(x, y, Angle, angleInDegrees ? "arithm_phase_indegrees" : "arithm_phase_inradians", &arithm_phase);
  }
  
  //////////////////////////////////////////////////////////////////////////////
@@ -1539,8 +1529,8 @@ oclMatExpr::operator oclMat() const
  /////////////////////////////// transpose ////////////////////////////////////
  //////////////////////////////////////////////////////////////////////////////
  
-#define TILE_DIM      (32)
-#define BLOCK_ROWS    (256/TILE_DIM)
+#define TILE_DIM   (32)
+#define BLOCK_ROWS (256 / TILE_DIM)
  
  static void transpose_run(const oclMat &src, oclMat &dst, string kernelName, bool inplace = false)
  {
diff --git a/modules/ocl/src/opencl/arithm_phase.cl b/modules/ocl/src/opencl/arithm_phase.cl

index 9dda5e9..a30eba4 100644 (file)
--- a/modules/ocl/src/opencl/arithm_phase.cl
+++ b/modules/ocl/src/opencl/arithm_phase.cl
@@ -45,110 +45,125 @@
  //
  
  #if defined (DOUBLE_SUPPORT)
+#ifdef cl_khr_fp64
  #pragma OPENCL EXTENSION cl_khr_fp64:enable
+#elif defined (cl_amd_fp64)
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
  #endif
+#endif
+
  #define CV_PI 3.1415926535898
+#define CV_2PI 2*3.1415926535898
+
  /**************************************phase inradians**************************************/
-__kernel void arithm_phase_inradians_D5 (__global float *src1, int src1_step, int src1_offset,
-                                         __global float *src2, int src2_step, int src2_offset,
-                                         __global float *dst,  int dst_step,  int dst_offset,
-                                         int rows, int cols, int dst_step1)
-{
  
+__kernel void arithm_phase_inradians_D5(__global float *src1, int src1_step1, int src1_offset1,
+                                         __global float *src2, int src2_step1, int src2_offset1,
+                                         __global float *dst,  int dst_step1,  int dst_offset1,
+                                         int cols, int rows)
+{
      int x = get_global_id(0);
      int y = get_global_id(1);
  
-    if(x < cols && y < rows)
+    if (x < cols && y < rows)
      {
-        int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
-        int src2_index = mad24(y, src2_step, (x << 2) + src2_offset);
-        int dst_index  = mad24(y, dst_step,  (x << 2) + dst_offset);
+        int src1_index = mad24(y, src1_step1, x + src1_offset1);
+        int src2_index = mad24(y, src2_step1, x + src2_offset1);
+        int dst_index  = mad24(y, dst_step1, x + dst_offset1);
  
-        float data1 = *((__global float *)((__global char *)src1 + src1_index));
-        float data2 = *((__global float *)((__global char *)src2 + src2_index));
-        float tmp = atan2(data2,data1);
+        float data1 = src1[src1_index];
+        float data2 = src2[src2_index];
+        float tmp = atan2(data2, data1);
  
-        *((__global float *)((__global char *)dst + dst_index)) = tmp;
-    }
+        if (tmp < 0)
+            tmp += CV_2PI;
  
+        dst[dst_index] = tmp;
+    }
  }
  
  
  #if defined (DOUBLE_SUPPORT)
-__kernel void arithm_phase_inradians_D6 (__global double *src1, int src1_step, int src1_offset,
-                                         __global double *src2, int src2_step, int src2_offset,
-                                         __global double *dst,  int dst_step,  int dst_offset,
-                                         int rows, int cols, int dst_step1)
+__kernel void arithm_phase_inradians_D6(__global double *src1, int src1_step1, int src1_offset1,
+                                         __global double *src2, int src2_step1, int src2_offset1,
+                                         __global double *dst,  int dst_step1,  int dst_offset1,
+                                         int cols, int rows)
  {
-
      int x = get_global_id(0);
      int y = get_global_id(1);
  
-    if(x < cols && y < rows)
+    if (x < cols && y < rows)
      {
-        int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
-        int src2_index = mad24(y, src2_step, (x << 3) + src2_offset);
-        int dst_index  = mad24(y, dst_step,  (x << 3) + dst_offset);
+        int src1_index = mad24(y, src1_step1, x + src1_offset1);
+        int src2_index = mad24(y, src2_step1, x + src2_offset1);
+        int dst_index  = mad24(y, dst_step1, x + dst_offset1);
  
-        double data1 = *((__global double *)((__global char *)src1 + src1_index));
-        double data2 = *((__global double *)((__global char *)src2 + src2_index));
+        double data1 = src1[src1_index];
+        double data2 = src2[src2_index];
+        double tmp = atan2(data2, data1);
  
-        *((__global double *)((__global char *)dst + dst_index)) = atan2(data2,data1);
-    }
+        if (tmp < 0)
+            tmp += CV_2PI;
  
+        dst[dst_index] = tmp;
+    }
  }
+
  #endif
  
  /**************************************phase indegrees**************************************/
-__kernel void arithm_phase_indegrees_D5 (__global float *src1, int src1_step, int src1_offset,
-                                         __global float *src2, int src2_step, int src2_offset,
-                                         __global float *dst,  int dst_step,  int dst_offset,
-                                         int rows, int cols, int dst_step1)
-{
  
+__kernel void arithm_phase_indegrees_D5(__global float *src1, int src1_step1, int src1_offset1,
+                                         __global float *src2, int src2_step1, int src2_offset1,
+                                         __global float *dst,  int dst_step1,  int dst_offset1,
+                                         int cols, int rows)
+{
      int x = get_global_id(0);
      int y = get_global_id(1);
  
-    if(x < cols && y < rows)
+    if (x < cols && y < rows)
      {
-        int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
-        int src2_index = mad24(y, src2_step, (x << 2) + src2_offset);
-        int dst_index  = mad24(y, dst_step,  (x << 2) + dst_offset);
+        int src1_index = mad24(y, src1_step1, x + src1_offset1);
+        int src2_index = mad24(y, src2_step1, x + src2_offset1);
+        int dst_index  = mad24(y, dst_step1, x + dst_offset1);
  
-        float data1 = *((__global float *)((__global char *)src1 + src1_index));
-        float data2 = *((__global float *)((__global char *)src2 + src2_index));
-        float tmp = atan2(data2,data1);
-        float tmp_data = 180*tmp/CV_PI;
+        float data1 = src1[src1_index];
+        float data2 = src2[src2_index];
+        float tmp = atan2(data2, data1);
+        tmp = 180 * tmp / CV_PI;
  
-        *((__global float *)((__global char *)dst + dst_index)) = tmp_data;
-    }
+        if (tmp < 0)
+            tmp += 360;
  
+        dst[dst_index] = tmp;
+    }
  }
  
  
  #if defined (DOUBLE_SUPPORT)
-__kernel void arithm_phase_indegrees_D6 (__global double *src1, int src1_step, int src1_offset,
-                                         __global double *src2, int src2_step, int src2_offset,
-                                         __global double *dst,  int dst_step,  int dst_offset,
-                                         int rows, int cols, int dst_step1)
+__kernel void arithm_phase_indegrees_D6 (__global double *src1, int src1_step1, int src1_offset1,
+                                         __global double *src2, int src2_step1, int src2_offset1,
+                                         __global double *dst,  int dst_step1,  int dst_offset1,
+                                         int cols, int rows)
  {
-
      int x = get_global_id(0);
      int y = get_global_id(1);
  
-    if(x < cols && y < rows)
+    if (x < cols && y < rows)
      {
-        int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
-        int src2_index = mad24(y, src2_step, (x << 3) + src2_offset);
-        int dst_index  = mad24(y, dst_step,  (x << 3) + dst_offset);
+        int src1_index = mad24(y, src1_step1, x + src1_offset1);
+        int src2_index = mad24(y, src2_step1, x + src2_offset1);
+        int dst_index  = mad24(y, dst_step1, x + dst_offset1);
  
-        double data1 = *((__global double *)((__global char *)src1 + src1_index));
-        double data2 = *((__global double *)((__global char *)src2 + src2_index));
-        double tmp = atan2(data2,data1);
-        double tmp_data = 180*tmp/CV_PI;
+        double data1 = src1[src1_index];
+        double data2 = src2[src2_index];
+        double tmp = atan2(src2[src2_index], src1[src1_index]);
  
-        *((__global double *)((__global char *)dst + dst_index)) = tmp_data;
-    }
+        tmp = 180 * tmp / CV_PI;
+        if (tmp < 0)
+            tmp += 360;
  
+        dst[dst_index] = tmp;
+    }
  }
  #endif
diff --git a/modules/ocl/test/test_arithm.cpp b/modules/ocl/test/test_arithm.cpp

index 1505419..2438148 100644 (file)
--- a/modules/ocl/test/test_arithm.cpp
+++ b/modules/ocl/test/test_arithm.cpp
@@ -464,7 +464,6 @@ TEST_P(Mul, Scalar)
      }
  }
  
-
  TEST_P(Mul, Mat_Scalar)
  {
      for (int j = 0; j < LOOP_TIMES; j++)
@@ -507,7 +506,6 @@ TEST_P(Div, Scalar)
      }
  }
  
-
  TEST_P(Div, Mat_Scalar)
  {
      for (int j = 0; j < LOOP_TIMES; j++)
@@ -1173,17 +1171,27 @@ TEST_P(CountNonZero, MAT)
  
  typedef ArithmTestBase Phase;
  
-TEST_P(Phase, DISABLED_Mat)
+TEST_P(Phase, angleInDegrees)
  {
-    for (int angelInDegrees = 0; angelInDegrees < 2; angelInDegrees++)
+    for (int j = 0; j < LOOP_TIMES; j++)
      {
-        for (int j = 0; j < LOOP_TIMES; j++)
-        {
-            random_roi();
-            cv::phase(src1_roi, src2_roi, dst1_roi, angelInDegrees ? true : false);
-            cv::ocl::phase(gsrc1, gsrc2, gdst1, angelInDegrees ? true : false);
-            Near(1e-2);
-        }
+        random_roi();
+        cv::phase(src1_roi, src2_roi, dst1_roi, true);
+        cv::ocl::phase(gsrc1, gsrc2, gdst1, true);
+
+        Near(1e-2);
+    }
+}
+
+TEST_P(Phase, angleInRadians)
+{
+    for (int j = 0; j < LOOP_TIMES; j++)
+    {
+        random_roi();
+        cv::phase(src1_roi, src2_roi, dst1_roi);
+        cv::ocl::phase(gsrc1, gsrc2, gdst1);
+
+        Near(1e-2);
      }
  }
author	Ilya Lavrenov <ilya.lavrenov@itseez.com>
	Mon, 30 Sep 2013 11:08:26 +0000 (15:08 +0400)
committer	Ilya Lavrenov <ilya.lavrenov@itseez.com>
	Mon, 30 Sep 2013 12:02:14 +0000 (16:02 +0400)
modules/ocl/src/arithm.cpp		patch \| blob \| history
modules/ocl/src/opencl/arithm_phase.cl		patch \| blob \| history
modules/ocl/test/test_arithm.cpp		patch \| blob \| history