From 7edcefb2be9f0372cd2f3a004fd6cd3c975cca41 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Mon, 30 Sep 2013 15:08:26 +0400 Subject: [PATCH] fixed ocl::phase --- modules/ocl/src/arithm.cpp | 50 +++++-------- modules/ocl/src/opencl/arithm_phase.cl | 129 ++++++++++++++++++--------------- modules/ocl/test/test_arithm.cpp | 30 +++++--- 3 files changed, 111 insertions(+), 98 deletions(-) diff --git a/modules/ocl/src/arithm.cpp b/modules/ocl/src/arithm.cpp index deb5163..6467040 100644 --- a/modules/ocl/src/arithm.cpp +++ b/modules/ocl/src/arithm.cpp @@ -461,8 +461,8 @@ void cv::ocl::meanStdDev(const oclMat &src, Scalar &mean, Scalar &stddev) m2(sz, CV_MAKETYPE(CV_32S, channels), cv::Scalar::all(0)); oclMat dst1(m1), dst2(m2); - //arithmetic_sum_run(src, dst1,"arithm_op_sum"); - //arithmetic_sum_run(src, dst2,"arithm_op_squares_sum"); +// arithmetic_sum_run(src, dst1, "arithm_op_sum"); +// arithmetic_sum_run(src, dst2, "arithm_op_squares_sum"); m1 = (Mat)dst1; m2 = (Mat)dst2; @@ -558,7 +558,6 @@ void arithmetic_minMax(const oclMat &src, double *minVal, double *maxVal, } } - void cv::ocl::minMax(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask) { oclMat buf; @@ -928,47 +927,38 @@ static void arithmetic_phase_run(const oclMat &src1, const oclMat &src2, oclMat return; } - CV_Assert(src1.cols == src2.cols && src2.cols == dst.cols && src1.rows == src2.rows && src2.rows == dst.rows); - CV_Assert(src1.type() == src2.type() && src1.type() == dst.type()); - Context *clCxt = src1.clCxt; - int channels = dst.oclchannels(); - int depth = dst.depth(); - - size_t vector_length = 1; - int offset_cols = ((dst.offset % dst.step) / dst.elemSize1()) & (vector_length - 1); - int cols = divUp(dst.cols * channels + offset_cols, vector_length); + int depth = dst.depth(), cols1 = src1.cols * src1.oclchannels(); + int src1step1 = src1.step / src1.elemSize1(), src1offset1 = src1.offset / src1.elemSize1(); + int src2step1 = src2.step / src2.elemSize1(), src2offset1 = src2.offset / src2.elemSize1(); + int dststep1 = dst.step / dst.elemSize1(), dstoffset1 = dst.offset / dst.elemSize1(); size_t localThreads[3] = { 64, 4, 1 }; - size_t globalThreads[3] = { cols, dst.rows, 1 }; + size_t globalThreads[3] = { cols1, dst.rows, 1 }; - int dst_step1 = dst.cols * dst.elemSize(); vector > args; args.push_back( make_pair( sizeof(cl_mem), (void *)&src1.data )); - args.push_back( make_pair( sizeof(cl_int), (void *)&src1.step )); - args.push_back( make_pair( sizeof(cl_int), (void *)&src1.offset )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src1step1 )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src1offset1 )); args.push_back( make_pair( sizeof(cl_mem), (void *)&src2.data )); - args.push_back( make_pair( sizeof(cl_int), (void *)&src2.step )); - args.push_back( make_pair( sizeof(cl_int), (void *)&src2.offset )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src2step1 )); + args.push_back( make_pair( sizeof(cl_int), (void *)&src2offset1 )); args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data )); - args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step )); - args.push_back( make_pair( sizeof(cl_int), (void *)&dst.offset )); + args.push_back( make_pair( sizeof(cl_int), (void *)&dststep1 )); + args.push_back( make_pair( sizeof(cl_int), (void *)&dstoffset1 )); + args.push_back( make_pair( sizeof(cl_int), (void *)&cols1 )); args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows )); - args.push_back( make_pair( sizeof(cl_int), (void *)&cols )); - args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 )); openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, depth); } -void cv::ocl::phase(const oclMat &x, const oclMat &y, oclMat &Angle , bool angleInDegrees) +void cv::ocl::phase(const oclMat &x, const oclMat &y, oclMat &Angle, bool angleInDegrees) { CV_Assert(x.type() == y.type() && x.size() == y.size() && (x.depth() == CV_32F || x.depth() == CV_64F)); + CV_Assert(x.step % x.elemSize() == 0 && y.step % y.elemSize() == 0); + Angle.create(x.size(), x.type()); - string kernelName = angleInDegrees ? "arithm_phase_indegrees" : "arithm_phase_inradians"; - if (angleInDegrees) - arithmetic_phase_run(x, y, Angle, kernelName, &arithm_phase); - else - arithmetic_phase_run(x, y, Angle, kernelName, &arithm_phase); + arithmetic_phase_run(x, y, Angle, angleInDegrees ? "arithm_phase_indegrees" : "arithm_phase_inradians", &arithm_phase); } ////////////////////////////////////////////////////////////////////////////// @@ -1539,8 +1529,8 @@ oclMatExpr::operator oclMat() const /////////////////////////////// transpose //////////////////////////////////// ////////////////////////////////////////////////////////////////////////////// -#define TILE_DIM (32) -#define BLOCK_ROWS (256/TILE_DIM) +#define TILE_DIM (32) +#define BLOCK_ROWS (256 / TILE_DIM) static void transpose_run(const oclMat &src, oclMat &dst, string kernelName, bool inplace = false) { diff --git a/modules/ocl/src/opencl/arithm_phase.cl b/modules/ocl/src/opencl/arithm_phase.cl index 9dda5e9..a30eba4 100644 --- a/modules/ocl/src/opencl/arithm_phase.cl +++ b/modules/ocl/src/opencl/arithm_phase.cl @@ -45,110 +45,125 @@ // #if defined (DOUBLE_SUPPORT) +#ifdef cl_khr_fp64 #pragma OPENCL EXTENSION cl_khr_fp64:enable +#elif defined (cl_amd_fp64) +#pragma OPENCL EXTENSION cl_amd_fp64:enable #endif +#endif + #define CV_PI 3.1415926535898 +#define CV_2PI 2*3.1415926535898 + /**************************************phase inradians**************************************/ -__kernel void arithm_phase_inradians_D5 (__global float *src1, int src1_step, int src1_offset, - __global float *src2, int src2_step, int src2_offset, - __global float *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) -{ +__kernel void arithm_phase_inradians_D5(__global float *src1, int src1_step1, int src1_offset1, + __global float *src2, int src2_step1, int src2_offset1, + __global float *dst, int dst_step1, int dst_offset1, + int cols, int rows) +{ int x = get_global_id(0); int y = get_global_id(1); - if(x < cols && y < rows) + if (x < cols && y < rows) { - int src1_index = mad24(y, src1_step, (x << 2) + src1_offset); - int src2_index = mad24(y, src2_step, (x << 2) + src2_offset); - int dst_index = mad24(y, dst_step, (x << 2) + dst_offset); + int src1_index = mad24(y, src1_step1, x + src1_offset1); + int src2_index = mad24(y, src2_step1, x + src2_offset1); + int dst_index = mad24(y, dst_step1, x + dst_offset1); - float data1 = *((__global float *)((__global char *)src1 + src1_index)); - float data2 = *((__global float *)((__global char *)src2 + src2_index)); - float tmp = atan2(data2,data1); + float data1 = src1[src1_index]; + float data2 = src2[src2_index]; + float tmp = atan2(data2, data1); - *((__global float *)((__global char *)dst + dst_index)) = tmp; - } + if (tmp < 0) + tmp += CV_2PI; + dst[dst_index] = tmp; + } } #if defined (DOUBLE_SUPPORT) -__kernel void arithm_phase_inradians_D6 (__global double *src1, int src1_step, int src1_offset, - __global double *src2, int src2_step, int src2_offset, - __global double *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_phase_inradians_D6(__global double *src1, int src1_step1, int src1_offset1, + __global double *src2, int src2_step1, int src2_offset1, + __global double *dst, int dst_step1, int dst_offset1, + int cols, int rows) { - int x = get_global_id(0); int y = get_global_id(1); - if(x < cols && y < rows) + if (x < cols && y < rows) { - int src1_index = mad24(y, src1_step, (x << 3) + src1_offset); - int src2_index = mad24(y, src2_step, (x << 3) + src2_offset); - int dst_index = mad24(y, dst_step, (x << 3) + dst_offset); + int src1_index = mad24(y, src1_step1, x + src1_offset1); + int src2_index = mad24(y, src2_step1, x + src2_offset1); + int dst_index = mad24(y, dst_step1, x + dst_offset1); - double data1 = *((__global double *)((__global char *)src1 + src1_index)); - double data2 = *((__global double *)((__global char *)src2 + src2_index)); + double data1 = src1[src1_index]; + double data2 = src2[src2_index]; + double tmp = atan2(data2, data1); - *((__global double *)((__global char *)dst + dst_index)) = atan2(data2,data1); - } + if (tmp < 0) + tmp += CV_2PI; + dst[dst_index] = tmp; + } } + #endif /**************************************phase indegrees**************************************/ -__kernel void arithm_phase_indegrees_D5 (__global float *src1, int src1_step, int src1_offset, - __global float *src2, int src2_step, int src2_offset, - __global float *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) -{ +__kernel void arithm_phase_indegrees_D5(__global float *src1, int src1_step1, int src1_offset1, + __global float *src2, int src2_step1, int src2_offset1, + __global float *dst, int dst_step1, int dst_offset1, + int cols, int rows) +{ int x = get_global_id(0); int y = get_global_id(1); - if(x < cols && y < rows) + if (x < cols && y < rows) { - int src1_index = mad24(y, src1_step, (x << 2) + src1_offset); - int src2_index = mad24(y, src2_step, (x << 2) + src2_offset); - int dst_index = mad24(y, dst_step, (x << 2) + dst_offset); + int src1_index = mad24(y, src1_step1, x + src1_offset1); + int src2_index = mad24(y, src2_step1, x + src2_offset1); + int dst_index = mad24(y, dst_step1, x + dst_offset1); - float data1 = *((__global float *)((__global char *)src1 + src1_index)); - float data2 = *((__global float *)((__global char *)src2 + src2_index)); - float tmp = atan2(data2,data1); - float tmp_data = 180*tmp/CV_PI; + float data1 = src1[src1_index]; + float data2 = src2[src2_index]; + float tmp = atan2(data2, data1); + tmp = 180 * tmp / CV_PI; - *((__global float *)((__global char *)dst + dst_index)) = tmp_data; - } + if (tmp < 0) + tmp += 360; + dst[dst_index] = tmp; + } } #if defined (DOUBLE_SUPPORT) -__kernel void arithm_phase_indegrees_D6 (__global double *src1, int src1_step, int src1_offset, - __global double *src2, int src2_step, int src2_offset, - __global double *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_phase_indegrees_D6 (__global double *src1, int src1_step1, int src1_offset1, + __global double *src2, int src2_step1, int src2_offset1, + __global double *dst, int dst_step1, int dst_offset1, + int cols, int rows) { - int x = get_global_id(0); int y = get_global_id(1); - if(x < cols && y < rows) + if (x < cols && y < rows) { - int src1_index = mad24(y, src1_step, (x << 3) + src1_offset); - int src2_index = mad24(y, src2_step, (x << 3) + src2_offset); - int dst_index = mad24(y, dst_step, (x << 3) + dst_offset); + int src1_index = mad24(y, src1_step1, x + src1_offset1); + int src2_index = mad24(y, src2_step1, x + src2_offset1); + int dst_index = mad24(y, dst_step1, x + dst_offset1); - double data1 = *((__global double *)((__global char *)src1 + src1_index)); - double data2 = *((__global double *)((__global char *)src2 + src2_index)); - double tmp = atan2(data2,data1); - double tmp_data = 180*tmp/CV_PI; + double data1 = src1[src1_index]; + double data2 = src2[src2_index]; + double tmp = atan2(src2[src2_index], src1[src1_index]); - *((__global double *)((__global char *)dst + dst_index)) = tmp_data; - } + tmp = 180 * tmp / CV_PI; + if (tmp < 0) + tmp += 360; + dst[dst_index] = tmp; + } } #endif diff --git a/modules/ocl/test/test_arithm.cpp b/modules/ocl/test/test_arithm.cpp index 1505419..2438148 100644 --- a/modules/ocl/test/test_arithm.cpp +++ b/modules/ocl/test/test_arithm.cpp @@ -464,7 +464,6 @@ TEST_P(Mul, Scalar) } } - TEST_P(Mul, Mat_Scalar) { for (int j = 0; j < LOOP_TIMES; j++) @@ -507,7 +506,6 @@ TEST_P(Div, Scalar) } } - TEST_P(Div, Mat_Scalar) { for (int j = 0; j < LOOP_TIMES; j++) @@ -1173,17 +1171,27 @@ TEST_P(CountNonZero, MAT) typedef ArithmTestBase Phase; -TEST_P(Phase, DISABLED_Mat) +TEST_P(Phase, angleInDegrees) { - for (int angelInDegrees = 0; angelInDegrees < 2; angelInDegrees++) + for (int j = 0; j < LOOP_TIMES; j++) { - for (int j = 0; j < LOOP_TIMES; j++) - { - random_roi(); - cv::phase(src1_roi, src2_roi, dst1_roi, angelInDegrees ? true : false); - cv::ocl::phase(gsrc1, gsrc2, gdst1, angelInDegrees ? true : false); - Near(1e-2); - } + random_roi(); + cv::phase(src1_roi, src2_roi, dst1_roi, true); + cv::ocl::phase(gsrc1, gsrc2, gdst1, true); + + Near(1e-2); + } +} + +TEST_P(Phase, angleInRadians) +{ + for (int j = 0; j < LOOP_TIMES; j++) + { + random_roi(); + cv::phase(src1_roi, src2_roi, dst1_roi); + cv::ocl::phase(gsrc1, gsrc2, gdst1); + + Near(1e-2); } } -- 2.7.4