From 66a8acfd3d0f2cc32845a8fca44311eeded1690f Mon Sep 17 00:00:00 2001 From: Alexander Karsakov Date: Tue, 7 Oct 2014 14:57:02 +0400 Subject: [PATCH] Optimization for HoughLinesP --- modules/cudaimgproc/src/cuda/hough_segments.cu | 2 +- modules/imgproc/src/hough.cpp | 21 +++++++---- modules/imgproc/src/opencl/hough_lines.cl | 52 +++++++++++--------------- modules/imgproc/test/ocl/test_houghlines.cpp | 2 +- 4 files changed, 37 insertions(+), 40 deletions(-) diff --git a/modules/cudaimgproc/src/cuda/hough_segments.cu b/modules/cudaimgproc/src/cuda/hough_segments.cu index ee50e00..ca433d3 100644 --- a/modules/cudaimgproc/src/cuda/hough_segments.cu +++ b/modules/cudaimgproc/src/cuda/hough_segments.cu @@ -117,7 +117,7 @@ namespace cv { namespace cuda { namespace device if (dir.x < 0) dir = -dir; } - else if (pb[1].x == cols - 1 && (pb[0].y >= 0 && pb[0].y < rows)) + else if (pb[1].x == cols - 1 && (pb[1].y >= 0 && pb[1].y < rows)) { p0 = pb[1]; if (dir.x > 0) diff --git a/modules/imgproc/src/hough.cpp b/modules/imgproc/src/hough.cpp index 7631b3b..ead62d1 100644 --- a/modules/imgproc/src/hough.cpp +++ b/modules/imgproc/src/hough.cpp @@ -221,7 +221,7 @@ HoughLinesSDiv( const Mat& img, std::vector lst; CV_Assert( img.type() == CV_8UC1 ); - CV_Assert( linesMax > 0 && rho > 0 && theta > 0 ); + CV_Assert( linesMax > 0 ); threshold = MIN( threshold, 255 ); @@ -655,6 +655,8 @@ HoughLinesProbabilistic( Mat& image, #ifdef HAVE_OPENCL +#define OCL_MAX_LINES 4096 + static bool ocl_makePointsList(InputArray _src, OutputArray _pointsList, InputOutputArray _counters) { UMat src = _src.getUMat(); @@ -702,7 +704,7 @@ static bool ocl_fillAccum(InputArray _pointsList, OutputArray _accum, int total_ if (fillAccumKernel.empty()) return false; globalThreads[0] = workgroup_size; globalThreads[1] = numangle; - fillAccumKernel.args(ocl::KernelArg::ReadOnlyNoSize(pointsList), ocl::KernelArg::WriteOnly(accum), + fillAccumKernel.args(ocl::KernelArg::ReadOnlyNoSize(pointsList), ocl::KernelArg::WriteOnlyNoSize(accum), total_points, irho, (float) theta, numrho, numangle); return fillAccumKernel.run(2, globalThreads, NULL, false); } @@ -714,7 +716,7 @@ static bool ocl_fillAccum(InputArray _pointsList, OutputArray _accum, int total_ return false; localThreads[0] = workgroup_size; localThreads[1] = 1; globalThreads[0] = workgroup_size; globalThreads[1] = numangle+2; - fillAccumKernel.args(ocl::KernelArg::ReadOnlyNoSize(pointsList), ocl::KernelArg::WriteOnly(accum), + fillAccumKernel.args(ocl::KernelArg::ReadOnlyNoSize(pointsList), ocl::KernelArg::WriteOnlyNoSize(accum), total_points, irho, (float) theta, numrho, numangle); return fillAccumKernel.run(2, globalThreads, localThreads, false); } @@ -731,6 +733,9 @@ static bool ocl_HoughLines(InputArray _src, OutputArray _lines, double rho, doub if (min_theta < 0 || min_theta > max_theta ) { CV_Error( CV_StsBadArg, "min_theta must fall between 0 and max_theta" ); } + if (!(rho > 0 && theta > 0)) { + CV_Error( CV_StsBadArg, "rho and theta must be greater 0" ); + } UMat src = _src.getUMat(); int numangle = cvRound((max_theta - min_theta) / theta); @@ -759,8 +764,7 @@ static bool ocl_HoughLines(InputArray _src, OutputArray _lines, double rho, doub if (getLinesKernel.empty()) return false; - // TODO: investigate other strategies to choose linesMax - int linesMax = min(total_points*numangle/threshold, 4096); + int linesMax = threshold > 0 ? min(total_points*numangle/threshold, OCL_MAX_LINES) : OCL_MAX_LINES; UMat lines(linesMax, 1, CV_32FC2); getLinesKernel.args(ocl::KernelArg::ReadOnly(accum), ocl::KernelArg::WriteOnlyNoSize(lines), @@ -783,6 +787,10 @@ static bool ocl_HoughLinesP(InputArray _src, OutputArray _lines, double rho, dou { CV_Assert(_src.type() == CV_8UC1); + if (!(rho > 0 && theta > 0)) { + CV_Error( CV_StsBadArg, "rho and theta must be greater 0" ); + } + UMat src = _src.getUMat(); int numangle = cvRound(CV_PI / theta); int numrho = cvRound(((src.cols + src.rows) * 2 + 1) / rho); @@ -809,8 +817,7 @@ static bool ocl_HoughLinesP(InputArray _src, OutputArray _lines, double rho, dou if (getLinesKernel.empty()) return false; - // TODO: investigate other strategies to choose linesMax - int linesMax = min(total_points*numangle/threshold, 4096); + int linesMax = threshold > 0 ? min(total_points*numangle/threshold, OCL_MAX_LINES) : OCL_MAX_LINES; UMat lines(linesMax, 1, CV_32SC4); getLinesKernel.args(ocl::KernelArg::ReadOnly(accum), ocl::KernelArg::ReadOnly(src), diff --git a/modules/imgproc/src/opencl/hough_lines.cl b/modules/imgproc/src/opencl/hough_lines.cl index 19c465d..f318133 100644 --- a/modules/imgproc/src/opencl/hough_lines.cl +++ b/modules/imgproc/src/opencl/hough_lines.cl @@ -5,6 +5,8 @@ // Copyright (C) 2014, Itseez, Inc., all rights reserved. // Third party copyrights are property of their respective owners. +#define ACCUM(ptr) *((__global int*)(ptr)) + #ifdef MAKE_POINTS_LIST __kernel void make_point_list(__global const uchar * src_ptr, int src_step, int src_offset, int src_rows, int src_cols, @@ -25,11 +27,13 @@ __kernel void make_point_list(__global const uchar * src_ptr, int src_step, int if (y < src_rows) { + y <<= 16; + for (int i=x; i < src_cols; i+=GROUP_SIZE) { if (src[i]) { - int val = (y << 16) | i; + int val = y | i; int index = atomic_inc(&l_index); l_points[index] = val; } @@ -53,7 +57,7 @@ __kernel void make_point_list(__global const uchar * src_ptr, int src_step, int #elif defined FILL_ACCUM_GLOBAL __kernel void fill_accum_global(__global const uchar * list_ptr, int list_step, int list_offset, - __global uchar * accum_ptr, int accum_step, int accum_offset, int accum_rows, int accum_cols, + __global uchar * accum_ptr, int accum_step, int accum_offset, int total_points, float irho, float theta, int numrho, int numangle) { int theta_idx = get_global_id(1); @@ -76,7 +80,7 @@ __kernel void fill_accum_global(__global const uchar * list_ptr, int list_step, const int x = (val & 0xFFFF); const int y = (val >> 16) & 0xFFFF; - int r = convert_int_rte(x * cosVal + y * sinVal) + shift; + int r = convert_int_rte(mad(x, cosVal, y * sinVal)) + shift; atomic_inc(accum + r + 1); } } @@ -85,7 +89,7 @@ __kernel void fill_accum_global(__global const uchar * list_ptr, int list_step, #elif defined FILL_ACCUM_LOCAL __kernel void fill_accum_local(__global const uchar * list_ptr, int list_step, int list_offset, - __global uchar * accum_ptr, int accum_step, int accum_offset, int accum_rows, int accum_cols, + __global uchar * accum_ptr, int accum_step, int accum_offset, int total_points, float irho, float theta, int numrho, int numangle) { int theta_idx = get_group_id(1); @@ -133,15 +137,13 @@ __kernel void fill_accum_local(__global const uchar * list_ptr, int list_step, i #elif defined GET_LINES -#define ACCUM(ptr) *((__global int*)(ptr)) - __kernel void get_lines(__global uchar * accum_ptr, int accum_step, int accum_offset, int accum_rows, int accum_cols, __global uchar * lines_ptr, int lines_step, int lines_offset, __global int* lines_index_ptr, int linesMax, int threshold, float rho, float theta) { int x0 = get_global_id(0); int y = get_global_id(1); - int gl_size = get_global_size(0); + int glob_size = get_global_size(0); if (y < accum_rows-2) { @@ -149,7 +151,7 @@ __kernel void get_lines(__global uchar * accum_ptr, int accum_step, int accum_of __global float2* lines = (__global float2*)(lines_ptr + lines_offset); __global int* lines_index = lines_index_ptr + 1; - for (int x=x0; x= 0 && pb[0].y < src_rows)) + else if (pb[1].x == src_cols - 1 && (pb[1].y >= 0 && pb[1].y < src_rows)) { p0 = pb[1]; if (dir.x > 0) @@ -258,41 +259,30 @@ __kernel void get_lines(__global const uchar * accum_ptr, int accum_step, int ac dir = -dir; } - float2 d; - if (fabs(dir.x) > fabs(dir.y)) - { - d.x = dir.x > 0 ? 1 : -1; - d.y = dir.y / fabs(dir.x); - } - else - { - d.x = dir.x / fabs(dir.y); - d.y = dir.y > 0 ? 1 : -1; - } + dir /= max(fabs(dir.x), fabs(dir.y)); float2 line_end[2]; int gap; bool inLine = false; - float2 p1 = p0; - if (p1.x < 0 || p1.x >= src_cols || p1.y < 0 || p1.y >= src_rows) + if (p0.x < 0 || p0.x >= src_cols || p0.y < 0 || p0.y >= src_rows) return; for (;;) { - if (*(src_ptr + mad24(p1.y, src_step, p1.x + src_offset))) + if (*(src_ptr + mad24(p0.y, src_step, p0.x + src_offset))) { gap = 0; if (!inLine) { - line_end[0] = p1; - line_end[1] = p1; + line_end[0] = p0; + line_end[1] = p0; inLine = true; } else { - line_end[1] = p1; + line_end[1] = p0; } } else if (inLine) @@ -314,8 +304,8 @@ __kernel void get_lines(__global const uchar * accum_ptr, int accum_step, int ac } } - p1 = p1 + d; - if (p1.x < 0 || p1.x >= src_cols || p1.y < 0 || p1.y >= src_rows) + p0 = p0 + dir; + if (p0.x < 0 || p0.x >= src_cols || p0.y < 0 || p0.y >= src_rows) { if (inLine) { diff --git a/modules/imgproc/test/ocl/test_houghlines.cpp b/modules/imgproc/test/ocl/test_houghlines.cpp index aa251a7..1f9d802 100644 --- a/modules/imgproc/test/ocl/test_houghlines.cpp +++ b/modules/imgproc/test/ocl/test_houghlines.cpp @@ -168,7 +168,7 @@ OCL_TEST_P(HoughLinesP, RealImage) OCL_OFF(cv::HoughLinesP(src, dst, rhoStep, thetaStep, threshold, minLineLength, maxGap)); OCL_ON(cv::HoughLinesP(usrc, udst, rhoStep, thetaStep, threshold, minLineLength, maxGap)); - Near(0.2); + Near(0.25); } OCL_INSTANTIATE_TEST_CASE_P(Imgproc, HoughLines, Combine(Values(1, 0.5), // rhoStep -- 2.7.4