From c3f277b7bc47256ef9466d996f9736fde8a6c704 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Thu, 23 Aug 2012 16:54:48 +0400 Subject: [PATCH] gpu version of HoughCircles --- modules/gpu/doc/image_processing.rst | 68 +++++----- modules/gpu/include/opencv2/gpu/gpu.hpp | 25 +++- modules/gpu/perf/perf_imgproc.cpp | 71 ++++++++-- modules/gpu/perf/utility.hpp | 2 +- modules/gpu/src/cuda/hough.cu | 234 ++++++++++++++++++++++++++++---- modules/gpu/src/hough.cpp | 209 +++++++++++++++++++++++----- modules/gpu/test/test_imgproc.cpp | 75 +++++++++- 7 files changed, 581 insertions(+), 103 deletions(-) diff --git a/modules/gpu/doc/image_processing.rst b/modules/gpu/doc/image_processing.rst index eb2561e..858b707 100644 --- a/modules/gpu/doc/image_processing.rst +++ b/modules/gpu/doc/image_processing.rst @@ -893,7 +893,7 @@ Finds lines in a binary image using the classical Hough transform. .. ocv:function:: void gpu::HoughLines(const GpuMat& src, GpuMat& lines, float rho, float theta, int threshold, bool doSort = false, int maxLines = 4096) -.. ocv:function:: void gpu::HoughLines(const GpuMat& src, GpuMat& lines, GpuMat& accum, GpuMat& buf, float rho, float theta, int threshold, bool doSort = false, int maxLines = 4096) +.. ocv:function:: void gpu::HoughLines(const GpuMat& src, GpuMat& lines, HoughLinesBuf& buf, float rho, float theta, int threshold, bool doSort = false, int maxLines = 4096) :param src: 8-bit, single-channel binary source image. @@ -909,70 +909,70 @@ Finds lines in a binary image using the classical Hough transform. :param maxLines: Maximum number of output lines. - :param accum: Optional buffer for accumulator to avoid extra memory allocations (for many calls with the same sizes). - :param buf: Optional buffer to avoid extra memory allocations (for many calls with the same sizes). .. seealso:: :ocv:func:`HoughLines` -gpu::HoughLinesTransform ------------------------- -Performs classical Hough transform for line detection. +gpu::HoughLinesDownload +----------------------- +Downloads results from :ocv:func:`gpu::HoughLines` to host memory. -.. ocv:function:: void gpu::HoughLinesTransform(const GpuMat& src, GpuMat& accum, GpuMat& buf, float rho, float theta) +.. ocv:function:: void gpu::HoughLinesDownload(const GpuMat& d_lines, OutputArray h_lines, OutputArray h_votes = noArray()) - :param src: 8-bit, single-channel binary source image. + :param d_lines: Result of :ocv:func:`gpu::HoughLines` . - :param accum: Output accumulator array. + :param h_lines: Output host array. - :param buf: Buffer to avoid extra memory allocations (for many calls with the same sizes). + :param h_votes: Optional output array for line's votes. - :param rho: Distance resolution of the accumulator in pixels. +.. seealso:: :ocv:func:`gpu::HoughLines` - :param theta: Angle resolution of the accumulator in radians. - :param threshold: Accumulator threshold parameter. Only those lines are returned that get enough votes ( :math:`>\texttt{threshold}` ). -.. seealso:: :ocv:func:`gpu::HoughLines` +gpu::HoughCircles +----------------- +Finds circles in a grayscale image using the Hough transform. +.. ocv:function:: void gpu::HoughCircles(const GpuMat& src, GpuMat& circles, int method, float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles = 4096) +.. ocv:function:: void gpu::HoughCircles(const GpuMat& src, GpuMat& circles, HoughCirclesBuf& buf, int method, float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles = 4096) -gpu::HoughLinesGet ------------------- -Finds lines in Hough space. + :param src: 8-bit, single-channel grayscale input image. -.. ocv:function:: void gpu::HoughLinesGet(const GpuMat& accum, GpuMat& lines, float rho, float theta, int threshold, bool doSort = false, int maxLines = 4096) + :param circles: Output vector of found circles. Each vector is encoded as a 3-element floating-point vector :math:`(x, y, radius)` . - :param accum: Accumulator array. + :param method: Detection method to use. Currently, the only implemented method is ``CV_HOUGH_GRADIENT`` , which is basically *21HT* , described in [Yuen90]_. - :param lines: Output vector of lines. Each line is represented by a two-element vector :math:`(\rho, \theta)` . :math:`\rho` is the distance from the coordinate origin :math:`(0,0)` (top-left corner of the image). :math:`\theta` is the line rotation angle in radians ( :math:`0 \sim \textrm{vertical line}, \pi/2 \sim \textrm{horizontal line}` ). + :param dp: Inverse ratio of the accumulator resolution to the image resolution. For example, if ``dp=1`` , the accumulator has the same resolution as the input image. If ``dp=2`` , the accumulator has half as big width and height. - :param rho: Distance resolution of the accumulator in pixels. + :param minDist: Minimum distance between the centers of the detected circles. If the parameter is too small, multiple neighbor circles may be falsely detected in addition to a true one. If it is too large, some circles may be missed. - :param theta: Angle resolution of the accumulator in radians. + :param cannyThreshold: The higher threshold of the two passed to the :ocv:func:`gpu::Canny` edge detector (the lower one is twice smaller). - :param threshold: Accumulator threshold parameter. Only those lines are returned that get enough votes ( :math:`>\texttt{threshold}` ). + :param votesThreshold: The accumulator threshold for the circle centers at the detection stage. The smaller it is, the more false circles may be detected. - :param doSort: Performs lines sort by votes. + :param minRadius: Minimum circle radius. - :param maxLines: Maximum number of output lines. + :param maxRadius: Maximum circle radius. -.. seealso:: :ocv:func:`gpu::HoughLines` + :param maxCircles: Maximum number of output circles. + :param buf: Optional buffer to avoid extra memory allocations (for many calls with the same sizes). +.. seealso:: :ocv:func:`HoughCircles` -gpu::HoughLinesDownload ------------------------ -Downloads results from :ocv:func:`gpu::HoughLines` to host memory. -.. ocv:function:: void gpu::HoughLinesDownload(const GpuMat& d_lines, OutputArray h_lines, OutputArray h_votes = noArray()) - :param d_lines: Result of :ocv:func:`gpu::HoughLines` . +gpu::HoughCirclesDownload +------------------------- +Downloads results from :ocv:func:`gpu::HoughCircles` to host memory. - :param h_lines: Output host array. +.. ocv:function:: void gpu::HoughCirclesDownload(const GpuMat& d_circles, OutputArray h_circles) - :param h_votes: Optional output array for line's votes. + :param d_circles: Result of :ocv:func:`gpu::HoughCircles` . -.. seealso:: :ocv:func:`gpu::HoughLines` + :param h_circles: Output host array. + +.. seealso:: :ocv:func:`gpu::HoughCircles` diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp index c6f2e11..c2fcc31 100644 --- a/modules/gpu/include/opencv2/gpu/gpu.hpp +++ b/modules/gpu/include/opencv2/gpu/gpu.hpp @@ -821,12 +821,31 @@ private: }; //! HoughLines + +struct HoughLinesBuf +{ + GpuMat accum; + GpuMat list; +}; + CV_EXPORTS void HoughLines(const GpuMat& src, GpuMat& lines, float rho, float theta, int threshold, bool doSort = false, int maxLines = 4096); -CV_EXPORTS void HoughLines(const GpuMat& src, GpuMat& lines, GpuMat& accum, GpuMat& buf, float rho, float theta, int threshold, bool doSort = false, int maxLines = 4096); -CV_EXPORTS void HoughLinesTransform(const GpuMat& src, GpuMat& accum, GpuMat& buf, float rho, float theta); -CV_EXPORTS void HoughLinesGet(const GpuMat& accum, GpuMat& lines, float rho, float theta, int threshold, bool doSort = false, int maxLines = 4096); +CV_EXPORTS void HoughLines(const GpuMat& src, GpuMat& lines, HoughLinesBuf& buf, float rho, float theta, int threshold, bool doSort = false, int maxLines = 4096); CV_EXPORTS void HoughLinesDownload(const GpuMat& d_lines, OutputArray h_lines, OutputArray h_votes = noArray()); +//! HoughCircles + +struct HoughCirclesBuf +{ + GpuMat edges; + GpuMat accum; + GpuMat list; + CannyBuf cannyBuf; +}; + +CV_EXPORTS void HoughCircles(const GpuMat& src, GpuMat& circles, int method, float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles = 4096); +CV_EXPORTS void HoughCircles(const GpuMat& src, GpuMat& circles, HoughCirclesBuf& buf, int method, float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles = 4096); +CV_EXPORTS void HoughCirclesDownload(const GpuMat& d_circles, OutputArray h_circles); + ////////////////////////////// Matrix reductions ////////////////////////////// //! computes mean value and standard deviation of all or selected array elements diff --git a/modules/gpu/perf/perf_imgproc.cpp b/modules/gpu/perf/perf_imgproc.cpp index f938ca2..ba864af 100644 --- a/modules/gpu/perf/perf_imgproc.cpp +++ b/modules/gpu/perf/perf_imgproc.cpp @@ -1609,14 +1609,11 @@ PERF_TEST_P(Sz_Depth_Cn, ImgProc_ImagePyramidGetLayer, Combine(GPU_TYPICAL_MAT_S ////////////////////////////////////////////////////////////////////// // HoughLines -DEF_PARAM_TEST(Sz_DoSort, cv::Size, bool); - -PERF_TEST_P(Sz_DoSort, ImgProc_HoughLines, Combine(GPU_TYPICAL_MAT_SIZES, Bool())) +PERF_TEST_P(Sz, ImgProc_HoughLines, GPU_TYPICAL_MAT_SIZES) { declare.time(30.0); - const cv::Size size = GET_PARAM(0); - const bool doSort = GET_PARAM(1); + const cv::Size size = GetParam(); const float rho = 1.0f; const float theta = static_cast(CV_PI / 180.0); @@ -1638,14 +1635,13 @@ PERF_TEST_P(Sz_DoSort, ImgProc_HoughLines, Combine(GPU_TYPICAL_MAT_SIZES, Bool() { cv::gpu::GpuMat d_src(src); cv::gpu::GpuMat d_lines; - cv::gpu::GpuMat d_accum; - cv::gpu::GpuMat d_buf; + cv::gpu::HoughLinesBuf d_buf; - cv::gpu::HoughLines(d_src, d_lines, d_accum, d_buf, rho, theta, threshold, doSort); + cv::gpu::HoughLines(d_src, d_lines, d_buf, rho, theta, threshold); TEST_CYCLE() { - cv::gpu::HoughLines(d_src, d_lines, d_accum, d_buf, rho, theta, threshold, doSort); + cv::gpu::HoughLines(d_src, d_lines, d_buf, rho, theta, threshold); } } else @@ -1660,4 +1656,61 @@ PERF_TEST_P(Sz_DoSort, ImgProc_HoughLines, Combine(GPU_TYPICAL_MAT_SIZES, Bool() } } +////////////////////////////////////////////////////////////////////// +// HoughCircles + +DEF_PARAM_TEST(Sz_Dp_MinDist, cv::Size, float, float); + +PERF_TEST_P(Sz_Dp_MinDist, ImgProc_HoughCircles, Combine(GPU_TYPICAL_MAT_SIZES, Values(1.0f, 2.0f, 4.0f), Values(1.0f, 10.0f))) +{ + declare.time(30.0); + + const cv::Size size = GET_PARAM(0); + const float dp = GET_PARAM(1); + const float minDist = GET_PARAM(2); + + const int minRadius = 10; + const int maxRadius = 30; + const int cannyThreshold = 100; + const int votesThreshold = 15; + + cv::RNG rng(123456789); + + cv::Mat src(size, CV_8UC1, cv::Scalar::all(0)); + + const int numCircles = rng.uniform(50, 100); + for (int i = 0; i < numCircles; ++i) + { + cv::Point center(rng.uniform(0, src.cols), rng.uniform(0, src.rows)); + const int radius = rng.uniform(minRadius, maxRadius + 1); + + cv::circle(src, center, radius, cv::Scalar::all(255), -1); + } + + if (runOnGpu) + { + cv::gpu::GpuMat d_src(src); + cv::gpu::GpuMat d_circles; + cv::gpu::HoughCirclesBuf d_buf; + + cv::gpu::HoughCircles(d_src, d_circles, d_buf, CV_HOUGH_GRADIENT, dp, minDist, cannyThreshold, votesThreshold, minRadius, maxRadius); + + TEST_CYCLE() + { + cv::gpu::HoughCircles(d_src, d_circles, d_buf, CV_HOUGH_GRADIENT, dp, minDist, cannyThreshold, votesThreshold, minRadius, maxRadius); + } + } + else + { + std::vector circles; + + cv::HoughCircles(src, circles, CV_HOUGH_GRADIENT, dp, minDist, cannyThreshold, votesThreshold, minRadius, maxRadius); + + TEST_CYCLE() + { + cv::HoughCircles(src, circles, CV_HOUGH_GRADIENT, dp, minDist, cannyThreshold, votesThreshold, minRadius, maxRadius); + } + } +} + } // namespace diff --git a/modules/gpu/perf/utility.hpp b/modules/gpu/perf/utility.hpp index 2d21fff..441d32a 100644 --- a/modules/gpu/perf/utility.hpp +++ b/modules/gpu/perf/utility.hpp @@ -40,6 +40,6 @@ typedef perf::Size_MatType Sz_Type; DEF_PARAM_TEST(Sz_Depth, cv::Size, MatDepth); DEF_PARAM_TEST(Sz_Depth_Cn, cv::Size, MatDepth, int); -#define GPU_TYPICAL_MAT_SIZES testing::Values(perf::szSXGA, perf::sz720p, perf::sz1080p) +#define GPU_TYPICAL_MAT_SIZES testing::Values(perf::sz720p, perf::szSXGA, perf::sz1080p) #endif // __OPENCV_PERF_GPU_UTILITY_HPP__ diff --git a/modules/gpu/src/cuda/hough.cu b/modules/gpu/src/cuda/hough.cu index 66433ab..63099d3 100644 --- a/modules/gpu/src/cuda/hough.cu +++ b/modules/gpu/src/cuda/hough.cu @@ -57,7 +57,7 @@ namespace cv { namespace gpu { namespace device __global__ void buildPointList(const DevMem2Db src, unsigned int* list) { - __shared__ int s_queues[4][32 * PIXELS_PER_THREAD]; + __shared__ unsigned int s_queues[4][32 * PIXELS_PER_THREAD]; __shared__ int s_qsize[4]; __shared__ int s_globStart[4]; @@ -211,8 +211,6 @@ namespace cv { namespace gpu { namespace device const dim3 block(has20 ? 1024 : 512); const dim3 grid(accum.rows - 2); - cudaSafeCall( cudaFuncSetCacheConfig(linesAccumShared, cudaFuncCachePreferShared) ); - size_t smemSize = (accum.cols - 1) * sizeof(int); if (smemSize < sharedMemPerBlock - 1000) @@ -230,28 +228,19 @@ namespace cv { namespace gpu { namespace device __global__ void linesGetResult(const DevMem2Di accum, float2* out, int* votes, const int maxSize, const float rho, const float theta, const float threshold, const int numrho) { - __shared__ int smem[8][32]; - - const int x = blockIdx.x * (blockDim.x - 2) + threadIdx.x; - const int y = blockIdx.y * (blockDim.y - 2) + threadIdx.y; + const int r = blockIdx.x * blockDim.x + threadIdx.x; + const int n = blockIdx.y * blockDim.y + threadIdx.y; - if (x >= accum.cols || y >= accum.rows) + if (r >= accum.cols - 2 && n >= accum.rows - 2) return; - smem[threadIdx.y][threadIdx.x] = accum(y, x); - __syncthreads(); - - const int r = x - 1; - const int n = y - 1; + const int curVotes = accum(n + 1, r + 1); - if (threadIdx.x == 0 || threadIdx.x == blockDim.x - 1 || threadIdx.y == 0 || threadIdx.y == blockDim.y - 1 || r >= accum.cols - 2 || n >= accum.rows - 2) - return; - - if (smem[threadIdx.y][threadIdx.x] > threshold && - smem[threadIdx.y][threadIdx.x] > smem[threadIdx.y - 1][threadIdx.x] && - smem[threadIdx.y][threadIdx.x] >= smem[threadIdx.y + 1][threadIdx.x] && - smem[threadIdx.y][threadIdx.x] > smem[threadIdx.y][threadIdx.x - 1] && - smem[threadIdx.y][threadIdx.x] >= smem[threadIdx.y][threadIdx.x + 1]) + if (curVotes > threshold && + curVotes > accum(n + 1, r) && + curVotes >= accum(n + 1, r + 2) && + curVotes > accum(n, r + 1) && + curVotes >= accum(n + 2, r + 1)) { const float radius = (r - (numrho - 1) * 0.5f) * rho; const float angle = n * theta; @@ -260,7 +249,7 @@ namespace cv { namespace gpu { namespace device if (ind < maxSize) { out[ind] = make_float2(radius, angle); - votes[ind] = smem[threadIdx.y][threadIdx.x]; + votes[ind] = curVotes; } } } @@ -273,7 +262,9 @@ namespace cv { namespace gpu { namespace device cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) ); const dim3 block(32, 8); - const dim3 grid(divUp(accum.cols, block.x - 2), divUp(accum.rows, block.y - 2)); + const dim3 grid(divUp(accum.cols - 2, block.x), divUp(accum.rows - 2, block.y)); + + cudaSafeCall( cudaFuncSetCacheConfig(linesGetResult, cudaFuncCachePreferL1) ); linesGetResult<<>>(accum, out, votes, maxSize, rho, theta, threshold, accum.cols - 2); cudaSafeCall( cudaGetLastError() ); @@ -294,5 +285,202 @@ namespace cv { namespace gpu { namespace device return totalCount; } + + //////////////////////////////////////////////////////////////////////// + // circlesAccumCenters + + __global__ void circlesAccumCenters(const unsigned int* list, const int count, const PtrStepi dx, const PtrStepi dy, + PtrStepi accum, const int width, const int height, const int minRadius, const int maxRadius, const float idp) + { + const int SHIFT = 10; + const int ONE = 1 << SHIFT; + + const int tid = blockIdx.x * blockDim.x + threadIdx.x; + + if (tid >= count) + return; + + const unsigned int val = list[tid]; + + const int x = (val & 0xFFFF); + const int y = (val >> 16) & 0xFFFF; + + const int vx = dx(y, x); + const int vy = dy(y, x); + + if (vx == 0 && vy == 0) + return; + + const float mag = ::sqrtf(vx * vx + vy * vy); + + const int x0 = __float2int_rn((x * idp) * ONE); + const int y0 = __float2int_rn((y * idp) * ONE); + + int sx = __float2int_rn((vx * idp) * ONE / mag); + int sy = __float2int_rn((vy * idp) * ONE / mag); + + // Step from minRadius to maxRadius in both directions of the gradient + for (int k1 = 0; k1 < 2; ++k1) + { + int x1 = x0 + minRadius * sx; + int y1 = y0 + minRadius * sy; + + for (int r = minRadius; r <= maxRadius; x1 += sx, y1 += sy, ++r) + { + const int x2 = x1 >> SHIFT; + const int y2 = y1 >> SHIFT; + + if (x2 < 0 || x2 >= width || y2 < 0 || y2 >= height) + break; + + ::atomicAdd(accum.ptr(y2 + 1) + x2 + 1, 1); + } + + sx = -sx; + sy = -sy; + } + } + + void circlesAccumCenters_gpu(const unsigned int* list, int count, PtrStepi dx, PtrStepi dy, DevMem2Di accum, int minRadius, int maxRadius, float idp) + { + const dim3 block(256); + const dim3 grid(divUp(count, block.x)); + + cudaSafeCall( cudaFuncSetCacheConfig(circlesAccumCenters, cudaFuncCachePreferL1) ); + + circlesAccumCenters<<>>(list, count, dx, dy, accum, accum.cols - 2, accum.rows - 2, minRadius, maxRadius, idp); + cudaSafeCall( cudaGetLastError() ); + + cudaSafeCall( cudaDeviceSynchronize() ); + } + + //////////////////////////////////////////////////////////////////////// + // buildCentersList + + __global__ void buildCentersList(const DevMem2Di accum, unsigned int* centers, const int threshold) + { + const int x = blockIdx.x * blockDim.x + threadIdx.x; + const int y = blockIdx.y * blockDim.y + threadIdx.y; + + if (x < accum.cols - 2 && y < accum.rows - 2) + { + const int top = accum(y, x + 1); + + const int left = accum(y + 1, x); + const int cur = accum(y + 1, x + 1); + const int right = accum(y + 1, x + 2); + + const int bottom = accum(y + 2, x + 1); + + if (cur > threshold && cur > top && cur >= bottom && cur > left && cur >= right) + { + const unsigned int val = (y << 16) | x; + const int idx = ::atomicAdd(&g_counter, 1); + centers[idx] = val; + } + } + } + + int buildCentersList_gpu(DevMem2Di accum, unsigned int* centers, int threshold) + { + void* counterPtr; + cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) ); + + cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) ); + + const dim3 block(32, 8); + const dim3 grid(divUp(accum.cols - 2, block.x), divUp(accum.rows - 2, block.y)); + + cudaSafeCall( cudaFuncSetCacheConfig(buildCentersList, cudaFuncCachePreferL1) ); + + buildCentersList<<>>(accum, centers, threshold); + cudaSafeCall( cudaGetLastError() ); + + cudaSafeCall( cudaDeviceSynchronize() ); + + int totalCount; + cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) ); + + return totalCount; + } + + //////////////////////////////////////////////////////////////////////// + // circlesAccumRadius + + __global__ void circlesAccumRadius(const unsigned int* centers, const unsigned int* list, const int count, + float3* circles, const int maxCircles, const float dp, + const int minRadius, const int maxRadius, const int histSize, const int threshold) + { + extern __shared__ int smem[]; + + for (int i = threadIdx.x; i < histSize + 2; i += blockDim.x) + smem[i] = 0; + __syncthreads(); + + unsigned int val = centers[blockIdx.x]; + + float cx = (val & 0xFFFF); + float cy = (val >> 16) & 0xFFFF; + + cx = (cx + 0.5f) * dp; + cy = (cy + 0.5f) * dp; + + for (int i = threadIdx.x; i < count; i += blockDim.x) + { + val = list[i]; + + const int x = (val & 0xFFFF); + const int y = (val >> 16) & 0xFFFF; + + const float rad = ::sqrtf((cx - x) * (cx - x) + (cy - y) * (cy - y)); + if (rad >= minRadius && rad <= maxRadius) + { + const int r = __float2int_rn(rad - minRadius); + + Emulation::smem::atomicAdd(&smem[r + 1], 1); + } + } + + __syncthreads(); + + for (int i = threadIdx.x; i < histSize; i += blockDim.x) + { + const int curVotes = smem[i + 1]; + + if (curVotes >= threshold && curVotes > smem[i] && curVotes >= smem[i + 2]) + { + const int ind = ::atomicAdd(&g_counter, 1); + if (ind < maxCircles) + circles[ind] = make_float3(cx, cy, i + minRadius); + } + } + } + + int circlesAccumRadius_gpu(const unsigned int* centers, int centersCount, const unsigned int* list, int count, + float3* circles, int maxCircles, float dp, int minRadius, int maxRadius, int threshold, bool has20) + { + void* counterPtr; + cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) ); + + cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) ); + + const dim3 block(has20 ? 1024 : 512); + const dim3 grid(centersCount); + + const int histSize = ::ceil(maxRadius - minRadius + 1); + size_t smemSize = (histSize + 2) * sizeof(int); + + circlesAccumRadius<<>>(centers, list, count, circles, maxCircles, dp, minRadius, maxRadius, histSize, threshold); + cudaSafeCall( cudaGetLastError() ); + + cudaSafeCall( cudaDeviceSynchronize() ); + + int totalCount; + cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) ); + + totalCount = ::min(totalCount, maxCircles); + + return totalCount; + } } }}} diff --git a/modules/gpu/src/hough.cpp b/modules/gpu/src/hough.cpp index b2007ec..82396e3 100644 --- a/modules/gpu/src/hough.cpp +++ b/modules/gpu/src/hough.cpp @@ -44,12 +44,14 @@ #if !defined (HAVE_CUDA) -void cv::gpu::HoughLinesTransform(const GpuMat&, GpuMat&, GpuMat&, float, float) { throw_nogpu(); } -void cv::gpu::HoughLinesGet(const GpuMat&, GpuMat&, float, float, int, bool, int) { throw_nogpu(); } void cv::gpu::HoughLines(const GpuMat&, GpuMat&, float, float, int, bool, int) { throw_nogpu(); } -void cv::gpu::HoughLines(const GpuMat&, GpuMat&, GpuMat&, GpuMat&, float, float, int, bool, int) { throw_nogpu(); } +void cv::gpu::HoughLines(const GpuMat&, GpuMat&, HoughLinesBuf&, float, float, int, bool, int) { throw_nogpu(); } void cv::gpu::HoughLinesDownload(const GpuMat&, OutputArray, OutputArray) { throw_nogpu(); } +void cv::gpu::HoughCircles(const GpuMat&, GpuMat&, int, float, float, int, int, int, int, int) { throw_nogpu(); } +void cv::gpu::HoughCircles(const GpuMat&, GpuMat&, HoughCirclesBuf&, int, float, float, int, int, int, int, int) { throw_nogpu(); } +void cv::gpu::HoughCirclesDownload(const GpuMat&, OutputArray) { throw_nogpu(); } + #else /* !defined (HAVE_CUDA) */ namespace cv { namespace gpu { namespace device @@ -60,6 +62,11 @@ namespace cv { namespace gpu { namespace device void linesAccum_gpu(const unsigned int* list, int count, DevMem2Di accum, float rho, float theta, size_t sharedMemPerBlock, bool has20); int linesGetResult_gpu(DevMem2Di accum, float2* out, int* votes, int maxSize, float rho, float theta, float threshold, bool doSort); + + void circlesAccumCenters_gpu(const unsigned int* list, int count, PtrStepi dx, PtrStepi dy, DevMem2Di accum, int minRadius, int maxRadius, float idp); + int buildCentersList_gpu(DevMem2Di accum, unsigned int* centers, int threshold); + int circlesAccumRadius_gpu(const unsigned int* centers, int centersCount, const unsigned int* list, int count, + float3* circles, int maxCircles, float dp, int minRadius, int maxRadius, int threshold, bool has20); } }}} @@ -68,17 +75,11 @@ namespace cv { namespace gpu { namespace device void cv::gpu::HoughLines(const GpuMat& src, GpuMat& lines, float rho, float theta, int threshold, bool doSort, int maxLines) { - GpuMat accum, buf; - HoughLines(src, lines, accum, buf, rho, theta, threshold, doSort, maxLines); -} - -void cv::gpu::HoughLines(const GpuMat& src, GpuMat& lines, GpuMat& accum, GpuMat& buf, float rho, float theta, int threshold, bool doSort, int maxLines) -{ - HoughLinesTransform(src, accum, buf, rho, theta); - HoughLinesGet(accum, lines, rho, theta, threshold, doSort, maxLines); + HoughLinesBuf buf; + HoughLines(src, lines, buf, rho, theta, threshold, doSort, maxLines); } -void cv::gpu::HoughLinesTransform(const GpuMat& src, GpuMat& accum, GpuMat& buf, float rho, float theta) +void cv::gpu::HoughLines(const GpuMat& src, GpuMat& lines, HoughLinesBuf& buf, float rho, float theta, int threshold, bool doSort, int maxLines) { using namespace cv::gpu::device::hough; @@ -86,36 +87,31 @@ void cv::gpu::HoughLinesTransform(const GpuMat& src, GpuMat& accum, GpuMat& buf, CV_Assert(src.cols < std::numeric_limits::max()); CV_Assert(src.rows < std::numeric_limits::max()); - ensureSizeIsEnough(1, src.size().area(), CV_32SC1, buf); + ensureSizeIsEnough(1, src.size().area(), CV_32SC1, buf.list); + unsigned int* srcPoints = buf.list.ptr(); - const int count = buildPointList_gpu(src, buf.ptr()); + const int pointsCount = buildPointList_gpu(src, srcPoints); + if (pointsCount == 0) + { + lines.release(); + return; + } const int numangle = cvRound(CV_PI / theta); const int numrho = cvRound(((src.cols + src.rows) * 2 + 1) / rho); - CV_Assert(numangle > 0 && numrho > 0); - ensureSizeIsEnough(numangle + 2, numrho + 2, CV_32SC1, accum); - accum.setTo(Scalar::all(0)); + ensureSizeIsEnough(numangle + 2, numrho + 2, CV_32SC1, buf.accum); + buf.accum.setTo(Scalar::all(0)); DeviceInfo devInfo; - - if (count > 0) - linesAccum_gpu(buf.ptr(), count, accum, rho, theta, devInfo.sharedMemPerBlock(), devInfo.supports(FEATURE_SET_COMPUTE_20)); -} - -void cv::gpu::HoughLinesGet(const GpuMat& accum, GpuMat& lines, float rho, float theta, int threshold, bool doSort, int maxLines) -{ - using namespace cv::gpu::device::hough; - - CV_Assert(accum.type() == CV_32SC1); + linesAccum_gpu(srcPoints, pointsCount, buf.accum, rho, theta, devInfo.sharedMemPerBlock(), devInfo.supports(FEATURE_SET_COMPUTE_20)); ensureSizeIsEnough(2, maxLines, CV_32FC2, lines); - int count = linesGetResult_gpu(accum, lines.ptr(0), lines.ptr(1), maxLines, rho, theta, (float)threshold, doSort); - - if (count > 0) - lines.cols = count; + int linesCount = linesGetResult_gpu(buf.accum, lines.ptr(0), lines.ptr(1), maxLines, rho, theta, threshold, doSort); + if (linesCount > 0) + lines.cols = linesCount; else lines.release(); } @@ -145,4 +141,155 @@ void cv::gpu::HoughLinesDownload(const GpuMat& d_lines, OutputArray h_lines_, Ou } } +////////////////////////////////////////////////////////// +// HoughCircles + +void cv::gpu::HoughCircles(const GpuMat& src, GpuMat& circles, int method, float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles) +{ + HoughCirclesBuf buf; + HoughCircles(src, circles, buf, method, dp, minDist, cannyThreshold, votesThreshold, minRadius, maxRadius, maxCircles); +} + +void cv::gpu::HoughCircles(const GpuMat& src, GpuMat& circles, HoughCirclesBuf& buf, int method, + float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles) +{ + using namespace cv::gpu::device::hough; + + CV_Assert(src.type() == CV_8UC1); + CV_Assert(src.cols < std::numeric_limits::max()); + CV_Assert(src.rows < std::numeric_limits::max()); + CV_Assert(method == CV_HOUGH_GRADIENT); + CV_Assert(dp > 0); + CV_Assert(minRadius > 0 && maxRadius > minRadius); + CV_Assert(cannyThreshold > 0); + CV_Assert(votesThreshold > 0); + CV_Assert(maxCircles > 0); + + const float idp = 1.0f / dp; + + cv::gpu::Canny(src, buf.cannyBuf, buf.edges, std::max(cannyThreshold / 2, 1), cannyThreshold); + + ensureSizeIsEnough(2, src.size().area(), CV_32SC1, buf.list); + unsigned int* srcPoints = buf.list.ptr(0); + unsigned int* centers = buf.list.ptr(1); + + const int pointsCount = buildPointList_gpu(buf.edges, srcPoints); + if (pointsCount == 0) + { + circles.release(); + return; + } + + ensureSizeIsEnough(cvCeil(src.rows * idp) + 2, cvCeil(src.cols * idp) + 2, CV_32SC1, buf.accum); + buf.accum.setTo(Scalar::all(0)); + + circlesAccumCenters_gpu(srcPoints, pointsCount, buf.cannyBuf.dx, buf.cannyBuf.dy, buf.accum, minRadius, maxRadius, idp); + + int centersCount = buildCentersList_gpu(buf.accum, centers, votesThreshold); + if (centersCount == 0) + { + circles.release(); + return; + } + + if (minDist > 1) + { + cv::AutoBuffer oldBuf_(centersCount); + cv::AutoBuffer newBuf_(centersCount); + int newCount = 0; + + ushort2* oldBuf = oldBuf_; + ushort2* newBuf = newBuf_; + + cudaSafeCall( cudaMemcpy(oldBuf, centers, centersCount * sizeof(ushort2), cudaMemcpyDeviceToHost) ); + + const int cellSize = cvRound(minDist); + const int gridWidth = (src.cols + cellSize - 1) / cellSize; + const int gridHeight = (src.rows + cellSize - 1) / cellSize; + + std::vector< std::vector > grid(gridWidth * gridHeight); + + minDist *= minDist; + + for (int i = 0; i < centersCount; ++i) + { + ushort2 p = oldBuf[i]; + + bool good = true; + + int xCell = static_cast(p.x / cellSize); + int yCell = static_cast(p.y / cellSize); + + int x1 = xCell - 1; + int y1 = yCell - 1; + int x2 = xCell + 1; + int y2 = yCell + 1; + + // boundary check + x1 = std::max(0, x1); + y1 = std::max(0, y1); + x2 = std::min(gridWidth - 1, x2); + y2 = std::min(gridHeight - 1, y2); + + for (int yy = y1; yy <= y2; ++yy) + { + for (int xx = x1; xx <= x2; ++xx) + { + vector& m = grid[yy * gridWidth + xx]; + + for(size_t j = 0; j < m.size(); ++j) + { + float dx = p.x - m[j].x; + float dy = p.y - m[j].y; + + if (dx * dx + dy * dy < minDist) + { + good = false; + goto break_out; + } + } + } + } + + break_out: + + if(good) + { + grid[yCell * gridWidth + xCell].push_back(p); + + newBuf[newCount++] = p; + } + } + + cudaSafeCall( cudaMemcpy(centers, newBuf, newCount * sizeof(unsigned int), cudaMemcpyHostToDevice) ); + centersCount = newCount; + } + + ensureSizeIsEnough(1, maxCircles, CV_32FC3, circles); + + DeviceInfo devInfo; + const int circlesCount = circlesAccumRadius_gpu(centers, centersCount, srcPoints, pointsCount, circles.ptr(), maxCircles, + dp, minRadius, maxRadius, votesThreshold, devInfo.supports(FEATURE_SET_COMPUTE_20)); + + if (circlesCount > 0) + circles.cols = circlesCount; + else + circles.release(); +} + +void cv::gpu::HoughCirclesDownload(const GpuMat& d_circles, cv::OutputArray h_circles_) +{ + if (d_circles.empty()) + { + h_circles_.release(); + return; + } + + CV_Assert(d_circles.rows == 1 && d_circles.type() == CV_32FC3); + + h_circles_.create(1, d_circles.cols, CV_32FC3); + Mat h_circles = h_circles_.getMat(); + d_circles.download(h_circles); +} + #endif /* !defined (HAVE_CUDA) */ diff --git a/modules/gpu/test/test_imgproc.cpp b/modules/gpu/test/test_imgproc.cpp index 06662d8..2b1f55d 100644 --- a/modules/gpu/test/test_imgproc.cpp +++ b/modules/gpu/test/test_imgproc.cpp @@ -1131,7 +1131,7 @@ INSTANTIATE_TEST_CASE_P(GPU_ImgProc, CornerMinEigen, testing::Combine( PARAM_TEST_CASE(HoughLines, cv::gpu::DeviceInfo, cv::Size, UseRoi) { - void generateLines(cv::Mat& img) + static void generateLines(cv::Mat& img) { img.setTo(cv::Scalar::all(0)); @@ -1141,7 +1141,7 @@ PARAM_TEST_CASE(HoughLines, cv::gpu::DeviceInfo, cv::Size, UseRoi) cv::line(img, cv::Point(img.cols, 0), cv::Point(0, img.rows), cv::Scalar::all(255)); } - void drawLines(cv::Mat& dst, const std::vector& lines) + static void drawLines(cv::Mat& dst, const std::vector& lines) { dst.setTo(cv::Scalar::all(0)); @@ -1191,6 +1191,77 @@ INSTANTIATE_TEST_CASE_P(GPU_ImgProc, HoughLines, testing::Combine( DIFFERENT_SIZES, WHOLE_SUBMAT)); +/////////////////////////////////////////////////////////////////////////////////////////////////////// +// HoughCircles + +PARAM_TEST_CASE(HoughCircles, cv::gpu::DeviceInfo, cv::Size, UseRoi) +{ + static void drawCircles(cv::Mat& dst, const std::vector& circles, bool fill) + { + dst.setTo(cv::Scalar::all(0)); + + for (size_t i = 0; i < circles.size(); ++i) + cv::circle(dst, cv::Point(circles[i][0], circles[i][1]), circles[i][2], cv::Scalar::all(255), fill ? -1 : 1); + } +}; + +TEST_P(HoughCircles, Accuracy) +{ + const cv::gpu::DeviceInfo devInfo = GET_PARAM(0); + cv::gpu::setDevice(devInfo.deviceID()); + const cv::Size size = GET_PARAM(1); + const bool useRoi = GET_PARAM(2); + + const float dp = 2.0f; + const float minDist = 10.0f; + const int minRadius = 10; + const int maxRadius = 20; + const int cannyThreshold = 100; + const int votesThreshold = 20; + + std::vector circles_gold(4); + circles_gold[0] = cv::Vec3f(20, 20, minRadius); + circles_gold[1] = cv::Vec3f(90, 87, minRadius + 3); + circles_gold[2] = cv::Vec3f(30, 70, minRadius + 8); + circles_gold[3] = cv::Vec3f(80, 10, maxRadius); + + cv::Mat src(size, CV_8UC1); + drawCircles(src, circles_gold, true); + + cv::gpu::GpuMat d_circles; + cv::gpu::HoughCircles(loadMat(src, useRoi), d_circles, CV_HOUGH_GRADIENT, dp, minDist, cannyThreshold, votesThreshold, minRadius, maxRadius); + + std::vector circles; + cv::gpu::HoughCirclesDownload(d_circles, circles); + + ASSERT_FALSE(circles.empty()); + + for (size_t i = 0; i < circles.size(); ++i) + { + cv::Vec3f cur = circles[i]; + + bool found = false; + + for (size_t j = 0; j < circles_gold.size(); ++j) + { + cv::Vec3f gold = circles_gold[j]; + + if (std::fabs(cur[0] - gold[0]) < minDist && std::fabs(cur[1] - gold[1]) < minDist && std::fabs(cur[2] - gold[2]) < minDist) + { + found = true; + break; + } + } + + ASSERT_TRUE(found); + } +} + +INSTANTIATE_TEST_CASE_P(GPU_ImgProc, HoughCircles, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + WHOLE_SUBMAT)); + } // namespace #endif // HAVE_CUDA -- 2.7.4