From 51433a933168cdf75209f4d9d38d73813f2867b0 Mon Sep 17 00:00:00 2001 From: "marina.kolpakova" Date: Mon, 6 Aug 2012 15:08:27 +0400 Subject: [PATCH] C.C. Labeling: added stream support --- modules/gpu/include/opencv2/gpu/gpu.hpp | 2 +- modules/gpu/src/cuda/ccomponetns.cu | 22 ++++++++++++---------- modules/gpu/src/graphcuts.cpp | 25 +++++++++++++++++++------ modules/gpu/test/test_labeling.cpp | 17 ----------------- 4 files changed, 32 insertions(+), 34 deletions(-) diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp index b1fd24e..a2506f1 100644 --- a/modules/gpu/include/opencv2/gpu/gpu.hpp +++ b/modules/gpu/include/opencv2/gpu/gpu.hpp @@ -918,7 +918,7 @@ CV_EXPORTS void graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTra GpuMat& buf, Stream& stream = Stream::Null()); //! performs connected componnents labeling. -CV_EXPORTS void labelComponents(const GpuMat& image, GpuMat& mask, GpuMat& components, const cv::Scalar& lo, const cv::Scalar& hi); +CV_EXPORTS void labelComponents(const GpuMat& image, GpuMat& mask, GpuMat& components, const cv::Scalar& lo, const cv::Scalar& hi, Stream& stream = Stream::Null()); ////////////////////////////////// Histograms ////////////////////////////////// diff --git a/modules/gpu/src/cuda/ccomponetns.cu b/modules/gpu/src/cuda/ccomponetns.cu index 1ed5ebb..fbc68cb 100644 --- a/modules/gpu/src/cuda/ccomponetns.cu +++ b/modules/gpu/src/cuda/ccomponetns.cu @@ -83,7 +83,7 @@ namespace cv { namespace gpu { namespace device }; template - __global__ void computeComponents(const DevMem2D image, DevMem2D components, F connected) + __global__ void computeConnectivity(const DevMem2D image, DevMem2D components, F connected) { int x = threadIdx.x + blockIdx.x * blockDim.x; int y = threadIdx.y + blockIdx.y * blockDim.y; @@ -108,15 +108,16 @@ namespace cv { namespace gpu { namespace device components(y, x) = c; } - void computeEdges(const DevMem2D& image, DevMem2D components, const int lo, const int hi) + void computeEdges(const DevMem2D& image, DevMem2D edges, const int lo, const int hi, cudaStream_t stream) { dim3 block(CTA_SIZE_X, CTA_SIZE_Y); dim3 grid(divUp(image.cols, block.x), divUp(image.rows, block.y)); InInterval inInt(lo, hi); - computeComponents ><<>>(image, components, inInt); + computeConnectivity ><<>>(image, edges, inInt); cudaSafeCall( cudaGetLastError() ); - cudaSafeCall( cudaDeviceSynchronize() ); + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); } __global__ void lableTiles(const DevMem2D edges, DevMem2Di comps) @@ -384,25 +385,25 @@ namespace cv { namespace gpu { namespace device comps(y, x) = root(comps, comps(y, x)); } - void labelComponents(const DevMem2D& edges, DevMem2Di comps) + void labelComponents(const DevMem2D& edges, DevMem2Di comps, cudaStream_t stream) { dim3 block(CTA_SIZE_X, CTA_SIZE_Y); dim3 grid(divUp(edges.cols, TILE_COLS), divUp(edges.rows, TILE_ROWS)); - lableTiles<<>>(edges, comps); + lableTiles<<>>(edges, comps); cudaSafeCall( cudaGetLastError() ); int tileSizeX = TILE_COLS, tileSizeY = TILE_ROWS; cudaSafeCall( cudaGetLastError() ); - cudaSafeCall( cudaDeviceSynchronize() ); + // cudaSafeCall( cudaDeviceSynchronize() ); while (grid.x > 1 || grid.y > 1) { dim3 mergeGrid(ceilf(grid.x / 2.0), ceilf(grid.y / 2.0)); dim3 mergeBlock(STA_SIZE_MARGE_X, STA_SIZE_MARGE_Y); std::cout << "merging: " << grid.y << " x " << grid.x << " ---> " << mergeGrid.y << " x " << mergeGrid.x << " for tiles: " << tileSizeY << " x " << tileSizeX << std::endl; - crossMerge<<>>(2, 2, tileSizeY, tileSizeX, edges, comps, ceilf(grid.y / 2.0) - grid.y / 2, ceilf(grid.x / 2.0) - grid.x / 2); + crossMerge<<>>(2, 2, tileSizeY, tileSizeX, edges, comps, ceilf(grid.y / 2.0) - grid.y / 2, ceilf(grid.x / 2.0) - grid.x / 2); tileSizeX <<= 1; tileSizeY <<= 1; grid = mergeGrid; @@ -412,9 +413,10 @@ namespace cv { namespace gpu { namespace device grid.x = divUp(edges.cols, block.x); grid.y = divUp(edges.rows, block.y); - flatten<<>>(edges, comps); + flatten<<>>(edges, comps); cudaSafeCall( cudaGetLastError() ); - cudaSafeCall( cudaDeviceSynchronize() ); + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); } } } } } \ No newline at end of file diff --git a/modules/gpu/src/graphcuts.cpp b/modules/gpu/src/graphcuts.cpp index d0af3e1..3ea9d3b 100644 --- a/modules/gpu/src/graphcuts.cpp +++ b/modules/gpu/src/graphcuts.cpp @@ -47,7 +47,7 @@ void cv::gpu::graphcut(GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); } void cv::gpu::graphcut(GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); } -void cv::gpu::labelComponents(const GpuMat& image, GpuMat& mask, GpuMat& components, const cv::Scalar& lo, const cv::Scalar& hi) { throw_nogpu(); } +void cv::gpu::labelComponents(const GpuMat&, GpuMat&, GpuMat&, const cv::Scalar&, const cv::Scalar&, Stream&) { throw_nogpu(); } #else /* !defined (HAVE_CUDA) */ @@ -55,15 +55,28 @@ namespace cv { namespace gpu { namespace device { namespace ccl { - void labelComponents(const DevMem2D& edges, DevMem2Di comps); - void computeEdges(const DevMem2D& image, DevMem2D edges, const int lo, const int hi); + void labelComponents(const DevMem2D& edges, DevMem2Di comps, cudaStream_t stream); + void computeEdges(const DevMem2D& image, DevMem2D edges, const int lo, const int hi, cudaStream_t stream); } }}} -void cv::gpu::labelComponents(const GpuMat& image, GpuMat& mask, GpuMat& components, const cv::Scalar& lo, const cv::Scalar& hi) +void cv::gpu::labelComponents(const GpuMat& image, GpuMat& mask, GpuMat& components, const cv::Scalar& lo, const cv::Scalar& hi, Stream& s) { - device::ccl::computeEdges(image, mask, lo[0], hi[0]); - device::ccl::labelComponents(mask, components); + CV_Assert(!image.empty()); + + int type = image.type(); + CV_Assert(type == CV_8UC1); + + if (image.size() != mask.size() || mask.type() != CV_8UC1) + mask.create(image.size(), CV_8UC1); + + if (image.size() != components.size() || components.type() != CV_32SC1) + components.create(image.size(), CV_32SC1); + + cudaStream_t stream = StreamAccessor::getStream(s); + + device::ccl::computeEdges(image, mask, lo[0], hi[0], stream); + device::ccl::labelComponents(mask, components, stream); } diff --git a/modules/gpu/test/test_labeling.cpp b/modules/gpu/test/test_labeling.cpp index 1c0f2ac..2ff010d 100644 --- a/modules/gpu/test/test_labeling.cpp +++ b/modules/gpu/test/test_labeling.cpp @@ -63,9 +63,6 @@ TEST_P(Labeling, ConnectedComponents) { cv::Mat image; cvtColor(loat_image(), image, CV_BGR2GRAY); - cv::Mat image_cpu = image.clone(); - - // cv::floodFill(image, cv::Point(1,1),cv::Scalar::all(64), 0, cv::Scalar::all(0), cv::Scalar::all(256)); cv::gpu::GpuMat mask; mask.create(image.rows, image.cols, CV_8UC1); @@ -73,21 +70,8 @@ TEST_P(Labeling, ConnectedComponents) cv::gpu::GpuMat components; components.create(image.rows, image.cols, CV_32SC1); - std::cout << "summary: " << image.cols << " " << image.rows << " " - << cv::gpu::GpuMat(image).cols << " " << cv::gpu::GpuMat(image).rows<< " " - << mask.cols << " " << mask.rows<< " " - << components.cols << " " << components.rows<< std::endl; - - cv::gpu::labelComponents(cv::gpu::GpuMat(image), mask, components, cv::Scalar::all(0), cv::Scalar::all(2)); - // // for(int i = 0; i + 32 < image.rows; i += 32) - // // for(int j = 0; j + 32 < image.cols; j += 32) - // // { - // // std::cout << cv::Mat(cv::Mat(mask), cv::Rect(j, i, 32, 32 ))<< std::endl; - // // std::cout << cv::Mat(cv::Mat(components), cv::Rect(j, i, 32, 32 )) << std::endl; - // // } - // std::cout << cv::Mat(components) << std::endl; // cv::imshow("test", image); // cv::waitKey(0); @@ -102,7 +86,6 @@ TEST_P(Labeling, ConnectedComponents) cv::waitKey(0); cv::imshow("test", cv::Mat(components) * 2); cv::waitKey(0); - std::cout << "test! " << image.cols << std::endl; } INSTANTIATE_TEST_CASE_P(ConnectedComponents, Labeling, ALL_DEVICES); \ No newline at end of file -- 2.7.4