From 12b7f3a0d08bcb1560ffce0c18fd11e97dfadef1 Mon Sep 17 00:00:00 2001 From: Alexey Spizhevoy Date: Tue, 28 Sep 2010 13:11:21 +0000 Subject: [PATCH] fixed some compile-time problems (under Ubuntu) --- modules/gpu/src/cuda/split_merge.cu | 213 ++++++++++++++++++------------------ modules/gpu/src/split_merge.cpp | 10 +- tests/gpu/src/imgproc_gpu.cpp | 4 +- 3 files changed, 115 insertions(+), 112 deletions(-) diff --git a/modules/gpu/src/cuda/split_merge.cu b/modules/gpu/src/cuda/split_merge.cu index b3b0c55..3788b22 100644 --- a/modules/gpu/src/cuda/split_merge.cu +++ b/modules/gpu/src/cuda/split_merge.cu @@ -94,73 +94,7 @@ namespace cv { namespace gpu { namespace split_merge { typedef void (*SplitFunction)(const DevMem2D& src, DevMem2D* dst, const cudaStream_t& stream); //------------------------------------------------------------ - // Merge - - template - static void mergeC2_(const DevMem2D* src, DevMem2D& dst, const cudaStream_t& stream) - { - dim3 blockDim(32, 8); - dim3 gridDim(divUp(dst.cols, blockDim.x), divUp(dst.rows, blockDim.y)); - mergeC2_<<>>( - src[0].ptr, src[0].step, - src[1].ptr, src[1].step, - dst.rows, dst.cols, dst.ptr, dst.step); - if (stream == 0) - cudaSafeCall(cudaThreadSynchronize()); - } - - - template - static void mergeC3_(const DevMem2D* src, DevMem2D& dst, const cudaStream_t& stream) - { - dim3 blockDim(32, 8); - dim3 gridDim(divUp(dst.cols, blockDim.x), divUp(dst.rows, blockDim.y)); - mergeC3_<<>>( - src[0].ptr, src[0].step, - src[1].ptr, src[1].step, - src[2].ptr, src[2].step, - dst.rows, dst.cols, dst.ptr, dst.step); - if (stream == 0) - cudaSafeCall(cudaThreadSynchronize()); - } - - - template - static void mergeC4_(const DevMem2D* src, DevMem2D& dst, const cudaStream_t& stream) - { - dim3 blockDim(32, 8); - dim3 gridDim(divUp(dst.cols, blockDim.x), divUp(dst.rows, blockDim.y)); - mergeC4_<<>>( - src[0].ptr, src[0].step, - src[1].ptr, src[1].step, - src[2].ptr, src[2].step, - src[3].ptr, src[3].step, - dst.rows, dst.cols, dst.ptr, dst.step); - if (stream == 0) - cudaSafeCall(cudaThreadSynchronize()); - } - - - extern "C" void merge_caller(const DevMem2D* src, DevMem2D& dst, - int total_channels, int elem_size, - const cudaStream_t& stream) - { - static MergeFunction merge_func_tbl[] = - { - mergeC2_, mergeC2_, mergeC2_, 0, mergeC2_, - mergeC3_, mergeC3_, mergeC3_, 0, mergeC3_, - mergeC4_, mergeC4_, mergeC4_, 0, mergeC4_, - }; - - int merge_func_id = (total_channels - 2) * 5 + (elem_size >> 1); - MergeFunction merge_func = merge_func_tbl[merge_func_id]; - - if (merge_func == 0) - cv::gpu::error("Unsupported channel count or data type", __FILE__, __LINE__); - - merge_func(src, dst, stream); - } - + // Merge template __global__ void mergeC2_(const uchar* src0, size_t src0_step, @@ -289,76 +223,78 @@ namespace cv { namespace gpu { namespace split_merge { } } - //------------------------------------------------------------ - // Split - - template - static void splitC2_(const DevMem2D& src, DevMem2D* dst, const cudaStream_t& stream) + template + static void mergeC2_(const DevMem2D* src, DevMem2D& dst, const cudaStream_t& stream) { dim3 blockDim(32, 8); - dim3 gridDim(divUp(src.cols, blockDim.x), divUp(src.rows, blockDim.y)); - splitC2_<<>>( - src.ptr, src.step, src.rows, src.cols, - dst[0].ptr, dst[0].step, - dst[1].ptr, dst[1].step); + dim3 gridDim(divUp(dst.cols, blockDim.x), divUp(dst.rows, blockDim.y)); + mergeC2_<<>>( + src[0].ptr, src[0].step, + src[1].ptr, src[1].step, + dst.rows, dst.cols, dst.ptr, dst.step); if (stream == 0) cudaSafeCall(cudaThreadSynchronize()); } - template - static void splitC3_(const DevMem2D& src, DevMem2D* dst, const cudaStream_t& stream) + template + static void mergeC3_(const DevMem2D* src, DevMem2D& dst, const cudaStream_t& stream) { dim3 blockDim(32, 8); - dim3 gridDim(divUp(src.cols, blockDim.x), divUp(src.rows, blockDim.y)); - splitC3_<<>>( - src.ptr, src.step, src.rows, src.cols, - dst[0].ptr, dst[0].step, - dst[1].ptr, dst[1].step, - dst[2].ptr, dst[2].step); + dim3 gridDim(divUp(dst.cols, blockDim.x), divUp(dst.rows, blockDim.y)); + mergeC3_<<>>( + src[0].ptr, src[0].step, + src[1].ptr, src[1].step, + src[2].ptr, src[2].step, + dst.rows, dst.cols, dst.ptr, dst.step); if (stream == 0) cudaSafeCall(cudaThreadSynchronize()); } - template - static void splitC4_(const DevMem2D& src, DevMem2D* dst, const cudaStream_t& stream) + template + static void mergeC4_(const DevMem2D* src, DevMem2D& dst, const cudaStream_t& stream) { dim3 blockDim(32, 8); - dim3 gridDim(divUp(src.cols, blockDim.x), divUp(src.rows, blockDim.y)); - splitC4_<<>>( - src.ptr, src.step, src.rows, src.cols, - dst[0].ptr, dst[0].step, - dst[1].ptr, dst[1].step, - dst[2].ptr, dst[2].step, - dst[3].ptr, dst[3].step); + dim3 gridDim(divUp(dst.cols, blockDim.x), divUp(dst.rows, blockDim.y)); + mergeC4_<<>>( + src[0].ptr, src[0].step, + src[1].ptr, src[1].step, + src[2].ptr, src[2].step, + src[3].ptr, src[3].step, + dst.rows, dst.cols, dst.ptr, dst.step); if (stream == 0) cudaSafeCall(cudaThreadSynchronize()); } - extern "C" void split_caller(const DevMem2D& src, DevMem2D* dst, - int num_channels, int elem_size1, - const cudaStream_t& stream) + extern "C" void merge_caller(const DevMem2D* src, DevMem2D& dst, + int total_channels, int elem_size, + const cudaStream_t& stream) { - static SplitFunction split_func_tbl[] = + static MergeFunction merge_func_tbl[] = { - splitC2_, splitC2_, splitC2_, 0, splitC2_, - splitC3_, splitC3_, splitC3_, 0, splitC3_, - splitC4_, splitC4_, splitC4_, 0, splitC4_, + mergeC2_, mergeC2_, mergeC2_, 0, mergeC2_, + mergeC3_, mergeC3_, mergeC3_, 0, mergeC3_, + mergeC4_, mergeC4_, mergeC4_, 0, mergeC4_, }; - int split_func_id = (num_channels - 2) * 5 + (elem_size1 >> 1); - SplitFunction split_func = split_func_tbl[split_func_id]; + int merge_func_id = (total_channels - 2) * 5 + (elem_size >> 1); + MergeFunction merge_func = merge_func_tbl[merge_func_id]; - if (split_func == 0) + if (merge_func == 0) cv::gpu::error("Unsupported channel count or data type", __FILE__, __LINE__); - split_func(src, dst, stream); + merge_func(src, dst, stream); } + + //------------------------------------------------------------ + // Split + + template __global__ void splitC2_(const uchar* src, size_t src_step, int rows, int cols, @@ -491,4 +427,69 @@ namespace cv { namespace gpu { namespace split_merge { } } -}}} // namespace cv::gpu::split_merge \ No newline at end of file + template + static void splitC2_(const DevMem2D& src, DevMem2D* dst, const cudaStream_t& stream) + { + dim3 blockDim(32, 8); + dim3 gridDim(divUp(src.cols, blockDim.x), divUp(src.rows, blockDim.y)); + splitC2_<<>>( + src.ptr, src.step, src.rows, src.cols, + dst[0].ptr, dst[0].step, + dst[1].ptr, dst[1].step); + if (stream == 0) + cudaSafeCall(cudaThreadSynchronize()); + } + + + template + static void splitC3_(const DevMem2D& src, DevMem2D* dst, const cudaStream_t& stream) + { + dim3 blockDim(32, 8); + dim3 gridDim(divUp(src.cols, blockDim.x), divUp(src.rows, blockDim.y)); + splitC3_<<>>( + src.ptr, src.step, src.rows, src.cols, + dst[0].ptr, dst[0].step, + dst[1].ptr, dst[1].step, + dst[2].ptr, dst[2].step); + if (stream == 0) + cudaSafeCall(cudaThreadSynchronize()); + } + + + template + static void splitC4_(const DevMem2D& src, DevMem2D* dst, const cudaStream_t& stream) + { + dim3 blockDim(32, 8); + dim3 gridDim(divUp(src.cols, blockDim.x), divUp(src.rows, blockDim.y)); + splitC4_<<>>( + src.ptr, src.step, src.rows, src.cols, + dst[0].ptr, dst[0].step, + dst[1].ptr, dst[1].step, + dst[2].ptr, dst[2].step, + dst[3].ptr, dst[3].step); + if (stream == 0) + cudaSafeCall(cudaThreadSynchronize()); + } + + + extern "C" void split_caller(const DevMem2D& src, DevMem2D* dst, + int num_channels, int elem_size1, + const cudaStream_t& stream) + { + static SplitFunction split_func_tbl[] = + { + splitC2_, splitC2_, splitC2_, 0, splitC2_, + splitC3_, splitC3_, splitC3_, 0, splitC3_, + splitC4_, splitC4_, splitC4_, 0, splitC4_, + }; + + int split_func_id = (num_channels - 2) * 5 + (elem_size1 >> 1); + SplitFunction split_func = split_func_tbl[split_func_id]; + + if (split_func == 0) + cv::gpu::error("Unsupported channel count or data type", __FILE__, __LINE__); + + split_func(src, dst, stream); + } + +}}} // namespace cv::gpu::split_merge diff --git a/modules/gpu/src/split_merge.cpp b/modules/gpu/src/split_merge.cpp index 254ccbf..7dcc446 100644 --- a/modules/gpu/src/split_merge.cpp +++ b/modules/gpu/src/split_merge.cpp @@ -100,8 +100,9 @@ namespace cv { namespace gpu { namespace split_merge for(size_t i = 0; i < n; ++i) src_as_devmem[i] = src[i]; - split_merge::merge_caller(src_as_devmem, (DevMem2D)dst, - total_channels, CV_ELEM_SIZE(depth), + DevMem2D dst_as_devmem(dst); + split_merge::merge_caller(src_as_devmem, dst_as_devmem, + total_channels, CV_ELEM_SIZE(depth), stream); } } @@ -130,7 +131,8 @@ namespace cv { namespace gpu { namespace split_merge for (int i = 0; i < num_channels; ++i) dst_as_devmem[i] = dst[i]; - split_merge::split_caller((DevMem2D)src, dst_as_devmem, + DevMem2D src_as_devmem(src); + split_merge::split_caller(src_as_devmem, dst_as_devmem, num_channels, src.elemSize1(), stream); } @@ -190,4 +192,4 @@ void cv::gpu::split(const GpuMat& src, vector& dst, const Stream& stream split_merge::split(src, &dst[0], StreamAccessor::getStream(stream)); } -#endif /* !defined (HAVE_CUDA) */ \ No newline at end of file +#endif /* !defined (HAVE_CUDA) */ diff --git a/tests/gpu/src/imgproc_gpu.cpp b/tests/gpu/src/imgproc_gpu.cpp index c093ddf..9094458 100644 --- a/tests/gpu/src/imgproc_gpu.cpp +++ b/tests/gpu/src/imgproc_gpu.cpp @@ -379,7 +379,7 @@ struct CV_GpuNppImageIntegralTest : public CV_GpuImageProcTest { CV_GpuNppImageIntegralTest() : CV_GpuImageProcTest( "GPU-NppImageIntegral", "integral" ) {} - int CV_GpuNppImageIntegralTest::test(const Mat& img) + int test(const Mat& img) { if (img.type() != CV_8UC1) { @@ -554,4 +554,4 @@ CV_GpuNppImageWarpAffineTest CV_GpuNppImageWarpAffine_test; CV_GpuNppImageWarpPerspectiveTest CV_GpuNppImageWarpPerspective_test; CV_GpuNppImageIntegralTest CV_GpuNppImageIntegral_test; CV_GpuNppImageBlurTest CV_GpuNppImageBlur_test; -CV_GpuCvtColorTest CV_GpuCvtColor_test; \ No newline at end of file +CV_GpuCvtColorTest CV_GpuCvtColor_test; -- 2.7.4