From ae529f4bc6f3354bc3912ce249c38a84f453d1c8 Mon Sep 17 00:00:00 2001
From: Alexey Spizhevoy
Date: Mon, 31 Jan 2011 14:37:03 +0000
Subject: [PATCH] added absSum function

---
 modules/gpu/include/opencv2/gpu/gpu.hpp   |   8 +++
 modules/gpu/src/cuda/matrix_reductions.cu | 110 ++++++++++++++++++++++++++++++
 modules/gpu/src/matrix_reductions.cpp     |  54 ++++++++++++++-
 tests/gpu/src/arithm.cpp                  |  26 +++++++
 4 files changed, 197 insertions(+), 1 deletion(-)

diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp
index 2531d1e..77f2acd 100644
--- a/modules/gpu/include/opencv2/gpu/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu/gpu.hpp
@@ -766,6 +766,14 @@ namespace cv
         //! supports only single channel images
         CV_EXPORTS Scalar sum(const GpuMat& src, GpuMat& buf);
 
+        //! computes sum of array elements absolute values
+        //! supports only single channel images
+        CV_EXPORTS Scalar absSum(const GpuMat& src);
+
+        //! computes sum of array elements absolute values
+        //! supports only single channel images
+        CV_EXPORTS Scalar absSum(const GpuMat& src, GpuMat& buf);
+
         //! computes squared sum of array elements
         //!
supports only single channel images
         CV_EXPORTS Scalar sqrSum(const GpuMat& src);
diff --git a/modules/gpu/src/cuda/matrix_reductions.cu b/modules/gpu/src/cuda/matrix_reductions.cu
index 6dd7dab..398e376 100644
--- a/modules/gpu/src/cuda/matrix_reductions.cu
+++ b/modules/gpu/src/cuda/matrix_reductions.cu
@@ -954,6 +954,12 @@ namespace cv { namespace gpu { namespace mathfunc
     struct IdentityOp { static __device__ R call(R x) { return x; } };
 
     template <typename R>
+    struct AbsOp { static __device__ R call(R x) { return abs(x); } };
+
+    template <>
+    struct AbsOp<uint> { static __device__ uint call(uint x) { return x; } };
+
+    template <typename R>
     struct SqrOp { static __device__ R call(R x) { return x * x; } };
 
     __constant__ int ctwidth;
@@ -1510,6 +1516,110 @@ namespace cv { namespace gpu { namespace mathfunc
 
 
     template <typename T>
+    void absSumMultipassCaller(const DevMem2D src, PtrStep buf, double* sum, int cn)
+    {
+        using namespace sums;
+        typedef typename SumType<T>::R R;
+
+        dim3 threads, grid;
+        estimateThreadCfg(src.cols, src.rows, threads, grid);
+        setKernelConsts(src.cols, src.rows, threads, grid);
+
+        switch (cn)
+        {
+        case 1:
+            sumKernel<T, R, AbsOp<R>, threads_x * threads_y><<<grid, threads>>>(
+                    src, (typename TypeVec<R, 1>::vec_t*)buf.ptr(0));
+            sumPass2Kernel<R, threads_x * threads_y><<<1, threads_x * threads_y>>>(
+                    (typename TypeVec<R, 1>::vec_t*)buf.ptr(0), grid.x * grid.y);
+            break;
+        case 2:
+            sumKernel_C2<T, R, AbsOp<R>, threads_x * threads_y><<<grid, threads>>>(
+                    src, (typename TypeVec<R, 2>::vec_t*)buf.ptr(0));
+            sumPass2Kernel_C2<R, threads_x * threads_y><<<1, threads_x * threads_y>>>(
+                    (typename TypeVec<R, 2>::vec_t*)buf.ptr(0), grid.x * grid.y);
+            break;
+        case 3:
+            sumKernel_C3<T, R, AbsOp<R>, threads_x * threads_y><<<grid, threads>>>(
+                    src, (typename TypeVec<R, 3>::vec_t*)buf.ptr(0));
+            sumPass2Kernel_C3<R, threads_x * threads_y><<<1, threads_x * threads_y>>>(
+                    (typename TypeVec<R, 3>::vec_t*)buf.ptr(0), grid.x * grid.y);
+            break;
+        case 4:
+            sumKernel_C4<T, R, AbsOp<R>, threads_x * threads_y><<<grid, threads>>>(
+                    src, (typename TypeVec<R, 4>::vec_t*)buf.ptr(0));
+            sumPass2Kernel_C4<R, threads_x * threads_y><<<1, threads_x * threads_y>>>(
+                    (typename TypeVec<R, 4>::vec_t*)buf.ptr(0), grid.x * grid.y);
+            break;
+        }
+
cudaSafeCall(cudaThreadSynchronize());
+
+        R result[4] = {0, 0, 0, 0};
+        cudaSafeCall(cudaMemcpy(result, buf.ptr(0), sizeof(R) * cn, cudaMemcpyDeviceToHost));
+
+        sum[0] = result[0];
+        sum[1] = result[1];
+        sum[2] = result[2];
+        sum[3] = result[3];
+    }
+
+    template void absSumMultipassCaller<uchar>(const DevMem2D, PtrStep, double*, int);
+    template void absSumMultipassCaller<char>(const DevMem2D, PtrStep, double*, int);
+    template void absSumMultipassCaller<ushort>(const DevMem2D, PtrStep, double*, int);
+    template void absSumMultipassCaller<short>(const DevMem2D, PtrStep, double*, int);
+    template void absSumMultipassCaller<int>(const DevMem2D, PtrStep, double*, int);
+    template void absSumMultipassCaller<float>(const DevMem2D, PtrStep, double*, int);
+
+
+    template <typename T>
+    void absSumCaller(const DevMem2D src, PtrStep buf, double* sum, int cn)
+    {
+        using namespace sums;
+        typedef typename SumType<T>::R R;
+
+        dim3 threads, grid;
+        estimateThreadCfg(src.cols, src.rows, threads, grid);
+        setKernelConsts(src.cols, src.rows, threads, grid);
+
+        switch (cn)
+        {
+        case 1:
+            sumKernel<T, R, AbsOp<R>, threads_x * threads_y><<<grid, threads>>>(
+                    src, (typename TypeVec<R, 1>::vec_t*)buf.ptr(0));
+            break;
+        case 2:
+            sumKernel_C2<T, R, AbsOp<R>, threads_x * threads_y><<<grid, threads>>>(
+                    src, (typename TypeVec<R, 2>::vec_t*)buf.ptr(0));
+            break;
+        case 3:
+            sumKernel_C3<T, R, AbsOp<R>, threads_x * threads_y><<<grid, threads>>>(
+                    src, (typename TypeVec<R, 3>::vec_t*)buf.ptr(0));
+            break;
+        case 4:
+            sumKernel_C4<T, R, AbsOp<R>, threads_x * threads_y><<<grid, threads>>>(
+                    src, (typename TypeVec<R, 4>::vec_t*)buf.ptr(0));
+            break;
+        }
+        cudaSafeCall(cudaThreadSynchronize());
+
+        R result[4] = {0, 0, 0, 0};
+        cudaSafeCall(cudaMemcpy(result, buf.ptr(0), sizeof(R) * cn, cudaMemcpyDeviceToHost));
+
+        sum[0] = result[0];
+        sum[1] = result[1];
+        sum[2] = result[2];
+        sum[3] = result[3];
+    }
+
+    template void absSumCaller<uchar>(const DevMem2D, PtrStep, double*, int);
+    template void absSumCaller<char>(const DevMem2D, PtrStep, double*, int);
+    template void absSumCaller<ushort>(const DevMem2D, PtrStep, double*, int);
+    template void absSumCaller<short>(const DevMem2D, PtrStep, double*, int);
+
template void absSumCaller<int>(const DevMem2D, PtrStep, double*, int);
+    template void absSumCaller<float>(const DevMem2D, PtrStep, double*, int);
+
+
+    template <typename T>
     void sqrSumMultipassCaller(const DevMem2D src, PtrStep buf, double* sum, int cn)
     {
         using namespace sums;
diff --git a/modules/gpu/src/matrix_reductions.cpp b/modules/gpu/src/matrix_reductions.cpp
index 8f4fb95..0c83271 100644
--- a/modules/gpu/src/matrix_reductions.cpp
+++ b/modules/gpu/src/matrix_reductions.cpp
@@ -52,6 +52,8 @@ double cv::gpu::norm(const GpuMat&, int) { throw_nogpu(); return 0.0; }
 double cv::gpu::norm(const GpuMat&, const GpuMat&, int) { throw_nogpu(); return 0.0; }
 Scalar cv::gpu::sum(const GpuMat&) { throw_nogpu(); return Scalar(); }
 Scalar cv::gpu::sum(const GpuMat&, GpuMat&) { throw_nogpu(); return Scalar(); }
+Scalar cv::gpu::absSum(const GpuMat&) { throw_nogpu(); return Scalar(); }
+Scalar cv::gpu::absSum(const GpuMat&, GpuMat&) { throw_nogpu(); return Scalar(); }
 Scalar cv::gpu::sqrSum(const GpuMat&) { throw_nogpu(); return Scalar(); }
 Scalar cv::gpu::sqrSum(const GpuMat&, GpuMat&) { throw_nogpu(); return Scalar(); }
 void cv::gpu::minMax(const GpuMat&, double*, double*, const GpuMat&) { throw_nogpu(); }
@@ -129,6 +131,12 @@ namespace cv { namespace gpu { namespace mathfunc
     void sumMultipassCaller(const DevMem2D src, PtrStep buf, double* sum, int cn);
 
     template <typename T>
+    void absSumCaller(const DevMem2D src, PtrStep buf, double* sum, int cn);
+
+    template <typename T>
+    void absSumMultipassCaller(const DevMem2D src, PtrStep buf, double* sum, int cn);
+
+    template <typename T>
     void sqrSumCaller(const DevMem2D src, PtrStep buf, double* sum, int cn);
 
     template <typename T>
@@ -166,7 +174,7 @@ Scalar cv::gpu::sum(const GpuMat& src, GpuMat& buf)
 
     Size buf_size;
     sums::getBufSizeRequired(src.cols, src.rows, src.channels(),
-        buf_size.width, buf_size.height);
+                             buf_size.width, buf_size.height);
     ensureSizeIsEnough(buf_size, CV_8U, buf);
 
     Caller* callers = multipass_callers;
@@ -182,6 +190,47 @@ Scalar cv::gpu::sum(const GpuMat& src, GpuMat& buf)
 }
 
 
+Scalar cv::gpu::absSum(const GpuMat& src)
+{
+    GpuMat buf;
+    return absSum(src, buf);
+}
+
+
+Scalar cv::gpu::absSum(const GpuMat& src, GpuMat& buf)
+{
+    using namespace mathfunc;
+
+    typedef void (*Caller)(const DevMem2D, PtrStep, double*, int);
+
+    static Caller multipass_callers[7] = {
+            absSumMultipassCaller<uchar>, absSumMultipassCaller<char>,
+            absSumMultipassCaller<ushort>, absSumMultipassCaller<short>,
+            absSumMultipassCaller<int>, absSumMultipassCaller<float>, 0 };
+
+    static Caller singlepass_callers[7] = {
+            absSumCaller<uchar>, absSumCaller<char>,
+            absSumCaller<ushort>, absSumCaller<short>,
+            absSumCaller<int>, absSumCaller<float>, 0 };
+
+    Size buf_size;
+    sums::getBufSizeRequired(src.cols, src.rows, src.channels(),
+                             buf_size.width, buf_size.height);
+    ensureSizeIsEnough(buf_size, CV_8U, buf);
+
+    Caller* callers = multipass_callers;
+    if (TargetArchs::builtWith(ATOMICS) && DeviceInfo().has(ATOMICS))
+        callers = singlepass_callers;
+
+    Caller caller = callers[src.depth()];
+    if (!caller) CV_Error(CV_StsBadArg, "absSum: unsupported type");
+
+    double result[4];
+    caller(src, buf, result, src.channels());
+    return Scalar(result[0], result[1], result[2], result[3]);
+}
+
+
 Scalar cv::gpu::sqrSum(const GpuMat& src)
 {
     GpuMat buf;
@@ -222,6 +271,9 @@ Scalar cv::gpu::sqrSum(const GpuMat& src, GpuMat& buf)
     return Scalar(result[0], result[1], result[2], result[3]);
 }
 
+
+
+
 ////////////////////////////////////////////////////////////////////////
 // Find min or max
 
diff --git a/tests/gpu/src/arithm.cpp b/tests/gpu/src/arithm.cpp
index d03642a..d42580b 100644
--- a/tests/gpu/src/arithm.cpp
+++ b/tests/gpu/src/arithm.cpp
@@ -956,6 +956,10 @@ struct CV_GpuSumTest: CvTest
         int typemax = CV_32F;
         for (int type = CV_8U; type <= typemax; ++type)
         {
+            //
+            // sum
+            //
+
             gen(1 + rand() % 500, 1 + rand() % 500, CV_MAKETYPE(type, 2), src);
             a = sum(src);
             b = sum(GpuMat(src));
@@ -965,6 +969,7 @@ struct CV_GpuSumTest: CvTest
                 ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);
                 return;
             }
+
             gen(1 + rand() % 500, 1 + rand() % 500, CV_MAKETYPE(type, 3),
src);
             a = sum(src);
             b = sum(GpuMat(src));
@@ -974,6 +979,7 @@ struct CV_GpuSumTest: CvTest
                 ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);
                 return;
             }
+
             gen(1 + rand() % 500, 1 + rand() % 500, CV_MAKETYPE(type, 4), src);
             a = sum(src);
             b = sum(GpuMat(src));
@@ -983,6 +989,7 @@ struct CV_GpuSumTest: CvTest
                 ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);
                 return;
             }
+
             gen(1 + rand() % 500, 1 + rand() % 500, type, src);
             a = sum(src);
             b = sum(GpuMat(src));
@@ -992,6 +999,25 @@ struct CV_GpuSumTest: CvTest
                 ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);
                 return;
             }
+
+            //
+            // absSum
+            //
+
+            gen(1 + rand() % 200, 1 + rand() % 200, CV_MAKETYPE(type, 1), src);
+            b = absSum(GpuMat(src));
+            a = norm(src, NORM_L1);
+            if (abs(a[0] - b[0]) > src.size().area() * max_err)
+            {
+                ts->printf(CvTS::CONSOLE, "type: %d, cols: %d, rows: %d, expected: %f, actual: %f\n", type, src.cols, src.rows, a[0], b[0]);
+                ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);
+                return;
+            }
+
+            //
+            // sqrSum
+            //
+
             if (type != CV_8S)
             {
                 gen(1 + rand() % 200, 1 + rand() % 200, CV_MAKETYPE(type, 1), src);
-- 
2.7.4